In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# --- Inputs -----------------------------------------------------------------
# Customer and product master data are read from CSV files on disk.
customers = pd.read_csv('/workspaces/eCommerce-Transaction-Dataset/Customers.csv')
products = pd.read_csv('/workspaces/eCommerce-Transaction-Dataset/Products.csv')

# Hand-written stand-in for the transaction table: one purchase per customer.
transactions = pd.DataFrame(
    [
        ('C0001', 'P001', 20),
        ('C0002', 'P002', 150),
        ('C0003', 'P003', 50),
        ('C0004', 'P004', 80),
    ],
    columns=['CustomerID', 'ProductID', 'PurchaseAmount'],
)

# --- Join -------------------------------------------------------------------
# Attach each transaction to its customer row, then to its product row
# (default inner joins, so unmatched IDs are dropped).
customer_transactions = (
    customers
    .merge(transactions, on='CustomerID')
    .merge(products, on='ProductID')
)

# --- Per-customer feature table ---------------------------------------------
# One row per customer: first Region and SignupDate seen, total amount
# purchased, mean product price, and the most frequent product category.
customer_features = (
    customer_transactions
    .groupby('CustomerID')
    .agg(
        Region=('Region', 'first'),
        SignupDate=('SignupDate', 'first'),
        PurchaseAmount=('PurchaseAmount', 'sum'),
        Price=('Price', 'mean'),
        # mode() can return several values on a tie; take the first one.
        Category=('Category', lambda categories: categories.mode()[0]),
    )
    .reset_index()
)

# One-hot encode the two categorical columns.
customer_features = pd.get_dummies(customer_features, columns=['Region', 'Category'])

# Standardize the numeric columns; the fitted scaler is kept in `scaler`.
scaler = StandardScaler().fit(customer_features[['PurchaseAmount', 'Price']])
customer_features[['PurchaseAmount', 'Price']] = scaler.transform(
    customer_features[['PurchaseAmount', 'Price']]
)

# Function to find similar customers
def find_similar_customers(customer_id, top_n=3):
    """Return the customers most similar to ``customer_id``.

    Similarity is the cosine similarity between rows of the module-level
    ``customer_features`` frame, computed over every column except
    ``CustomerID`` and ``SignupDate``.

    Args:
        customer_id: Value to look up in the ``CustomerID`` column.
        top_n: Maximum number of similar customers to return.

    Returns:
        A DataFrame with the columns ``CustomerID`` and ``SimilarityScore``,
        sorted by descending score and holding at most ``top_n`` rows. The
        frame is empty when there is no other customer to compare against.

    Raises:
        ValueError: If ``customer_id`` does not appear in
            ``customer_features``.
    """
    is_target = customer_features['CustomerID'] == customer_id
    if not is_target.any():
        # Fail with a clear message instead of letting cosine_similarity
        # reject an empty input array.
        raise ValueError(
            f"Customer {customer_id!r} not found in customer_features"
        )

    other_customers = customer_features.loc[~is_target, ['CustomerID']].copy()
    if other_customers.empty:
        # Nothing to compare against: return an empty, correctly shaped result.
        other_customers['SimilarityScore'] = pd.Series(dtype=float)
        return other_customers

    # The identifier and the signup date are not part of the feature vector.
    feature_matrix = customer_features.drop(columns=['CustomerID', 'SignupDate'])

    similarities = cosine_similarity(
        feature_matrix[is_target], feature_matrix[~is_target]
    )
    # Row 0 holds the target customer's similarity to every other customer.
    other_customers['SimilarityScore'] = similarities[0]

    return other_customers.sort_values(
        by='SimilarityScore', ascending=False
    ).head(top_n)

# Example usage: show the closest matches (default top 3) for customer C0001.
similar_customers = find_similar_customers(customer_id='C0001')
print(similar_customers)

  CustomerID  SimilarityScore
2      C0003         0.762263
3      C0004         0.305224
1      C0002        -0.354028
