In [25]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [27]:
# Load datasets
# The three CSVs are read from one directory. It defaults to the original
# author's local folder, but can be redirected without editing the notebook by
# setting the LOOKALIKE_DATA_DIR environment variable before starting the kernel.
DATA_DIR = Path(os.environ.get("LOOKALIKE_DATA_DIR", r"C:\Users\shash\OneDrive\Desktop\shashank"))

customers = pd.read_csv(DATA_DIR / "Customers.csv")
products = pd.read_csv(DATA_DIR / "Products.csv")
transactions = pd.read_csv(DATA_DIR / "Transactions.csv")

In [29]:
# Convert the date columns to pandas datetimes so that date arithmetic
# (e.g. subtracting two transaction dates) works in later cells.
customers['SignupDate'] = customers['SignupDate'].pipe(pd.to_datetime)
transactions['TransactionDate'] = transactions['TransactionDate'].pipe(pd.to_datetime)

In [31]:
# One row per transaction, with the matching customer and product columns
# attached. Left joins keep every transaction even when a lookup has no match.
merged_df = (
    transactions
    .merge(customers, on='CustomerID', how='left')
    .merge(products, on='ProductID', how='left')
)


In [33]:
def _first_mode(values):
    """Return a single most-frequent value from ``values`` (NaN if there is none).

    ``Series.mode`` returns *every* tied value, in sorted order. Handing it to
    ``agg`` directly can therefore leave an array instead of a scalar in the
    result whenever a customer bought several products equally often. Taking
    the first entry keeps the column scalar and the choice deterministic.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else np.nan


def _span_days(dates):
    """Return the whole days between the earliest and latest entry of ``dates``."""
    return (dates.max() - dates.min()).days


# Per-customer behaviour profile, indexed by CustomerID.
customer_features = merged_df.groupby('CustomerID').agg(
    Total_Spend=('TotalValue', 'sum'),
    Num_Transactions=('TransactionID', 'count'),
    # NOTE: despite its name, 'Recency' holds the number of days between a
    # customer's first and last transaction (0 for a single purchase), not the
    # days since the last purchase. The name is kept because later cells
    # select this column by name.
    Recency=('TransactionDate', _span_days),
    Favorite_Product=('ProductID', _first_mode),
)


In [35]:
# Put the numeric features on a common scale before comparing customers, so
# that no single column dominates the similarity purely because of its units.
feature_columns = ['Total_Spend', 'Num_Transactions', 'Recency']
scaler = StandardScaler()
scaled_features = scaler.fit_transform(customer_features[feature_columns])

In [39]:
# Pairwise cosine similarity between every pair of rows of scaled_features;
# entry [i][j] compares row i with row j.
similarity_matrix = cosine_similarity(X=scaled_features)

In [41]:
# How many customers (taken from the start of customer_features.index) to
# report on, and how many lookalikes to keep for each of them.
N_TARGET_CUSTOMERS = 20
TOP_N_LOOKALIKES = 3

customer_ids = customer_features.index[:N_TARGET_CUSTOMERS]
lookalikes = {}
for idx, cust_id in enumerate(customer_ids):
    # Row idx of similarity_matrix corresponds to customer_features.index[idx].
    scores = similarity_matrix[idx]
    # Rank all customers from most to least similar; a stable sort makes the
    # order of exactly-tied scores deterministic (lowest position first).
    ranked = np.argsort(-scores, kind='stable')
    # Drop the customer itself explicitly rather than assuming it is ranked
    # first: exact ties, rounding, or an all-zero feature row can put another
    # customer ahead of it.
    top_matches = ranked[ranked != idx][:TOP_N_LOOKALIKES]
    # Store plain Python floats so the scores print/serialise as bare numbers.
    lookalikes[cust_id] = [
        (customer_features.index[i], float(scores[i])) for i in top_matches
    ]


In [45]:
# Save results to CSV
# One row per customer: its ID and its list of (lookalike ID, score) pairs.
lookalike_rows = [(cust_id, matches) for cust_id, matches in lookalikes.items()]
lookalike_df = pd.DataFrame(lookalike_rows, columns=['CustomerID', 'Lookalikes'])
lookalike_df.to_csv(path_or_buf="Shashank_R_Lookalike.csv", index=False)
