In [1]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import pandas as pd

In [2]:
# Load Data
# Read the three input tables, in this order, from CSV files located in the
# current working directory.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

In [3]:
# Merge Datasets
# Attach customer attributes to every transaction, then product attributes.
# Both joins use pandas' default inner join, so a transaction whose CustomerID
# or ProductID has no match in the lookup table is dropped.
data = pd.merge(
    pd.merge(transactions, customers, on='CustomerID'),
    products,
    on='ProductID',
)

In [6]:
# Feature Engineering
# Collapse the merged transactions to one row per customer:
#   TotalSpend     - sum of TotalValue over the customer's transactions
#   TotalItems     - sum of Quantity over the customer's transactions
#   UniqueProducts - number of distinct ProductID values the customer bought
customer_features = data.groupby('CustomerID').agg(
    TotalSpend=('TotalValue', 'sum'),
    TotalItems=('Quantity', 'sum'),
    UniqueProducts=('ProductID', 'nunique'),
)

In [8]:
# Normalize Features
# Learn each column's mean and standard deviation, then standardize the
# columns so that no single feature dominates the similarity computation
# purely because of its scale.
scaler = StandardScaler().fit(customer_features)
scaled_features = scaler.transform(customer_features)

In [11]:
# Similarity Calculation
# Pairwise cosine similarity between every pair of customers' scaled feature
# vectors, wrapped in a DataFrame labelled by CustomerID on both axes so that
# similarity_df.loc[a, b] is the similarity between customers a and b.
similarity_matrix = cosine_similarity(scaled_features)
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_features.index,
    columns=customer_features.index,
)

In [12]:
# Top 3 Lookalikes for Customers C0001 to C0020
# Builds lookalike_dict: {customer_id: [(other_customer_id, similarity), ...]}
# with up to three entries per customer, ordered from most to least similar.
#
# NOTE(review): index[:20] takes the first 20 labels of customer_features.index.
# That equals C0001..C0020 only if the index is sorted by CustomerID and each
# of those customers appears in the merged transaction data - confirm.
lookalike_dict = {}
for customer_id in customer_features.index[:20]:
    # Exclude the customer's own entry by label instead of skipping position 0
    # of the sorted scores: another customer can tie with (or, through float
    # rounding, edge past) the self-similarity, and a customer whose scaled
    # vector is all zeros has a self-similarity of 0. In either case the
    # positional slice would return the customer as their own lookalike.
    similar_customers = similarity_df.loc[customer_id].drop(customer_id).nlargest(3)
    # Store built-in floats rather than NumPy scalars so that str() of this
    # list stays "('C0164', 0.968...)" on every NumPy version (NumPy >= 2
    # renders its scalars as "np.float64(...)").
    lookalike_dict[customer_id] = [
        (other_id, float(score)) for other_id, score in similar_customers.items()
    ]

In [13]:
# Save as CSV
# One row per customer: the customer ID and the string form of that customer's
# list of (lookalike_id, score) pairs. The row index is not written.
lookalike_output = pd.DataFrame(
    [
        dict(cust_id=cust, lookalikes=str(matches))
        for cust, matches in lookalike_dict.items()
    ]
)
lookalike_output.to_csv(path_or_buf='FirstName_LastName_Lookalike.csv', index=False)

In [14]:
# Read 'FirstName_LastName_Lookalike.csv' back from disk to check what was
# saved. The 'lookalikes' column was written as str(list), so it is loaded
# as text rather than as Python lists.
lookalike = pd.read_csv('FirstName_LastName_Lookalike.csv')

In [15]:
# Preview the output: print the first rows of the reloaded table
# (head() with no argument returns the first 5 rows).
print(lookalike.head())

  cust_id                                         lookalikes
0   C0001  [('C0164', 0.9684103747672834), ('C0137', 0.96...
1   C0002  [('C0029', 0.9997616343498978), ('C0031', 0.99...
2   C0003  [('C0176', 0.8906401232895584), ('C0027', 0.86...
3   C0004  [('C0075', 0.9976740652389241), ('C0175', 0.99...
4   C0005  [('C0058', 0.9997982043779898), ('C0123', 0.99...


In [16]:
# Print the whole reloaded table; long 'lookalikes' strings are shortened
# with "..." in this plain-text view.
print(lookalike)

   cust_id                                         lookalikes
0    C0001  [('C0164', 0.9684103747672834), ('C0137', 0.96...
1    C0002  [('C0029', 0.9997616343498978), ('C0031', 0.99...
2    C0003  [('C0176', 0.8906401232895584), ('C0027', 0.86...
3    C0004  [('C0075', 0.9976740652389241), ('C0175', 0.99...
4    C0005  [('C0058', 0.9997982043779898), ('C0123', 0.99...
5    C0006  [('C0079', 0.9998795967431424), ('C0196', 0.99...
6    C0007  [('C0140', 0.9985337287784547), ('C0085', 0.99...
7    C0008  [('C0179', 0.9965086350321659), ('C0081', 0.99...
8    C0009  [('C0192', 0.9983620640345526), ('C0177', 0.99...
9    C0010  [('C0142', 0.964094866380962), ('C0027', 0.956...
10   C0011  [('C0023', 0.9819122814188915), ('C0064', 0.97...
11   C0012  [('C0041', 0.997318308138947), ('C0045', 0.995...
12   C0013  [('C0059', 0.9992581092102039), ('C0141', 0.99...
13   C0014  [('C0033', 0.9996378284444327), ('C0095', 0.99...
14   C0015  [('C0131', 0.9999953416670492), ('C0058', 0.99...
15   C00