In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler



In [2]:
# Load datasets
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')


In [5]:

# Merge datasets
merged_data = transactions.merge(customers, on='CustomerID', how='left')
merged_data = merged_data.merge(products, on='ProductID', how='left')


In [10]:
total_spending = merged_data.groupby('CustomerID')['TotalValue'].sum()

In [7]:
transaction_count = merged_data.groupby('CustomerID')['TransactionID'].count()

In [8]:
category_counts = pd.crosstab(merged_data['CustomerID'], merged_data['Category'])

In [11]:
customer_features = pd.DataFrame({'TotalSpending': total_spending,'TransactionCount': transaction_count}).join(category_counts).fillna(0)

In [13]:
from sklearn.metrics.pairwise import cosine_similarity
similarity_matrix = cosine_similarity(customer_features)
similarity_df = pd.DataFrame(similarity_matrix, index=customer_features.index, columns=customer_features.index)

In [14]:
lookalike_map = {}

# Loop through each customer and find top 3 similar customers
for customer in similarity_df.index:
    similar_customers = similarity_df.loc[customer].sort_values(ascending=False)[1:4]
    lookalike_map[customer] = [(cust, round(score, 2)) for cust, score in similar_customers.items()]

# Convert lookalike map to a DataFrame
lookalike_df = pd.DataFrame({
    'CustomerID': list(lookalike_map.keys()),
    'Lookalikes': list(lookalike_map.values())
})

# Save the result to a CSV file
lookalike_df.to_csv('Lookalike.csv', index=False)


In [16]:

top = lookalike_df[lookalike_df['CustomerID'].isin(customers['CustomerID'][:20])]
print(top)


   CustomerID                                  Lookalikes
0       C0001  [(C0035, 1.0), (C0146, 1.0), (C0005, 1.0)]
1       C0002  [(C0134, 1.0), (C0103, 1.0), (C0062, 1.0)]
2       C0003  [(C0195, 1.0), (C0166, 1.0), (C0179, 1.0)]
3       C0004  [(C0177, 1.0), (C0113, 1.0), (C0075, 1.0)]
4       C0005  [(C0197, 1.0), (C0095, 1.0), (C0001, 1.0)]
5       C0006  [(C0082, 1.0), (C0185, 1.0), (C0187, 1.0)]
6       C0007  [(C0140, 1.0), (C0045, 1.0), (C0051, 1.0)]
7       C0008  [(C0031, 1.0), (C0189, 1.0), (C0098, 1.0)]
8       C0009  [(C0049, 1.0), (C0058, 1.0), (C0077, 1.0)]
9       C0010  [(C0034, 1.0), (C0030, 1.0), (C0150, 1.0)]
10      C0011  [(C0126, 1.0), (C0027, 1.0), (C0070, 1.0)]
11      C0012  [(C0065, 1.0), (C0152, 1.0), (C0104, 1.0)]
12      C0013  [(C0016, 1.0), (C0183, 1.0), (C0105, 1.0)]
13      C0014  [(C0128, 1.0), (C0151, 1.0), (C0172, 1.0)]
14      C0015  [(C0160, 1.0), (C0038, 1.0), (C0137, 1.0)]
15      C0016  [(C0183, 1.0), (C0107, 1.0), (C0013, 1.0)]
16      C0017 