In [69]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler, LabelEncoder


# Loading the datasets

In [70]:
# Load the three raw datasets.
# The folder is defined once and can be overridden with the LOOKALIKE_DATA_DIR
# environment variable, so the notebook is not tied to a single machine.
# The default is the folder the notebook originally read from.
DATA_DIR = Path(os.environ.get("LOOKALIKE_DATA_DIR", r"C:\Users\anush\Downloads"))

Customers = pd.read_csv(DATA_DIR / "Customers.csv")
Products = pd.read_csv(DATA_DIR / "Products.csv")
Transactions = pd.read_csv(DATA_DIR / "Transactions.csv")

In [71]:
# Summary statistics of the customer table (count / unique / top / freq per column).
customers_summary = Customers.describe()
print(customers_summary)

       CustomerID      CustomerName         Region  SignupDate
count         200               200            200         200
unique        200               200              4         179
top         C0001  Lawrence Carroll  South America  2024-11-11
freq            1                 1             59           3


In [72]:
# Attach customer attributes to every transaction.
# validate="many_to_one" makes pandas raise a MergeError if CustomerID is not
# unique in Customers, instead of silently duplicating transaction rows.
merged_data = Transactions.merge(
    Customers, on="CustomerID", how="inner", validate="many_to_one"
)

# Attach product attributes. Both inputs carry a Price column, so pandas keeps
# them apart as Price_x (transaction side) and Price_y (product side).
# The same many-to-one check guards against duplicated ProductID rows.
merged_data1 = merged_data.merge(
    Products, on="ProductID", how="inner", validate="many_to_one"
)
print(merged_data1.head())

  TransactionID CustomerID ProductID      TransactionDate  Quantity  \
0        T00001      C0199      P067  2024-08-25 12:38:23         1   
1        T00112      C0146      P067  2024-05-27 22:23:54         1   
2        T00166      C0127      P067  2024-04-25 07:38:55         1   
3        T00272      C0087      P067  2024-03-26 22:55:37         2   
4        T00363      C0070      P067  2024-03-21 15:10:10         3   

   TotalValue  Price_x     CustomerName         Region  SignupDate  \
0      300.68   300.68   Andrea Jenkins         Europe  2022-12-03   
1      300.68   300.68  Brittany Harvey           Asia  2024-09-04   
2      300.68   300.68  Kathryn Stevens         Europe  2024-04-04   
3      601.36   300.68  Travis Campbell  South America  2024-04-11   
4      902.04   300.68    Timothy Perez         Europe  2022-03-15   

                       ProductName     Category  Price_y  
0  ComfortLiving Bluetooth Speaker  Electronics   300.68  
1  ComfortLiving Bluetooth Speaker

# Merge datasets

In [73]:
# Standardize the numeric columns of customer_stats in place.
# NOTE(review): customer_stats is not created in the visible cells; it has to
# exist in the kernel before this cell runs.
numerical_features = ['TotalSpend', 'AvgSpendPerTransaction', 'TransactionCount']
scaler = StandardScaler()
scaled_values = scaler.fit_transform(customer_stats[numerical_features])
customer_stats[numerical_features] = scaled_values

# Replace the category labels with integer codes.
# NOTE(review): LabelEncoder is documented for target labels; the integer codes
# give the categories an arbitrary order. Consider one-hot encoding; confirm
# with downstream cells before changing the column schema.
label_encoder = LabelEncoder()
category_codes = label_encoder.fit_transform(customer_stats['MostCommonCategory'])
customer_stats['MostCommonCategory'] = category_codes



In [85]:
# Similarity Calculation
# Pairwise cosine similarity between customers, computed on every column of
# customer_stats except the identifier. Rows and columns are labelled by
# CustomerID so scores can be looked up by customer.
feature_matrix = customer_stats.drop(columns='CustomerID')
similarity_matrix = pd.DataFrame(
    cosine_similarity(feature_matrix),
    index=customer_stats['CustomerID'],
    columns=customer_stats['CustomerID'],
)

# Generate Lookalike Recommendations
N_TARGET_CUSTOMERS = 20  # recommendations are produced for the first N customers
TOP_N_LOOKALIKES = 3     # number of lookalikes kept per customer

lookalike_map = {}
for customer_id in similarity_matrix.index[:N_TARGET_CUSTOMERS]:
    # Remove the customer's own entry by label before ranking. Skipping
    # position 0 after sorting is unsafe: another customer can tie at the top
    # similarity, and the customer would then be listed as its own lookalike.
    similar_customers = (
        similarity_matrix.loc[customer_id]
        .drop(labels=customer_id)
        .nlargest(TOP_N_LOOKALIKES)
    )
    # Store (customer, score) pairs with scores rounded to 2 decimals as plain
    # Python floats, so their text form does not depend on the NumPy version.
    lookalike_map[customer_id] = [
        (sim_cust, float(round(sim_score, 2)))
        for sim_cust, sim_score in similar_customers.items()
    ]




# Save to Lookalike.csv

In [86]:
# Save to Lookalike.csv
# One row per customer; the Lookalikes column holds the text form of the list
# of (customer, score) pairs. Scores are coerced to plain Python floats first,
# because NumPy >= 2 renders its scalars as "np.float64(...)" inside a list,
# which would end up in the CSV.
lookalike_df = pd.DataFrame({
    'CustomerID': list(lookalike_map.keys()),
    'Lookalikes': [
        str([(sim_cust, float(sim_score)) for sim_cust, sim_score in pairs])
        for pairs in lookalike_map.values()
    ],
})
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike model completed. Results saved to Lookalike.csv.")
print(lookalike_df)


Lookalike model completed. Results saved to Lookalike.csv.
   CustomerID                                         Lookalikes
0       C0001  [('C0152', 1.0), ('C0164', 1.0), ('C0160', 0.99)]
1       C0002  [('C0029', 1.0), ('C0010', 1.0), ('C0060', 0.98)]
2       C0003  [('C0178', 1.0), ('C0052', 1.0), ('C0166', 0.99)]
3       C0004   [('C0021', 1.0), ('C0075', 1.0), ('C0156', 1.0)]
4       C0005  [('C0112', 1.0), ('C0197', 1.0), ('C0133', 0.99)]
5       C0006  [('C0117', 1.0), ('C0168', 0.99), ('C0185', 0....
6       C0007  [('C0120', 1.0), ('C0092', 1.0), ('C0080', 0.98)]
7       C0008  [('C0084', 0.99), ('C0162', 0.97), ('C0184', 0...
8       C0009  [('C0077', 1.0), ('C0083', 1.0), ('C0033', 0.98)]
9       C0010  [('C0029', 1.0), ('C0002', 1.0), ('C0009', 0.98)]
10      C0011   [('C0064', 1.0), ('C0187', 1.0), ('C0018', 1.0)]
11      C0012  [('C0039', 0.99), ('C0067', 0.99), ('C0105', 0...
12      C0013  [('C0143', 1.0), ('C0054', 1.0), ('C0099', 0.99)]
13      C0014  [('C0151', 1.0),