##Merge Data

In [1]:
import pandas as pd

customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')

data = pd.merge(transactions, customers, on="CustomerID", how="inner")
data = pd.merge(data, products, on="ProductID", how="inner")


In [None]:
merged_data = transactions.merge(customers, on='CustomerID').merge(products, on='ProductID')

print(merged_data.head())


  TransactionID CustomerID ProductID      TransactionDate  Quantity  \
0        T00001      C0199      P067  2024-08-25 12:38:23         1   
1        T00112      C0146      P067  2024-05-27 22:23:54         1   
2        T00166      C0127      P067  2024-04-25 07:38:55         1   
3        T00272      C0087      P067  2024-03-26 22:55:37         2   
4        T00363      C0070      P067  2024-03-21 15:10:10         3   

   TotalValue  Price_x     CustomerName         Region  SignupDate  \
0      300.68   300.68   Andrea Jenkins         Europe  2022-12-03   
1      300.68   300.68  Brittany Harvey           Asia  2024-09-04   
2      300.68   300.68  Kathryn Stevens         Europe  2024-04-04   
3      601.36   300.68  Travis Campbell  South America  2024-04-11   
4      902.04   300.68    Timothy Perez         Europe  2022-03-15   

                       ProductName     Category  Price_y  
0  ComfortLiving Bluetooth Speaker  Electronics   300.68  
1  ComfortLiving Bluetooth Speaker

##Extract Features

#Customer-specific Features:

In [None]:
customer_features = merged_data.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'TransactionID': 'count',
    'Category': lambda x: x.mode()[0]
}).rename(columns={'TransactionID': 'TransactionCount'})
print(customer_features.head())


            TotalValue  TransactionCount     Category
CustomerID                                           
C0001          3354.52                 5  Electronics
C0002          1862.74                 4     Clothing
C0003          2725.38                 4   Home Decor
C0004          5354.88                 8        Books
C0005          2034.24                 3  Electronics


#Standardize Features:

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity

encoder = LabelEncoder()

customer_features_encoded = customer_features.apply(encoder.fit_transform)

similarity_matrix = cosine_similarity(customer_features_encoded)
print(similarity_matrix)


[[1.         0.99866244 0.99977038 ... 0.99805038 0.9990105  0.99987543]
 [0.99866244 1.         0.99879147 ... 0.99898592 0.99969566 0.9978878 ]
 [0.99977038 0.99879147 1.         ... 0.99902487 0.999482   0.99936529]
 ...
 [0.99805038 0.99898592 0.99902487 ... 1.         0.99972001 0.99695079]
 [0.9990105  0.99969566 0.999482   ... 0.99972001 1.         0.99819215]
 [0.99987543 0.9978878  0.99936529 ... 0.99695079 0.99819215 1.        ]]


#Compute Similarity

In [None]:
import numpy as np

customer_ids = customer_features.index

recommendations = {}

for idx, customer_id in enumerate(customer_ids[:20]):

    scores = similarity_matrix[idx]

    similar_indices = np.argsort(-scores)[1:4]

    recommendations[customer_id] = [(customer_ids[i], scores[i]) for i in similar_indices]

print(recommendations)



{'C0001': [('C0012', 0.9999999253752454), ('C0176', 0.9999993525377122), ('C0129', 0.9999967175834013)], 'C0002': [('C0029', 0.9999834112176652), ('C0088', 0.9999517777697641), ('C0111', 0.9999446917530812)], 'C0003': [('C0178', 0.9999997097021179), ('C0144', 0.999997240659979), ('C0005', 0.9999704644465711)], 'C0004': [('C0165', 0.9999999953893446), ('C0021', 0.9999998713878631), ('C0101', 0.9999992242462938)], 'C0005': [('C0052', 0.9999914754140282), ('C0144', 0.9999857603611165), ('C0112', 0.9999795260275353)], 'C0006': [('C0117', 0.9999996001190012), ('C0074', 0.9999995122698279), ('C0171', 0.9999974202976782)], 'C0007': [('C0120', 0.9999886525676747), ('C0158', 0.9999823985882315), ('C0037', 0.9999741875675856)], 'C0008': [('C0103', 0.9999859116777267), ('C0030', 0.9999753887369268), ('C0184', 0.9999375253313431)], 'C0009': [('C0119', 0.9998668282478016), ('C0077', 0.9998668282478016), ('C0157', 0.9983679844535663)], 'C0010': [('C0029', 0.9999787444336963), ('C0111', 0.99995760921

#Recommend Similar Customers

In [None]:
import pandas as pd

lookalike_df = pd.DataFrame({
    'cust_id': recommendations.keys(),
    'lookalike_list': [str(v) for v in recommendations.values()]
})

lookalike_df.to_csv('Shravani_RS_Lookalike.csv', index=False)
print("Lookalike recommendations saved!")


Lookalike recommendations saved!


In [None]:
import pandas as pd

lookalike_df = pd.read_csv('Shravani_RS_Lookalike.csv')

print(lookalike_df.head())


  cust_id                                     lookalike_list
0   C0001  [('C0012', 0.9999999253752454), ('C0176', 0.99...
1   C0002  [('C0029', 0.9999834112176652), ('C0088', 0.99...
2   C0003  [('C0178', 0.9999997097021179), ('C0144', 0.99...
3   C0004  [('C0165', 0.9999999953893446), ('C0021', 0.99...
4   C0005  [('C0052', 0.9999914754140282), ('C0144', 0.99...
