In [2]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler


In [3]:
# Load the two input tables from the working directory.
customers, transactions = (
    pd.read_csv(csv_name) for csv_name in ('Customers.csv', 'Transactions.csv')
)

In [4]:
# Attach customer attributes to every transaction. The default inner join
# keeps only rows whose CustomerID appears in both tables.
data = transactions.merge(customers, on='CustomerID')


In [6]:
# Collapse the transaction-level rows into one feature row per customer:
# mean Price, number of distinct ProductIDs, total Quantity, total TotalValue.
per_customer = data.groupby('CustomerID')
features = per_customer.agg(
    Price=('Price', 'mean'),
    ProductID=('ProductID', 'nunique'),
    Quantity=('Quantity', 'sum'),
    TotalValue=('TotalValue', 'sum'),
).reset_index()  # bring CustomerID back as an ordinary column


In [7]:
# Standardise the numeric features so no single column dominates the
# similarity computation purely because of its scale. CustomerID is an
# identifier, not a feature, so it is left out of the scaled matrix.
numeric_features = features.drop(columns='CustomerID')
scaler = StandardScaler()
features_scaled = scaler.fit_transform(numeric_features)

In [8]:
# Pairwise cosine similarity between every pair of customer feature rows.
# Row/column order follows the row order of `features_scaled`.
similarity_matrix = cosine_similarity(X=features_scaled)


In [9]:
# Label both axes with CustomerID so similarities can be looked up by id
# instead of by matrix position.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=features['CustomerID'],
    columns=features['CustomerID'],
)


In [10]:
# Build the lookalike map: for each of the first 20 customers (in the row
# order of `features`), keep the 3 most similar *other* customers as
# (customer_id, similarity_score) pairs, best match first.
lookalike_dict = {}
for customer_id in features['CustomerID'].head(20):
    # Remove the customer's own entry by label instead of assuming it sorts
    # first: another customer can tie at similarity 1.0 (or edge ahead through
    # floating-point rounding), in which case a positional [1:4] slice would
    # list the customer as their own lookalike and drop a genuine match.
    top_matches = similarity_df[customer_id].drop(customer_id).nlargest(3)
    # Store scores as built-in floats so the tuples stringify as plain numbers
    # when exported, independent of how the installed NumPy prints its scalars.
    lookalike_dict[customer_id] = [
        (other_id, float(score)) for other_id, score in top_matches.items()
    ]

In [11]:
# One row per customer; each of the three columns holds one
# (lookalike_id, score) tuple.
lookalike_df = pd.DataFrame.from_dict(
    lookalike_dict,
    orient='index',
)

# Persist the result without a header row; the customer id is written as the
# first field of each line because the index is kept.
lookalike_df.to_csv(path_or_buf='Lookalike.csv', header=False)

# Preview (kept as the last expression so the notebook renders it).
lookalike_df.head(20)

Unnamed: 0,0,1,2
C0001,"(C0137, 0.969685269503663)","(C0103, 0.9657789082187784)","(C0191, 0.9409198046440858)"
C0002,"(C0029, 0.9998346436457755)","(C0077, 0.9939072127347692)","(C0025, 0.9889603028442084)"
C0003,"(C0010, 0.9550763324079787)","(C0111, 0.9326527836871795)","(C0176, 0.9279947558709847)"
C0004,"(C0075, 0.9971068065471752)","(C0068, 0.9851805978519725)","(C0175, 0.9848549123201815)"
C0005,"(C0130, 0.9980231355905361)","(C0128, 0.9967786769025477)","(C0020, 0.9964012716569236)"
C0006,"(C0196, 0.9952313704679643)","(C0079, 0.988178652169117)","(C0168, 0.9756819078734712)"
C0007,"(C0125, 0.9970153634906015)","(C0085, 0.9966971991971478)","(C0078, 0.9893136531475866)"
C0008,"(C0179, 0.9917118508589422)","(C0090, 0.9839674098438557)","(C0084, 0.9635795660295811)"
C0009,"(C0192, 0.9983815467773353)","(C0128, 0.9864294371721154)","(C0061, 0.9737613761923298)"
C0010,"(C0142, 0.9850184639306189)","(C0121, 0.9744348625974096)","(C0094, 0.9668245719036228)"


: 