In [5]:
import pandas as pd

Customers_df    = pd.read_csv('/content/Customers.csv')
Products_df     = pd.read_csv('/content/Products.csv')
Transactions_df = pd.read_csv('/content/Transactions.csv')

In [33]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime, timedelta

customer_features = pd.get_dummies(Customers_df[['Region']])

Customers_df['SignupDate'] = pd.to_datetime(Customers_df['SignupDate'])
current_date = datetime.now()
customer_features['DaysSinceSignup'] = (current_date - Customers_df['SignupDate']).dt.days

customer_product_matrix = pd.pivot_table(
    Transactions_df,
    values='TotalValue',
    index='CustomerID',
    columns='ProductID',
    aggfunc='sum',
    fill_value=0
)

customer_features.set_index(Customers_df['CustomerID'], inplace=True)
combined_features = pd.concat([customer_features, customer_product_matrix], axis=1)
combined_features = combined_features.fillna(0)

similarity_matrix = cosine_similarity(combined_features)
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=combined_features.index,
    columns=combined_features.index
)

def get_top_lookalikes(customer_id, similarity_df, n=3):
    customer_similarities = similarity_df.loc[customer_id].sort_values(ascending=False)
    # Exclude the customer itself (similarity = 1.0)
    top_similar = customer_similarities[1:n+1]
    return [(idx, score) for idx, score in top_similar.items()]

lookalike_results = []
for cust_id in Customers_df['CustomerID'][:20]:
    top_3 = get_top_lookalikes(cust_id, similarity_df)
    lookalike_results.append({
        'CustomerID': cust_id,
        'Lookalikes': top_3
    })

lookalike_df = pd.DataFrame(lookalike_results)
lookalike_df['Lookalikes'] = lookalike_df['Lookalikes'].apply(lambda x: ', '.join([f"{cid}({score:.3f})" for cid, score in x]))

lookalike_df.to_csv('Pushpender_Singh_Lookalike.csv', index=False)

print("Sample lookalike recommendations:")
print(lookalike_df.head())

Sample lookalike recommendations:
  CustomerID                                Lookalikes
0      C0001  C0100(0.576), C0120(0.568), C0050(0.547)
1      C0002  C0180(0.731), C0060(0.729), C0071(0.712)
2      C0003  C0181(0.638), C0186(0.576), C0067(0.572)
3      C0004  C0070(0.546), C0063(0.511), C0133(0.468)
4      C0005  C0192(0.751), C0096(0.678), C0180(0.581)
