# **Importing Libraries and Loading Datasets**

In [6]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Read the three input tables from CSV files (paths are relative to the
# current working directory). The paths are listed in the same order as the
# names they are bound to: customers, products, transactions.
customers, products, transactions = [
    pd.read_csv(csv_path)
    for csv_path in ('Customers.csv', 'Products.csv', 'Transactions.csv')
]

# **Lookalike Customer Model for Recommendation Based on Transaction History and Product Preferences**

In [7]:
# --- Configuration -----------------------------------------------------------
# Numeric profile columns that feed the similarity computation.
FEATURE_COLUMNS = ['TotalValue', 'Quantity', 'ProductID']
# Lookalikes are produced for the first N customers of the profile table.
NUM_TARGET_CUSTOMERS = 20
# Number of most-similar customers kept for each target customer.
TOP_N_LOOKALIKES = 3

# --- Build one row per customer ----------------------------------------------
# Left joins keep every transaction even if its customer or product is missing
# from the lookup tables.
merged_data = pd.merge(transactions, customers, on='CustomerID', how='left')
merged_data = pd.merge(merged_data, products, on='ProductID', how='left')

# Per-customer profile: total spend, total quantity, number of distinct
# products bought, and the first Region value seen for the customer.
# Region is carried along in the profile but is not used in the similarity.
customer_profile = merged_data.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'sum',
    'ProductID': 'nunique',
    'Region': 'first'
}).reset_index()

# --- Standardise the numeric features (z-score) --------------------------------
feature_values = customer_profile[FEATURE_COLUMNS]
# A constant column has a standard deviation of 0; dividing by it would produce
# NaN and make cosine_similarity raise. Dividing by 1 instead leaves such a
# column at all zeros, so it simply contributes nothing to the similarity.
feature_std = feature_values.std().replace(0, 1)

customer_profile_normalized = customer_profile.copy()
customer_profile_normalized[FEATURE_COLUMNS] = (
    feature_values - feature_values.mean()
) / feature_std

# --- Pairwise similarity ---------------------------------------------------------
cosine_sim = cosine_similarity(customer_profile_normalized[FEATURE_COLUMNS])

# Label both axes with CustomerID so scores can be looked up by customer.
cosine_sim_df = pd.DataFrame(
    cosine_sim,
    index=customer_profile['CustomerID'],
    columns=customer_profile['CustomerID'],
)

# --- Top lookalikes per target customer ------------------------------------------
lookalike_dict = {}

for cust_id in customer_profile['CustomerID'][:NUM_TARGET_CUSTOMERS]:
    # Remove the customer's own entry by label before ranking. Skipping the
    # first sorted row instead is unsafe: another customer with an identical
    # (or tied) score can be sorted ahead of the customer itself, which would
    # leave the customer in its own lookalike list.
    similar_customers = (
        cosine_sim_df[cust_id]
        .drop(labels=cust_id)
        .sort_values(ascending=False)
        .head(TOP_N_LOOKALIKES)
    )
    # Cast scores to built-in float so the list written to CSV contains plain
    # numbers regardless of how the installed NumPy version prints its scalars.
    lookalike_dict[cust_id] = [
        (similar_customer, float(score))
        for similar_customer, score in similar_customers.items()
    ]

# --- Persist and preview -----------------------------------------------------------
lookalike_df = pd.DataFrame([
    {'CustomerID': cust_id, 'Lookalikes': lookalikes}
    for cust_id, lookalikes in lookalike_dict.items()
])

lookalike_df.to_csv('Sumant_Lokhande_Lookalike.csv', index=False)

print(lookalike_df.head())


  CustomerID                                         Lookalikes
0      C0001  [(C0164, 0.9684103747672835), (C0137, 0.962080...
1      C0002  [(C0029, 0.999761634349898), (C0031, 0.9990127...
2      C0003  [(C0176, 0.8906401232895583), (C0027, 0.863579...
3      C0004  [(C0075, 0.997674065238924), (C0175, 0.9940844...
4      C0005  [(C0058, 0.9997982043779897), (C0123, 0.999705...
