In [None]:

import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Lookalike model: for each of the first MAX_CUSTOMERS customers, find the
# TOP_N_SIMILAR other customers whose scaled spend/quantity/product-variety/
# region profile has the highest cosine similarity, and write them to CSV.
TOP_N_SIMILAR = 3
MAX_CUSTOMERS = 20

# Load datasets
customers_df = pd.read_csv('Customers.csv')
products_df = pd.read_csv('Products.csv')
transactions_df = pd.read_csv('Transactions.csv')

# Merge datasets (inner joins: transactions without a matching customer or
# product row are dropped, so only customers with transactions are profiled)
merged_df = transactions_df.merge(customers_df, on='CustomerID').merge(products_df, on='ProductID')

# Aggregate customer-level transaction data: total spend, total units bought,
# and number of distinct products purchased
customer_summary = merged_df.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'sum',
    'ProductID': 'nunique'
}).reset_index()

# One-hot encode categorical features
customer_summary = customer_summary.merge(customers_df[['CustomerID', 'Region']], on='CustomerID')
customer_summary = pd.get_dummies(customer_summary, columns=['Region'], drop_first=True)

# Scale features (CustomerID is an identifier, not a feature)
scaler = StandardScaler()
scaled_data = scaler.fit_transform(customer_summary.drop(columns=['CustomerID']))

# Compute similarity; row i / column j is the similarity between the i-th and
# j-th rows of customer_summary
similarity_matrix = cosine_similarity(scaled_data)

# Find the top similar customers for the first MAX_CUSTOMERS customers.
# Positional array of IDs, aligned with the rows of similarity_matrix.
customer_ids = customer_summary['CustomerID'].to_numpy()
# Cap the loop so a dataset with fewer than MAX_CUSTOMERS customers does not
# index past the end of the table.
n_targets = min(MAX_CUSTOMERS, len(customer_ids))

lookalike_results = {}
for i in range(n_targets):
    customer_id = customer_ids[i]
    ranked = similarity_matrix[i].argsort()[::-1]
    # Drop the customer's own row explicitly instead of assuming it sorts
    # first: with tied scores (identical feature vectors) or floating-point
    # rounding, another customer can occupy position 0, and a positional
    # skip would then list the customer as their own lookalike.
    similar_indices = ranked[ranked != i][:TOP_N_SIMILAR]
    similar_customers = [(customer_ids[idx], similarity_matrix[i][idx]) for idx in similar_indices]
    lookalike_results[customer_id] = similar_customers

# Save results to CSV; each cell holds a (CustomerID, similarity score) tuple
similar_columns = [f'Similar_Customer_{rank}' for rank in range(1, TOP_N_SIMILAR + 1)]
# Pad short rows so the frame always has TOP_N_SIMILAR columns, even when
# fewer than TOP_N_SIMILAR other customers exist.
padded_rows = {
    cid: matches + [None] * (TOP_N_SIMILAR - len(matches))
    for cid, matches in lookalike_results.items()
}
lookalike_df = pd.DataFrame.from_dict(padded_rows, orient='index', columns=similar_columns)
lookalike_df.reset_index(inplace=True)
lookalike_df.rename(columns={'index': 'CustomerID'}, inplace=True)
lookalike_df.to_csv('Harpartap_Singh_Lookalike.csv', index=False)
