In [9]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from collections import defaultdict

# Lookalike model: for every customer that has at least one transaction, find
# the three other customers whose spending is distributed across product
# categories in the most similar way, and write the result to Lookalike.csv.

# Load data
customers_df = pd.read_csv('Customers.csv')
transactions_df = pd.read_csv('Transactions.csv')
products_df = pd.read_csv('Products.csv')

# Attach product attributes (notably 'Category') to each transaction.
# A left join keeps transactions whose ProductID is missing from Products.csv;
# their Category is NaN and the groupby below drops them.
transactions_df = pd.merge(transactions_df, products_df, on='ProductID', how='left')

# Aggregate the transaction data to create customer profiles:
# one row per (customer, category) with total spend and total quantity.
customer_profile = transactions_df.groupby(['CustomerID', 'Category']).agg(
    total_spent=('TotalValue', 'sum'),
    total_quantity=('Quantity', 'sum')
).reset_index()

# Pivot into a customer x category matrix of total spend
# (categories a customer never bought from are filled with 0).
customer_category_matrix = customer_profile.pivot_table(
    index='CustomerID',
    columns='Category',
    values='total_spent',
    aggfunc='sum',
    fill_value=0,
)

# Standardize each customer's row (subtract the row mean, divide by the row's
# sample standard deviation). Cosine similarity on the centred rows compares
# the *shape* of the spending mix across categories rather than raw amounts.
row_mean = customer_category_matrix.mean(axis=1)
row_std = customer_category_matrix.std(axis=1)
# A row with identical spend in every category has std == 0, and with a single
# category column the sample std is NaN. Dividing by either would yield NaN
# rows, which cosine_similarity rejects. Mask those deviations and give such
# customers an all-zero profile instead.
safe_std = row_std.where(row_std > 0)
customer_category_matrix = (
    customer_category_matrix
    .sub(row_mean, axis=0)
    .div(safe_std, axis=0)
    .fillna(0.0)
)

# Pairwise cosine similarity between customers (square matrix, same row order
# as customer_category_matrix.index).
similarity_matrix = cosine_similarity(customer_category_matrix)

# Create a mapping of CustomerID -> Top 3 similar customers
lookalike_map = defaultdict(list)
customer_ids = customer_category_matrix.index

for i, customer_id in enumerate(customer_ids):
    # (CustomerID, similarity score) for every *other* customer.
    candidates = [
        (other_id, score)
        for j, (other_id, score) in enumerate(zip(customer_ids, similarity_matrix[i]))
        if j != i
    ]
    # sorted() is stable, so customers with equal scores keep index order.
    lookalike_map[customer_id] = sorted(
        candidates, key=lambda pair: pair[1], reverse=True
    )[:3]

# Flatten the mapping into one row per (customer, recommended customer) pair
# and save the results to Lookalike.csv
lookalike_data = [
    [customer_id, similar_customer, score]
    for customer_id, similar_customers in lookalike_map.items()
    for similar_customer, score in similar_customers
]

lookalike_df = pd.DataFrame(lookalike_data, columns=['CustomerID', 'RecommendedCustomerID', 'SimilarityScore'])
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike model completed and saved as Lookalike.csv")




Lookalike model completed and saved as Lookalike.csv
