In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Load the three raw tables; paths are relative to the working directory.
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')

# Attach product and customer attributes to every transaction. Left joins
# keep all transactions even when the lookup table has no matching row.
transactions = transactions.merge(products, on='ProductID', how='left')
data = transactions.merge(customers, on='CustomerID', how='left')

# Feature 1: summed TotalValue of each customer's transactions.
customer_spending = data.groupby('CustomerID')['TotalValue'].sum().reset_index()
customer_spending.columns = ['CustomerID', 'TotalSpending']

# Feature 2: summed Quantity per product category, one column per category.
category_pref = data.pivot_table(index='CustomerID', columns='Category', values='Quantity', aggfunc='sum', fill_value=0)

# Rows with a missing Category (possible after the left merge above) are
# dropped by pivot_table, so a customer with only such rows has no entry in
# category_pref and the join leaves NaN for them. Fill with 0 ("no units in
# any known category") because cosine_similarity rejects NaN input.
features = customer_spending.set_index('CustomerID').join(category_pref).fillna(0)

# Standardize each feature column so spending (currency) and quantities
# (units) contribute on a comparable scale.
scaler = StandardScaler()
normalized_features = scaler.fit_transform(features)
features_normalized_df = pd.DataFrame(normalized_features, index=features.index, columns=features.columns)

# Pairwise customer-to-customer cosine similarity, labelled by CustomerID on
# both axes.
similarity_matrix = cosine_similarity(features_normalized_df)
similarity_df = pd.DataFrame(similarity_matrix, index=features.index, columns=features.index)

def get_top_similar_customers(customer_id, similarity_df, n=3):
    """Return the ``n`` customers most similar to ``customer_id``.

    Args:
        customer_id: Label of the customer to look up; must be a row label
            of ``similarity_df``.
        similarity_df: DataFrame of pairwise similarity scores whose rows
            and columns are labelled by customer ID.
        n: Maximum number of look-alikes to return.

    Returns:
        A list of ``(other_customer_id, score)`` tuples ordered from most to
        least similar. ``customer_id`` itself is never included. Fewer than
        ``n`` tuples are returned when fewer other customers exist.

    Raises:
        KeyError: If ``customer_id`` is not a row label of ``similarity_df``.
    """
    scores = similarity_df.loc[customer_id]
    # Remove the customer by label instead of assuming they sort first: a
    # tie with another customer, or round-off in the self-similarity, can
    # put someone else at position 0.
    others = scores.drop(labels=customer_id, errors='ignore')
    # A stable sort keeps tied scores in their original column order.
    ranked = others.sort_values(ascending=False, kind='stable')
    return list(ranked.iloc[:max(n, 0)].items())

# Look-alikes for the first 20 entries of features.index, keyed by customer ID.
lookalike_results = {
    customer_id: get_top_similar_customers(customer_id, similarity_df)
    for customer_id in features.index[:20]
}

# Flatten the mapping into one record per (customer, look-alike) pair.
lookalike_csv_data = [
    {'CustomerID': customer_id, 'SimilarCustomerID': similar_customer, 'SimilarityScore': score}
    for customer_id, lookalikes in lookalike_results.items()
    for similar_customer, score in lookalikes
]

# Persist the pairs as CSV (without the row index) and preview the first rows.
lookalike_df = pd.DataFrame(lookalike_csv_data)
lookalike_df.to_csv('FirstName_LastName_Lookalike.csv', index=False)

print(lookalike_df.head(10))


  CustomerID SimilarCustomerID  SimilarityScore
0      C0001             C0069         0.933006
1      C0001             C0026         0.926038
2      C0001             C0120         0.889766
3      C0002             C0159         0.974678
4      C0002             C0178         0.956297
5      C0002             C0133         0.946672
6      C0003             C0195         0.845733
7      C0003             C0166         0.813676
8      C0003             C0031         0.783814
9      C0004             C0065         0.937042
