# In [2]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import csv

# --- Load the three raw tables -------------------------------------------
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')

# Default (inner) merges: only transactions whose CustomerID and ProductID
# are present in the other two tables survive the join.
merged_df = transactions.merge(customers, on='CustomerID').merge(products, on='ProductID')

# Impute missing values: medians for the numeric columns, a sentinel label
# for the categorical one.
merged_df = merged_df.fillna({
    'TotalValue': merged_df['TotalValue'].median(),
    'Quantity': merged_df['Quantity'].median(),
    'Category': 'Unknown',
})


def _most_common_category(categories):
    """Return the most frequent value in *categories*, or 'Unknown' if none."""
    modes = categories.mode()  # computed once; may hold several values on ties
    return modes.iloc[0] if not modes.empty else 'Unknown'


# One row per customer: total spend, total quantity, favourite category.
customer_profiles = merged_df.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'sum',
    'Category': _most_common_category,
})

# One-hot encode the favourite category (first level dropped as baseline).
encoder = OneHotEncoder(drop='first', sparse_output=False)
encoded_category = encoder.fit_transform(customer_profiles[['Category']])

# The encoded frame must carry the same CustomerID index as
# customer_profiles. With a default RangeIndex, pd.concat(axis=1) would align
# on mismatched labels, double the number of rows and fill everything with
# NaN instead of placing each dummy next to its customer.
encoded_df = pd.DataFrame(
    encoded_category,
    columns=encoder.get_feature_names_out(['Category']),
    index=customer_profiles.index,
)
customer_profiles_encoded = pd.concat(
    [customer_profiles.drop(columns=['Category']), encoded_df],
    axis=1,
)

# Defensive only: no NaN is expected now that the indexes line up.
customer_profiles_encoded = customer_profiles_encoded.fillna(0)

# NOTE(review): TotalValue and Quantity are raw sums while the category
# columns are 0/1, so the numeric columns presumably dominate the cosine
# similarity -- consider scaling the features before this step.
# Row/column i of the matrix corresponds to customer_profiles.index[i].
similarity_matrix = cosine_similarity(customer_profiles_encoded)

def get_top_3_similar(customer_id, similarity_matrix, customer_ids):
    """Return up to three customers most similar to *customer_id*.

    Args:
        customer_id: Identifier to look up in *customer_ids*.
        similarity_matrix: Square matrix where entry ``[i][j]`` is the
            similarity between ``customer_ids[i]`` and ``customer_ids[j]``.
        customer_ids: Sequence of identifiers, in the same order as the rows
            and columns of *similarity_matrix*.

    Returns:
        A list of ``(other_customer_id, similarity)`` tuples ordered from
        most to least similar. The queried customer is never included. Fewer
        than three tuples are returned when fewer than three other customers
        exist.

    Raises:
        IndexError: If *customer_id* does not occur in *customer_ids*.
    """
    ids = np.asarray(customer_ids)
    matches = np.flatnonzero(ids == customer_id)
    if matches.size == 0:
        raise IndexError(f"customer_id {customer_id!r} not found in customer_ids")
    idx = matches[0]

    scores = np.asarray(similarity_matrix[idx])
    # Sort descending with a stable sort so ties resolve deterministically
    # (by position in customer_ids).
    ranked = np.argsort(-scores, kind='stable')
    # Drop the customer explicitly rather than assuming it sorts last: another
    # customer can tie with (or, through rounding, exceed) the self-similarity.
    top = [i for i in ranked if i != idx][:3]
    return [(ids[i], scores[i]) for i in top]


# Customer identifiers in the same order as the similarity matrix rows.
customer_ids = customer_profiles.index.values


def _to_native(value):
    """Convert a NumPy scalar to its plain Python equivalent; pass others through."""
    return value.item() if isinstance(value, np.generic) else value


# Lookalikes for the first 20 customers only. The scores are converted to
# native Python values so the CSV cell does not contain NumPy scalar reprs
# such as "np.float64(0.98)", which is what NumPy >= 2 prints inside a list.
lookalike_dict = {
    cust: [
        (_to_native(other_id), _to_native(score))
        for other_id, score in get_top_3_similar(cust, similarity_matrix, customer_ids)
    ]
    for cust in customer_ids[:20]
}


# One row per customer; the second column is the string form of the list of
# (lookalike_id, similarity) tuples.
with open('FirstName_LastName_Lookalike.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['CustomerID', 'Lookalikes'])
    writer.writerows(
        [cust_id, lookalikes] for cust_id, lookalikes in lookalike_dict.items()
    )

print("Lookalike recommendations have been successfully saved to CSV.")


# Output: Lookalike recommendations have been successfully saved to CSV.
