In [6]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


# Read the three input tables from CSV files in the current working directory.
# The files are read in the order listed: customers, products, transactions.
customers, products, transactions = [
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
]


In [7]:
# Build the customer x product purchase matrix used for similarity scoring.
# pd.merge defaults to an inner join, so only transactions whose CustomerID
# also appears in `customers` are kept.
merged = pd.merge(transactions, customers, on='CustomerID')
# aggfunc='sum' records the total Quantity per (customer, product) pair.
# pivot_table's default aggregation is the mean, which would average repeat
# purchases of the same product instead of adding them up.
# Pairs with no transactions are filled with 0.
customer_product_matrix = merged.pivot_table(
    index='CustomerID',
    columns='ProductID',
    values='Quantity',
    aggfunc='sum',
    fill_value=0,
)


In [8]:

# Pairwise cosine similarity between the rows of the customer-product matrix.
similarity_matrix = cosine_similarity(customer_product_matrix)
# Wrap the raw array in a DataFrame labelled on both axes by the matrix index,
# so scores can be looked up by label.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_product_matrix.index,
    columns=customer_product_matrix.index,
)




In [9]:
# Get the top 3 most similar customers for the first 20 customers.
# Result: {customer_id: [(lookalike_id, score rounded to 2 decimals), ...]}
lookalikes = {}
for customer_id in similarity_df.index[:20]:
    similar_customers = (
        similarity_df.loc[customer_id]
        # Remove the customer's own entry by label. Skipping position 0 after
        # sorting is unsafe: another customer with a proportional purchase
        # vector also scores 1.0 and may sort ahead of the self-entry.
        .drop(labels=customer_id)
        # A stable sort keeps tied scores in index order, so reruns agree.
        .sort_values(ascending=False, kind='stable')
        .head(3)
    )
    # float() converts NumPy scalars to plain Python floats; otherwise the
    # stringified list can contain 'np.float64(...)' under NumPy >= 2.
    lookalikes[customer_id] = [
        (cust_id, round(float(score), 2))
        for cust_id, score in similar_customers.items()
    ]

# Write the lookalike mapping to CSV: a header row, then one row per customer
# holding the customer ID and its lookalike list (the csv module stringifies
# the list with str()).
import csv
with open('FirstName_LastName_Lookalike.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['CustomerID', 'Lookalikes'])
    # Each (key, value) item of the dict is already a two-field row.
    writer.writerows(lookalikes.items())