In [18]:
# Lookalike model: for customers C0001-C0020, find the 3 most similar customers by purchased-quantity profile.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

def top_lookalikes(similarity_df, customer_id, n=3):
    """Return the ``n`` customers most similar to ``customer_id``.

    Args:
        similarity_df: Square DataFrame of pairwise similarity scores whose
            index and columns are both customer IDs.
        customer_id: Label of the customer to find lookalikes for.
        n: Maximum number of lookalikes to return.

    Returns:
        A list of ``(other_customer_id, score)`` tuples ordered from most to
        least similar. Scores are plain ``float`` so the textual form written
        to CSV does not depend on NumPy's scalar repr.

    Raises:
        KeyError: If ``customer_id`` is not a label of ``similarity_df``.
    """
    # Remove the customer by label instead of assuming it sorts first: with
    # tied scores (or a self-similarity that is not the maximum) a positional
    # slice would drop a real neighbour and could list the customer as its
    # own lookalike.
    scores = similarity_df[customer_id].drop(labels=customer_id)
    return [
        (other_id, float(score))
        for other_id, score in scores.nlargest(n).items()
    ]


# ``__name__`` is "__main__" both in a notebook cell and when run as a script,
# so the pipeline below still executes there and its variables stay global;
# the guard only allows ``top_lookalikes`` to be imported without the CSVs.
if __name__ == "__main__":
    customers = pd.read_csv("Customers.csv")
    products = pd.read_csv("Products.csv")
    transactions = pd.read_csv("Transactions.csv")

    # One row per transaction, enriched with customer and product attributes.
    data = pd.merge(transactions, customers, on="CustomerID")
    data = pd.merge(data, products, on="ProductID")

    # Rows: customers; columns: product names; cells: total quantity bought
    # (0 where the customer never bought the product).
    customer_product_matrix = data.pivot_table(
        index="CustomerID",
        columns="ProductName",
        values="Quantity",
        aggfunc="sum",
        fill_value=0,
    )

    # Standardize each product column before comparing customers.
    scaler = StandardScaler()
    normalized_matrix = scaler.fit_transform(customer_product_matrix)

    similarity_matrix = cosine_similarity(normalized_matrix)
    similarity_df = pd.DataFrame(
        similarity_matrix,
        index=customer_product_matrix.index,
        columns=customer_product_matrix.index,
    )

    # Top-3 lookalikes for every customer present in the matrix.
    lookalike_results = {
        customer_id: top_lookalikes(similarity_df, customer_id, n=3)
        for customer_id in similarity_df.index
    }

    # Only customers C0001..C0020 are exported; build the ID set once rather
    # than once per customer.
    target_ids = {f"C{i:04d}" for i in range(1, 21)}
    lookalike_output = {
        cust_id: lookalike_results[cust_id]
        for cust_id in similarity_df.index
        if cust_id in target_ids
    }
    lookalike_df = pd.DataFrame(
        [
            {"CustomerID": key, "Lookalikes": value}
            for key, value in lookalike_output.items()
        ]
    )

    lookalike_df.to_csv("Lookalike.csv", index=False)

    print(lookalike_df.head(3))


  CustomerID                                         Lookalikes
0      C0001  [(C0050, 0.510312355400559), (C0121, 0.4592921...
1      C0002  [(C0030, 0.6735706674412992), (C0173, 0.437667...
2      C0003  [(C0164, 0.6643842648093717), (C0144, 0.617294...
