In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
# Read the three raw input tables from the working directory, in the order
# customers -> products -> transactions.
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)

In [3]:
# Attach product attributes to every transaction (inner join on ProductID,
# which is the pandas default join type).
transactions_products = transactions_df.merge(products_df, on="ProductID")

# Attach customer attributes to the enriched transactions (inner join on CustomerID).
merged_df = transactions_products.merge(customers_df, on="CustomerID")

In [4]:
# Customer x product purchase matrix: one row per CustomerID, one column per
# ProductID, each cell holding the TOTAL quantity that customer bought.
#
# aggfunc is set explicitly because pivot_table defaults to "mean": when a
# customer has several transactions for the same product, the default would
# average their quantities instead of adding them up, understating repeat
# purchases. Customer/product pairs with no transaction are filled with 0.
customer_product_matrix = merged_df.pivot_table(
    index="CustomerID",
    columns="ProductID",
    values="Quantity",
    aggfunc="sum",
    fill_value=0,
)


In [5]:
# Pairwise cosine similarity between the rows (customers) of the purchase matrix.
similarity_matrix = cosine_similarity(customer_product_matrix)

# Wrap the raw array in a DataFrame labelled by CustomerID on both axes so a
# customer's similarity scores can be looked up by id.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_product_matrix.index,
    columns=customer_product_matrix.index,
)


In [6]:
# Build the lookalike table: for each of the first N_TARGET_CUSTOMERS customers
# (in purchase-matrix index order), keep the TOP_N_LOOKALIKES most similar
# *other* customers together with their similarity scores.
N_TARGET_CUSTOMERS = 20
TOP_N_LOOKALIKES = 3

lookalikes = {}
for customer_id in customer_product_matrix.index[:N_TARGET_CUSTOMERS]:
    # Exclude the customer by label instead of skipping the first sorted row:
    # another customer with a proportional purchase vector also scores ~1.0,
    # so after sorting the customer's own entry is not guaranteed to be first.
    similar_customers = (
        similarity_df[customer_id]
        .drop(labels=customer_id)
        .nlargest(TOP_N_LOOKALIKES)
    )
    # Store a real list (Series.items() is a one-shot iterator) and cast the
    # scores to built-in floats so they print as plain numbers when the
    # nested lists are written out as text.
    lookalikes[customer_id] = [
        (other_id, float(score)) for other_id, score in similar_customers.items()
    ]

# Convert the lookalikes to a DataFrame: one row per target customer, with
# "Lookalikes" holding a list of [other_customer_id, similarity_score] pairs.
lookalike_df = pd.DataFrame({
    "CustomerID": list(lookalikes.keys()),
    "Lookalikes": [[list(pair) for pair in pairs] for pairs in lookalikes.values()],
})


In [8]:
# Persist the lookalike table to "Lookalike.csv" in the working directory,
# omitting the positional row index.
lookalike_df.to_csv(path_or_buf="Lookalike.csv", index=False)