In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Load the three source tables from the current working directory.
customers = pd.read_csv("Customers.csv")
products = pd.read_csv("Products.csv")
transactions = pd.read_csv("Transactions.csv")

# Enrich each transaction with its product and customer columns. Left joins
# keep every transaction row even when a lookup table has no matching ID.
transactions = (
    transactions
    .merge(products, how="left", on="ProductID")
    .merge(customers, how="left", on="CustomerID")
)

def _most_common(values):
    """Return the most frequent non-null value, or NaN when there is none.

    ``Series.mode`` ignores nulls and returns an empty Series when nothing is
    left, so indexing its first element unconditionally raises for a customer
    whose ``Category`` values are all missing.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else np.nan


# One row per customer: spending total, number of transactions, mean quantity
# per transaction, and the category that appears most often.
customer_features = (
    transactions.groupby("CustomerID")
    .agg(
        total_spending=("TotalValue", "sum"),
        transaction_count=("TransactionID", "count"),
        avg_quantity=("Quantity", "mean"),
        preferred_category=("Category", _most_common),
    )
    .reset_index()
)

# One-hot encode the preferred category. dtype=float keeps the indicator
# columns numeric like the other features; a customer with no category (NaN)
# gets zeros in every indicator column.
customer_features = pd.get_dummies(
    customer_features,
    columns=["preferred_category"],
    prefix="category",
    dtype=float,
)

# Rescale the continuous features with MinMaxScaler (default output range
# [0, 1]); the scaled values overwrite the original columns.
numeric_features = ["total_spending", "transaction_count", "avg_quantity"]
scaler = MinMaxScaler()
customer_features[numeric_features] = scaler.fit_transform(
    customer_features[numeric_features]
)

# Pairwise cosine similarity between customer rows. CustomerID is an
# identifier, not a feature, so it is left out of the matrix.
feature_matrix = customer_features.drop(columns=["CustomerID"])
similarity_matrix = cosine_similarity(feature_matrix)

# Label both axes with CustomerID so scores can be looked up by customer pair.
customer_ids = customer_features["CustomerID"]
similarity_df = pd.DataFrame(
    similarity_matrix, index=customer_ids, columns=customer_ids
)
TOP_N = 3  # number of lookalikes reported per customer

# Maps each CustomerID to a list of (other CustomerID, similarity) pairs,
# ordered from most to least similar.
lookalikes = {}

for customer_id in similarity_df.index:
    # Remove the customer's own entry by label instead of assuming it sorts
    # first: another customer can tie at 1.0 (or edge ahead through
    # floating-point error), in which case skipping position 0 would drop a
    # genuine match and keep the customer as its own lookalike.
    scores = similarity_df[customer_id].drop(customer_id)
    similar_customers = scores.nlargest(TOP_N)
    # float() converts the NumPy scalar to a plain Python float so the list is
    # written to the CSV as e.g. 0.98 rather than a NumPy scalar repr.
    lookalikes[customer_id] = [
        (idx, round(float(score), 2)) for idx, score in similar_customers.items()
    ]

lookalike_df = pd.DataFrame(
    {
        "CustomerID": list(lookalikes.keys()),
        "SimilarCustomers": list(lookalikes.values()),
    }
)
lookalike_df.to_csv("Lookalike.csv", index=False)


Lookalike recommendations saved to Lookalike.csv.
