In [1]:
#model
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load the three source datasets: customers, products and transactions.
customer, product, transaction = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)


In [2]:
# --- Data preparation ---
# Attach customer and product attributes to every transaction.
data = transaction.merge(customer, on="CustomerID").merge(product, on="ProductID")

# Total spend per customer, and spend broken down by product category.
customer_spending = data.groupby("CustomerID")["TotalValue"].sum().reset_index()
customer_product_pivot = data.pivot_table(
    index="CustomerID",
    columns="Category",
    values="TotalValue",
    aggfunc="sum",
    fill_value=0,
)

# Left joins keep customers that have no transactions; the spend values that
# are missing for them are filled with 0.
customer_profile = customer.merge(customer_spending, on="CustomerID", how="left").fillna(0)
customer_profile = customer_profile.merge(customer_product_pivot, on="CustomerID", how="left").fillna(0)

# One-hot encode Region as floats. pandas >= 2.0 emits bool dummy columns by
# default, and select_dtypes(include=[np.number]) below does not match bool,
# so without an explicit numeric dtype the Region features would be silently
# excluded from the feature matrix.
customer_profile = pd.get_dummies(
    customer_profile, columns=["Region"], drop_first=True, dtype=float
)

# Standardise every numeric column so that no single feature dominates the
# similarity computation because of its scale.
numeric_cols = customer_profile.select_dtypes(include=[np.number]).columns
customer_features = StandardScaler().fit_transform(customer_profile[numeric_cols])


In [3]:
# Pairwise cosine similarity between the customer feature rows, wrapped in a
# DataFrame labelled by CustomerID on both axes so it can be looked up by ID.
similarity_matrix = cosine_similarity(customer_features)
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_profile["CustomerID"],
    columns=customer_profile["CustomerID"],
)
def get_top_3_similar(customers_list, top_n=3, similarity=None):
    """Return the most similar other customers for each requested customer.

    Args:
        customers_list: Iterable of customer IDs. Each ID must be a label on
            both axes of the similarity frame.
        top_n: Number of lookalikes to return per customer (default 3).
        similarity: DataFrame of pairwise similarity scores whose rows and
            columns are both labelled by customer ID. Defaults to the
            module-level ``similarity_df``.

    Returns:
        dict mapping each requested customer ID to a list of
        ``(other_id, score)`` tuples in descending score order. Scores are
        built-in Python floats, so the result prints and serialises cleanly.

    Raises:
        KeyError: If a requested customer ID is not in the similarity frame.
    """
    if similarity is None:
        similarity = similarity_df

    lookalike_map = {}
    for cust_id in customers_list:
        # Remove the customer's own entry so it is never its own lookalike.
        scores = similarity[cust_id].drop(index=cust_id)
        lookalike_map[cust_id] = [
            (other_id, float(score))
            for other_id, score in scores.nlargest(top_n).items()
        ]

    return lookalike_map

# Build lookalikes for the first 20 customers listed in the customer table.
top_20_customers = customer["CustomerID"].iloc[:20].tolist()
lookalike_results = get_top_3_similar(top_20_customers)

# Serialise each lookalike list as text, one row per customer. Scores are cast
# to built-in floats first: under NumPy >= 2, str() of a list holding NumPy
# scalars would write "np.float64(...)" wrappers into the output column.
lookalike_df = pd.DataFrame([
    {
        "CustomerID": cust_id,
        "Lookalikes": str([(other_id, float(score)) for other_id, score in matches]),
    }
    for cust_id, matches in lookalike_results.items()
])


In [21]:
# Write the lookalike table to disk, omitting the DataFrame index column.
lookalike_df.to_csv(path_or_buf="Lookalike.csv", index=False)
