In [11]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

In [12]:
# Folder that holds the three raw CSV inputs. It is defined once so the notebook
# can be pointed at another location by editing a single line.
DATA_DIR = r'C:\Users\91944\OneDrive\Desktop\Project\zeotap'

# Raw input tables; they are joined on ProductID / CustomerID further down.
customers = pd.read_csv(rf'{DATA_DIR}\Customers.csv')
products = pd.read_csv(rf'{DATA_DIR}\Products.csv')
transactions = pd.read_csv(rf'{DATA_DIR}\Transactions.csv')

In [13]:
# Enrich every transaction with its product attributes, then with the buying
# customer's attributes. Left joins keep each transaction row even when the
# product or customer lookup finds no match.
transactions_products = transactions.merge(products, how="left", on="ProductID")
full_data = transactions_products.merge(customers, how="left", on="CustomerID")

# Output column -> (source column, aggregation) for the per-customer profile.
feature_specs = {
    "total_spending": ("TotalValue", "sum"),
    "avg_transaction_value": ("TotalValue", "mean"),
    "total_transactions": ("TransactionID", "count"),
    "distinct_categories": ("Category", "nunique"),
    "region": ("Region", "first"),
}

# One row per CustomerID; reset_index turns the group key back into a column.
customer_features = (
    full_data
    .groupby("CustomerID")
    .agg(**feature_specs)
    .reset_index()
)

# Number of transactions per (customer, category) pair.
category_counts = (
    full_data.groupby(["CustomerID", "Category"])["TransactionID"].count().reset_index()
)

# Within each customer, put the category with the most transactions first ...
ranked_categories = category_counts.sort_values(
    by=["CustomerID", "TransactionID"], ascending=[True, False]
)

# ... then keep only that leading row per customer and label it as the top category.
most_frequent_category = ranked_categories.drop_duplicates(
    subset=["CustomerID"], keep="first"
).rename(columns={"Category": "top_category"})

# No `how` is given, so this is pandas' default inner join: a customer with no
# (customer, category) row would not be carried into customer_features.
customer_features = customer_features.merge(
    most_frequent_category[["CustomerID", "top_category"]],
    on="CustomerID",
)

# Numeric profile columns that are rescaled before similarity is computed.
numerical_features = ["total_spending", "avg_transaction_value", "total_transactions", "distinct_categories"]

# Fit the min-max scaler on those columns, then overwrite them with the scaled values.
scaler = MinMaxScaler()
scaler.fit(customer_features[numerical_features])
customer_features[numerical_features] = scaler.transform(customer_features[numerical_features])

# One-hot encode the two categorical descriptors.
categorical_columns = ["region", "top_category"]
encoded_features = pd.get_dummies(customer_features[categorical_columns])

# Final feature matrix: scaled numeric columns followed by the indicator columns.
customer_features_encoded = pd.concat(
    [customer_features[numerical_features], encoded_features],
    axis=1,
)

# Pairwise cosine similarity between every pair of customer feature rows.
similarity_matrix = cosine_similarity(customer_features_encoded)

# Label both axes with CustomerID so scores can be looked up by customer.
customer_ids = customer_features["CustomerID"]
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_ids,
    columns=customer_ids,
)

# How many customers (taken from the top of customer_features) get recommendations,
# and how many lookalikes are kept for each of them.
N_TARGET_CUSTOMERS = 20
TOP_N_LOOKALIKES = 3

# lookalike_map: customer ID -> list of [lookalike_id, score] pairs, best match first.
lookalike_map = {}
for customer_id in customer_features["CustomerID"].head(N_TARGET_CUSTOMERS):
    similar_customers = (
        similarity_df[customer_id]
        # Remove the customer's own entry by label. Skipping "position 0" of a
        # sorted list is unsafe: when another customer ties with the self-score
        # (or float rounding puts the self-score marginally lower), the customer
        # could stay in its own lookalike list and push out a real match.
        .drop(customer_id)
        .nlargest(TOP_N_LOOKALIKES)
        .rename("score")
        .rename_axis("cust_id")
        .reset_index()
    )
    lookalike_map[customer_id] = similar_customers.values.tolist()

# Flatten the map into one row per (customer, lookalike) pair for export.
lookalike_df = pd.DataFrame(
    [
        (cust_id, lookalike_id, score)
        for cust_id, matches in lookalike_map.items()
        for lookalike_id, score in matches
    ],
    columns=["cust_id", "lookalike_id", "score"],
)

# Write the lookalike table to CSV; index=False leaves the row index out of the file.
output_path = r'C:\Users\91944\OneDrive\Desktop\Project\zeotap\Ashwin_Kumar_Lookalike.csv'
lookalike_df.to_csv(path_or_buf=output_path, index=False)

# Confirm where the file was written.
print("Lookalike.csv saved at: " + output_path)

Lookalike.csv saved at: C:\Users\91944\OneDrive\Desktop\Project\zeotap\Ashwin_Kumar_Lookalike.csv
