In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Per-customer purchase-behaviour features produced by the aggregation step.
BEHAVIOR_COLUMNS = ["total_spent", "avg_quantity", "purchase_count", "unique_products"]


def select_feature_columns(customer_data):
    """Return the model's feature column names, each listed exactly once.

    The features are the aggregated behaviour columns plus the one-hot
    ``Region_*`` columns created by ``pd.get_dummies``.  The region columns
    are picked by name prefix instead of by position: a positional slice
    depends on the column layout of the input files and can re-select
    aggregate columns (double-weighting them in the similarity) or pull in
    unrelated columns.

    Args:
        customer_data: DataFrame holding the behaviour columns and the
            one-hot encoded region columns.

    Returns:
        A list of column names with no duplicates among the region columns.
    """
    region_columns = [
        column for column in customer_data.columns
        if str(column).startswith("Region_")
    ]
    return BEHAVIOR_COLUMNS + region_columns


def top_lookalikes(similarity_matrix, customer_ids, top_n=3):
    """Map every customer to its ``top_n`` most similar *other* customers.

    Args:
        similarity_matrix: Square array where entry ``[i][j]`` is the
            similarity between customers ``i`` and ``j``.
        customer_ids: Customer identifiers, in the same order as the rows of
            ``similarity_matrix``.
        top_n: Number of lookalikes to keep per customer.

    Returns:
        A dict ``{customer_id: [(lookalike_id, score), ...]}`` with scores
        rounded to 4 decimals, best match first.  Fewer than ``top_n`` pairs
        are returned when there are not enough other customers.
    """
    lookalikes = {}
    for idx, cust_id in enumerate(customer_ids):
        row = similarity_matrix[idx]
        ranked = row.argsort()[::-1]
        # Drop the customer's own index explicitly.  Skipping "the first
        # entry" is not enough: when another customer has an identical
        # feature vector the tie may sort ahead of the customer itself.
        ranked = ranked[ranked != idx][:top_n]
        lookalikes[cust_id] = [(customer_ids[i], round(row[i], 4)) for i in ranked]
    return lookalikes


# The guard keeps the file importable without touching the CSV files; when
# executed as a script or notebook cell the pipeline runs exactly as before
# and all intermediate results remain available as module-level names.
if __name__ == "__main__":
    # Load datasets
    customers = pd.read_csv("Customers.csv")
    products = pd.read_csv("Products.csv")
    transactions = pd.read_csv("Transactions.csv")

    # Merge transactions with product data
    transactions = transactions.merge(products, on="ProductID")

    # Aggregate purchase behavior per customer
    customer_features = transactions.groupby("CustomerID").agg(
        total_spent=("TotalValue", "sum"),
        avg_quantity=("Quantity", "mean"),
        purchase_count=("TransactionID", "count"),
        unique_products=("ProductID", "nunique")
    ).reset_index()

    # Merge with customer demographic data; customers without any
    # transaction get 0 for every aggregated feature.
    customer_data = customers.merge(customer_features, on="CustomerID", how="left").fillna(0)

    # Encode categorical variables (One-Hot Encoding for Region)
    customer_data = pd.get_dummies(customer_data, columns=["Region"], drop_first=True)

    # Scale features so that no single column dominates the cosine similarity
    scaler = StandardScaler()
    feature_columns = select_feature_columns(customer_data)
    customer_data_scaled = scaler.fit_transform(customer_data[feature_columns])

    # Compute similarity matrix
    similarity_matrix = cosine_similarity(customer_data_scaled)

    # Get top 3 similar customers for each customer
    customer_ids = customer_data["CustomerID"].tolist()
    lookalike_dict = top_lookalikes(similarity_matrix, customer_ids, top_n=3)

    # Convert to DataFrame (one row per customer/lookalike pair) and save as CSV
    lookalike_df = pd.DataFrame([(cust, sim[0], sim[1]) for cust, sims in lookalike_dict.items() for sim in sims],
                                columns=["CustomerID", "LookalikeCustomerID", "SimilarityScore"])
    lookalike_df.to_csv("Suriya V_Lookalike.csv", index=False)

    print("Lookalike Model Generated and Saved as Suriya V_Lookalike.csv")


Lookalike Model Generated and Saved as Suriya V_Lookalike.csv
