In [1]:
from pathlib import Path

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [48]:
# Load the data.
# All three CSVs are read from the same folder; keep that folder in a single
# constant so the notebook can be pointed at another location by editing one line.
DATA_DIR = Path(r"C:\Users\bmoha\Downloads")

customers_df = pd.read_csv(DATA_DIR / "Customers.csv")
transactions_df = pd.read_csv(DATA_DIR / "Transactions.csv")
products_df = pd.read_csv(DATA_DIR / "Products.csv")

In [68]:
# Remove leading/trailing whitespace from the column names of every loaded frame
for frame in (customers_df, transactions_df, products_df):
    frame.columns = frame.columns.str.strip()

In [70]:
# Merge Transactions.csv with Products.csv on ProductID.
# validate="many_to_one" makes pandas raise a MergeError if a ProductID occurs
# more than once in products_df; without the check such duplicates would
# silently duplicate transaction rows and inflate any totals computed from
# this frame.
transactions_products_df = transactions_df.merge(
    products_df,
    on="ProductID",
    how="left",
    validate="many_to_one",
)

In [72]:
# Roll the transaction rows up to one row per customer
customer_features = (
    transactions_products_df
    .groupby("CustomerID")
    .agg(
        TotalSpending=("TotalValue", "sum"),          # total spending
        TransactionCount=("TransactionID", "count"),  # number of transactions
        TotalQuantity=("Quantity", "sum"),            # total quantity purchased
    )
    .reset_index()
)

In [74]:
# Merge customer features with profile info.
# The left join keeps every row of customers_df; customers without a match in
# customer_features get NaN in the aggregated columns. Zero-fill only those
# columns so that missing values in the customers_df profile columns are left
# as missing instead of being overwritten with 0.
customer_data = customers_df.merge(
    customer_features, on="CustomerID", how="left"
).fillna({
    "TotalSpending": 0,
    "TransactionCount": 0,
    "TotalQuantity": 0,
})

In [76]:
# Keep only the numeric columns that feed the similarity calculation
features = customer_data.loc[
    :, ["TotalSpending", "TransactionCount", "TotalQuantity"]
]

In [78]:
# Standardize the feature columns: fit the scaler first, then apply it
scaler = StandardScaler().fit(features)
scaled_features = scaler.transform(features)

In [80]:
# Pairwise cosine similarity between customers, labelled by CustomerID on both axes
customer_ids = customer_data["CustomerID"]
similarity_matrix = cosine_similarity(scaled_features)
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_ids,
    columns=customer_ids,
)

Lookalike recommendations saved to 'Lookalike.csv'
