In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors


In [2]:
# Load datasets
# All three input CSVs are read from a single folder. Point DATA_DIR at a
# different location to run the notebook elsewhere; the default keeps the
# original absolute Windows path so existing runs resolve the same files.
DATA_DIR = "D:\\Assignment"

customers = pd.read_csv(f"{DATA_DIR}\\Customers.csv")
products = pd.read_csv(f"{DATA_DIR}\\Products.csv")
transactions = pd.read_csv(f"{DATA_DIR}\\Transactions.csv")

In [3]:
# Enrich every transaction row with its customer attributes, then with its
# product attributes. Both merges use pandas' default join type and default
# column suffixes for overlapping column names.
transactions_with_customers = transactions.merge(customers, on="CustomerID")
merged_df = transactions_with_customers.merge(products, on="ProductID")

In [4]:
# Transaction history for each customer
# Output column -> (source column, aggregation). Dict order fixes the column
# order of the resulting frame.
# NOTE(review): "Price_x" is presumably the left-hand "Price" column that the
# merge suffixed because both inputs carry a "Price" — confirm against the CSVs.
feature_spec = {
    "total_spend": ("TotalValue", "sum"),
    "total_transactions": ("TransactionID", "count"),
    "avg_price": ("Price_x", "mean"),
    "unique_products": ("ProductID", "nunique"),
}

# One row per CustomerID; reset_index turns the group key back into a column.
customer_features = (
    merged_df
    .groupby("CustomerID")
    .agg(**feature_spec)
    .reset_index()
)

In [5]:

# Profile features: one-hot encode each customer's Region, keyed by CustomerID.
customer_profiles = pd.get_dummies(
    customers.set_index("CustomerID")[["Region"]]
)

# Attach the region indicators to the behavioural features. The join is driven
# by customer_features, so only customers present there appear in the result.
behaviour_by_customer = customer_features.set_index("CustomerID")
final_features = behaviour_by_customer.join(customer_profiles)

In [6]:
# Normalization
# Standardize every feature column so that no single feature dominates the
# Euclidean distances computed later. The scaler is fitted on, and applied to,
# the same feature table.
scaler = StandardScaler()
scaler.fit(final_features)
normalized_features = scaler.transform(final_features)


In [7]:
# KNN
# Ask for 4 neighbours per customer: the model is queried with the same rows
# it was fitted on, so each row's own entry is expected among its neighbours
# (normally in column 0), leaving 3 genuine lookalikes.
nn_model = NearestNeighbors(n_neighbors=4, metric='euclidean').fit(normalized_features)

# distances[r][c] / indices[r][c]: distance to, and row position of, the c-th
# nearest neighbour of row r.
distances, indices = nn_model.kneighbors(normalized_features)

In [8]:
# Similar customers
# For the first 20 customers (in final_features row order) collect the three
# nearest *other* customers together with their distance.
#
# Each neighbour index is paired with the distance from the same column of the
# kneighbors output. The previous version restarted its counter at 0 while
# iterating indices[i][1:4], so every neighbour received the distance of the
# column before it (the first lookalike always got the self-distance 0.0).
#
# The query row is removed by identity (idx != i) rather than by assuming it is
# always in column 0, which does not hold when several customers have identical
# feature vectors.
#
# NOTE: the "Score_*" values are Euclidean distances on the standardized
# features, so a SMALLER score means a MORE similar customer.
lookalike_results = {}
for i, customer_id in enumerate(final_features.index[:20]):
    neighbours = [
        (final_features.index[idx], dist)
        for idx, dist in zip(indices[i], distances[i])
        if idx != i
    ]
    lookalike_results[customer_id] = neighbours[:3]


# Flatten {customer: [(id, score), ...]} into one wide row per customer:
# CustomerID, CustID_1, Score_1, CustID_2, Score_2, CustID_3, Score_3.
lookalike_df = pd.DataFrame([
    [cust_id] + [item for pair in lookalike_results[cust_id] for item in pair]
    for cust_id in lookalike_results
], columns=["CustomerID", "CustID_1", "Score_1", "CustID_2", "Score_2", "CustID_3", "Score_3"])


# Persist the result next to the notebook (relative path, no index column).
lookalike_df.to_csv("Nithin_V_Lookalike.csv", index=False)

print("Lookalike model results saved to Nithin_V_Lookalike.csv")


Lookalike model results saved to Nithin_V_Lookalike.csv
