In [25]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import numpy as np
# Locations of the three input CSV files.
# NOTE(review): these are absolute paths on one specific Windows machine; the
# notebook will not run elsewhere until they are repointed.
CUSTOMERS_CSV = r"C:\Users\henis\Downloads\Customers.csv"
PRODUCTS_CSV = r"C:\Users\henis\Downloads\Products.csv"
TRANSACTIONS_CSV = r"C:\Users\henis\Downloads\Transactions.csv"

# Read each file into its own DataFrame using the default CSV parsing options.
customers = pd.read_csv(CUSTOMERS_CSV)
products = pd.read_csv(PRODUCTS_CSV)
transactions = pd.read_csv(TRANSACTIONS_CSV)

In [27]:
# Build one flat table: each transaction row enriched with the columns of its
# product (joined on ProductID) and of its customer (joined on CustomerID).
# Both joins are inner joins (the pandas default), so rows without a match on
# the other side are dropped.
transactions_products = transactions.merge(products, how="inner", on="ProductID")
data = transactions_products.merge(customers, how="inner", on="CustomerID")

In [29]:
# Feature engineering: collapse the merged table to one row per customer.
#   AvgTransactionValue - mean of TotalValue over the customer's rows
#   PurchaseFrequency   - number of TransactionID entries for the customer
#   UniqueCategories    - number of distinct Category values purchased
#   Region              - most frequent Region value among the customer's rows
per_customer = data.groupby('CustomerID')
customer_features = per_customer.agg(
    AvgTransactionValue=('TotalValue', 'mean'),
    PurchaseFrequency=('TransactionID', 'count'),
    UniqueCategories=('Category', 'nunique'),
    # mode() returns the most frequent values in sorted order; [0] takes the first
    Region=('Region', lambda regions: regions.mode()[0]),
)
# Turn the CustomerID group key back into an ordinary leading column.
customer_features = customer_features.reset_index()

In [31]:
# One-hot encode the categorical Region column (one indicator column per region).
customer_features = pd.get_dummies(customer_features, columns=['Region'])

# Everything except the CustomerID identifier is a model feature. Note that the
# one-hot Region indicators are standardised together with the numeric columns.
feature_matrix = customer_features.drop(columns=['CustomerID'])
scaler = StandardScaler()
scaled_features = scaler.fit_transform(feature_matrix)

# Pairwise cosine similarity between customers; row/column i corresponds to
# row i of customer_features.
similarity_matrix = cosine_similarity(scaled_features)

In [33]:
# Top 3 lookalikes for each of the first 20 customers (in customer_features row order).
# Result: {customer_id: [(lookalike_customer_id, similarity_score), ...]}
lookalike_results = {}

# Native Python values, so the IDs serialise cleanly and lookups do not have to
# materialise a whole DataFrame row per neighbour.
customer_ids = customer_features['CustomerID'].tolist()

for idx, cust_id in enumerate(customer_ids[:20]):
    # Similarity of the current customer to every customer (including itself)
    similarity_scores = similarity_matrix[idx]

    # Rank all customers from most to least similar. A stable sort makes the
    # order among tied scores deterministic (lower row position first).
    ranked = np.argsort(-similarity_scores, kind="stable")

    # Exclude the customer itself by position. Simply skipping the first ranked
    # entry is not safe: another customer with a tied top score (e.g. identical
    # features) can be ranked ahead of the customer's own row, which would then
    # leak the customer into their own lookalike list.
    similar_customers = ranked[ranked != idx][:3]

    # Store plain floats so the scores are written to CSV as numbers rather
    # than as NumPy scalar reprs such as "np.float64(0.98)".
    lookalike_results[cust_id] = [
        (customer_ids[i], float(similarity_scores[i]))
        for i in similar_customers
    ]


In [35]:
# Reshape the {customer: lookalikes} mapping into two parallel columns:
# one row per customer, with that customer's list of (lookalike_id, score)
# pairs held in a single cell.
lookalike_data = {
    "cust_id": list(lookalike_results.keys()),
    "lookalike_list": list(lookalike_results.values()),
}
lookalike_df = pd.DataFrame(lookalike_data)

# Write the table to Lookalike.csv without the DataFrame index.
# NOTE(review): absolute, machine-specific output path; the target directory
# must already exist.
lookalike_csv_path = r"C:\Users\henis\Desktop\zeotap\Lookalike.csv"
lookalike_df.to_csv(lookalike_csv_path, index=False)