In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics.pairwise import cosine_similarity
import tensorflow as tf
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout


In [3]:

# Load the three raw tables from the local datasets/ folder.
customers_df = pd.read_csv('datasets/Customers.csv')
products_df = pd.read_csv('datasets/Products.csv')
transactions_df = pd.read_csv('datasets/Transactions.csv')

# Convert the date columns to datetime so date arithmetic works in later cells.
customers_df = customers_df.assign(
    SignupDate=pd.to_datetime(customers_df['SignupDate'])
)
transactions_df = transactions_df.assign(
    TransactionDate=pd.to_datetime(transactions_df['TransactionDate'])
)


In [4]:


# Build one feature row per customer that appears in the transaction log:
# recency / frequency / monetary aggregates, one-hot region flags, and the
# share of purchases falling in each product category.

# Recency is measured against the latest transaction in the data rather than
# the wall clock, so the value does not drift between runs.
current_date = transactions_df['TransactionDate'].max()

# NOTE(review): 'monetary' sums the 'Price' column as-is. If 'Price' is a unit
# price and the table also carries a quantity / line-total column, this
# understates spend -- confirm against the Transactions schema.
customer_features = transactions_df.groupby('CustomerID').agg(
    recency=('TransactionDate', lambda dates: (current_date - dates.max()).days),
    frequency=('TransactionID', 'count'),
    monetary=('Price', 'sum'),
)

# One-hot encode the region as floats. The default bool/uint8 dummies would
# give a mixed-dtype frame, and a customer missing from customers_df would turn
# the column into object dtype once the join introduces NaN.
region_dummies = pd.get_dummies(
    customers_df.set_index('CustomerID')['Region'],
    prefix='region',
    dtype=float,
)
customer_features = customer_features.join(region_dummies)

# Count purchases per (customer, category), then normalise each row so the
# values are the fraction of that customer's transactions in each category.
product_categories = pd.merge(
    transactions_df,
    products_df[['ProductID', 'Category']],
    on='ProductID',
)
category_preferences = pd.crosstab(
    product_categories['CustomerID'],
    product_categories['Category'],
)
category_preferences = category_preferences.div(
    category_preferences.sum(axis=1), axis=0
)

# Left-join on CustomerID; anything unmatched becomes 0 rather than NaN.
# fillna returns a new frame, so no object is mutated in place.
final_features = customer_features.join(category_preferences).fillna(0)


In [5]:

# Standardise every feature column so that no single feature dominates the
# similarity computation purely because of its scale.
scaler = StandardScaler()
scaler.fit(final_features)
scaled_features = scaler.transform(final_features)

# Labelled copy of the scaled matrix, keeping the customer index and the
# original column names.
scaled_features_df = pd.DataFrame(
    data=scaled_features,
    index=final_features.index,
    columns=final_features.columns,
)

# Pairwise cosine similarity between customers; row/column i corresponds to
# final_features.index[i].
similarity_matrix = cosine_similarity(scaled_features)


In [6]:

# Result container: CustomerID -> list of (lookalike CustomerID, score) pairs.
lookalike_map = dict()
# Only the first 20 rows of the customer table are scored.
first_20_customers = customers_df['CustomerID'].head(20)


In [7]:

# For each of the first 20 customers, pick the most similar *other* customers
# by cosine similarity and store them as (CustomerID, score) pairs.
n_lookalikes = 3

for customer_id in first_20_customers:
    # A customer with no transactions has no row in final_features; record an
    # empty result instead of failing with KeyError.
    if customer_id not in final_features.index:
        lookalike_map[customer_id] = []
        continue

    customer_idx = final_features.index.get_loc(customer_id)
    similarities = similarity_matrix[customer_idx]

    # Rank everyone by descending similarity, then drop the customer's own row
    # explicitly. With tied scores (e.g. identical feature vectors) self is not
    # guaranteed to sort first, so slicing off position 0 could keep self and
    # discard a genuine match.
    ranked_indices = np.argsort(similarities)[::-1]
    similar_indices = ranked_indices[ranked_indices != customer_idx][:n_lookalikes]

    similar_customers = final_features.index[similar_indices]
    similar_scores = similarities[similar_indices]
    lookalike_map[customer_id] = list(zip(similar_customers, similar_scores))

print("\nLookalike recommendations for first 20 customers (Map-based):")
for customer_id, sims in lookalike_map.items():
    print(f"\nCustomer {customer_id}:")
    for (similar_customer, score) in sims:
        print(f"Similar to {similar_customer} with score {score:.3f}")



Lookalike recommendations for first 20 customers (Map-based):

Customer C0001:
Similar to C0148 with score 0.872
Similar to C0096 with score 0.844
Similar to C0048 with score 0.832

Customer C0002:
Similar to C0134 with score 0.975
Similar to C0159 with score 0.948
Similar to C0106 with score 0.926

Customer C0003:
Similar to C0158 with score 0.905
Similar to C0129 with score 0.901
Similar to C0031 with score 0.875

Customer C0004:
Similar to C0113 with score 0.951
Similar to C0012 with score 0.926
Similar to C0147 with score 0.884

Customer C0005:
Similar to C0007 with score 0.956
Similar to C0140 with score 0.940
Similar to C0146 with score 0.864

Customer C0006:
Similar to C0187 with score 0.854
Similar to C0153 with score 0.845
Similar to C0071 with score 0.830

Customer C0007:
Similar to C0005 with score 0.956
Similar to C0140 with score 0.909
Similar to C0186 with score 0.795

Customer C0008:
Similar to C0098 with score 0.926
Similar to C0059 with score 0.908
Similar to C0065 wi