In [3]:
import pandas as pd
# Read the three raw CSV tables in one pass; the paths are relative to the
# notebook's working directory.
customers, products, transactions = map(
    pd.read_csv,
    (
        '../datasets/Customers.csv',
        '../datasets/Products.csv',
        '../datasets/Transactions.csv',
    ),
)

In [4]:
# A notebook cell only renders its final bare expression, so the first two
# previews must be displayed explicitly or they are silently discarded.
# `display` is provided by the IPython kernel.
display(customers.head())
display(transactions.head())
products.head()


Unnamed: 0,ProductID,ProductName,Category,Price
0,P001,ActiveWear Biography,Books,169.3
1,P002,ActiveWear Smartwatch,Electronics,346.3
2,P003,ComfortLiving Biography,Books,44.12
3,P004,BookWorld Rug,Home Decor,95.69
4,P005,TechPro T-Shirt,Clothing,429.31


In [5]:
# Attach each transaction to its customer record (inner join on CustomerID,
# which is the default join type for pd.merge).
data = pd.merge(transactions, customers, on='CustomerID')


In [6]:
# One row per customer: summed TotalValue and the number of transactions.
# as_index=False keeps CustomerID as a regular column.
customer_spending = data.groupby('CustomerID', as_index=False).agg(
    total_spend=pd.NamedAgg(column='TotalValue', aggfunc='sum'),
    purchase_count=pd.NamedAgg(column='TransactionID', aggfunc='count'),
)


In [7]:
# Keep only the profile attributes used as features, then join them onto the
# per-customer spending summary (inner join on CustomerID).
customer_profile = customers.loc[:, ['CustomerID', 'Region']]
data_combined = pd.merge(customer_spending, customer_profile, on='CustomerID')


In [8]:
# One-hot encode Region, keeping an indicator column for *every* region.
# Dropping the first level (useful for regression) would encode the baseline
# region as all zeros, so two customers sharing that region would get no
# region contribution to their cosine similarity while customers sharing any
# other region would. dtype=float keeps the feature columns purely numeric.
# NOTE: this cell consumes the 'Region' column, so it cannot be re-run
# without first re-running the cell that builds data_combined.
data_combined = pd.get_dummies(
    data_combined,
    columns=['Region'],
    drop_first=False,
    dtype=float,
)


In [9]:
from sklearn.preprocessing import StandardScaler

# Standardise the two numeric features so neither dominates the similarity
# purely because of its scale. Fit first, then overwrite the columns with the
# transformed values.
scaler = StandardScaler()
scaler.fit(data_combined[['total_spend', 'purchase_count']])
data_combined[['total_spend', 'purchase_count']] = scaler.transform(
    data_combined[['total_spend', 'purchase_count']]
)


In [10]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between customers, computed over the two numeric
# features plus every one-hot Region_* indicator column.
region_columns = [name for name in data_combined.columns if name.startswith('Region_')]
feature_columns = ['total_spend', 'purchase_count'] + region_columns
similarity_matrix = cosine_similarity(data_combined[feature_columns])


In [11]:
import numpy as np

# Maps each of the first 20 customers to a list of up to three
# (lookalike CustomerID, similarity score) pairs, most similar first.
lookalike_recommendations = {}

# Row *position* of every customer in data_combined; row i of
# similarity_matrix corresponds to row i of data_combined. Built once, outside
# the loop, and independent of the DataFrame's index labels.
customer_ids = data_combined['CustomerID'].to_numpy()
position_of = {cid: pos for pos, cid in enumerate(customer_ids)}

for customer_id in customers['CustomerID'][:20]:  # For first 20 customers
    customer_pos = position_of.get(customer_id)
    if customer_pos is None:
        # Customer has no row in data_combined (e.g. no transactions), so
        # there are no features to compare; record no lookalikes instead of
        # crashing.
        lookalike_recommendations[customer_id] = []
        continue

    # Work on a copy: indexing a row of the matrix returns a view, and
    # writing the sentinel into it would silently corrupt similarity_matrix.
    similarity_scores = similarity_matrix[customer_pos].copy()

    # Exclude the customer from their own ranking. -inf is used because -1 is
    # a legitimate cosine similarity value.
    similarity_scores[customer_pos] = -np.inf

    # Positions of the three highest scores, best match first.
    top_3_similar_customers = np.argsort(similarity_scores)[::-1][:3]

    # Pair each lookalike's CustomerID with its score; the self-entry can only
    # appear when there are fewer than four customers, and is filtered out.
    similar_customers = [
        (customer_ids[i], float(similarity_scores[i]))
        for i in top_3_similar_customers
        if i != customer_pos
    ]

    lookalike_recommendations[customer_id] = similar_customers


In [12]:
# Flatten {customer: [(lookalike, score), ...]} into one row per pair.
lookalike_rows = [
    [source_id, match_id, score]
    for source_id, matches in lookalike_recommendations.items()
    for match_id, score in matches
]

# Tabulate the pairs and write them to disk without the index column.
lookalike_df = pd.DataFrame(
    lookalike_rows,
    columns=['CustomerID', 'LookalikeCustomerID', 'SimilarityScore'],
)
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike recommendations saved to 'Lookalike.csv'.")


Lookalike recommendations saved to 'Lookalike.csv'.
