In [3]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import numpy as np

In [5]:
# Read the three raw input tables (paths are relative to the working directory)
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

In [9]:
# Attach the customer record, then the product record, to every transaction.
# Both joins use pandas' default inner join, so a transaction whose CustomerID
# or ProductID has no match in the lookup table is dropped.
merged_data = pd.merge(
    pd.merge(transactions, customers, on='CustomerID'),
    products,
    on='ProductID',
)


In [13]:
# Feature engineering: build one standardized feature row per customer.
# Derive a per-unit price only when the merged frame does not already carry one.
if 'Price' not in merged_data.columns:
    # Mask zero quantities first: dividing by 0 yields inf (or NaN for 0/0),
    # and StandardScaler below raises on infinite input. Masked rows get a NaN
    # price, which the per-customer mean skips.
    nonzero_quantity = merged_data['Quantity'].where(merged_data['Quantity'] != 0)
    merged_data['Price'] = merged_data['TotalValue'] / nonzero_quantity

# Aggregate transaction data per customer: total units, total spend, mean price
customer_features = merged_data.groupby('CustomerID').agg({
    'Quantity': 'sum',
    'TotalValue': 'sum',
    'Price': 'mean'
}).reset_index()

# A customer with no usable price at all ends up with a NaN mean, which would
# survive scaling and be rejected by the similarity computation later on.
# Fall back to the average across customers (a no-op when nothing is missing).
customer_features['Price'] = customer_features['Price'].fillna(customer_features['Price'].mean())

# Merge customer demographics
customer_features = customer_features.merge(customers[['CustomerID', 'Region']], on='CustomerID')

# One-hot encode Region; drop_first=True drops the first category's column,
# so that category is represented by all remaining Region_* columns being 0.
customer_features = pd.get_dummies(customer_features, columns=['Region'], drop_first=True)

# Standardize numerical features (zero mean, unit variance) so that no single
# raw scale dominates the similarity computation. The Region_* indicator
# columns are left as they are.
scaler = StandardScaler()
numerical_features = ['Quantity', 'TotalValue', 'Price']
customer_features[numerical_features] = scaler.fit_transform(customer_features[numerical_features])


In [15]:
# Compute the customer-by-customer cosine similarity matrix.
# Row i / column j line up with customer_features row order, hence with customer_ids.
feature_matrix = customer_features.drop(columns=['CustomerID'])
similarity_matrix = cosine_similarity(feature_matrix)

# Generate Lookalike Recommendations
N_TARGET_CUSTOMERS = 20   # recommendations are produced for the first 20 customers
N_LOOKALIKES = 3          # number of lookalikes kept per customer

customer_ids = customer_features['CustomerID'].tolist()
lookalike_map = {}

for idx, customer_id in enumerate(customer_ids[:N_TARGET_CUSTOMERS]):
    # Exclude the customer's own entry by position rather than assuming it
    # sorts first: a tie with another customer (identical feature vectors) or
    # an all-zero feature row (self-similarity 0) would otherwise let the
    # customer appear as their own lookalike and push out a real match.
    similarity_scores = [
        (other_idx, score)
        for other_idx, score in enumerate(similarity_matrix[idx])
        if other_idx != idx
    ]
    # sorted() is stable, so equal scores keep ascending row order.
    sorted_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    top_lookalikes = [(customer_ids[i], score) for i, score in sorted_scores[:N_LOOKALIKES]]
    lookalike_map[customer_id] = top_lookalikes

In [17]:
# Flatten the lookalike map into one record per (customer, lookalike) pair
lookalike_list = [
    {'CustomerID': cust_id, 'LookalikeID': lookalike_id, 'SimilarityScore': score}
    for cust_id, lookalikes in lookalike_map.items()
    for lookalike_id, score in lookalikes
]

# Persist the flattened records as a CSV without the DataFrame index column
lookalike_df = pd.DataFrame(lookalike_list)
lookalike_df.to_csv('Lookalike.csv', index=False)
print("Lookalike recommendations saved to 'Lookalike.csv'")

Lookalike recommendations saved to 'Lookalike.csv'
