# In [1]:
# Lookalike_Model.ipynb - Lookalike Model for Similar Customers

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load datasets
customers = pd.read_csv('../data/Customers.csv')
products = pd.read_csv('../data/Products.csv')
transactions = pd.read_csv('../data/Transactions.csv')

# Merge datasets: left joins keep every transaction row and attach the
# matching customer and product columns to it.
df = (
    transactions
    .merge(customers, on='CustomerID', how='left')
    .merge(products, on='ProductID', how='left')
)

# Feature engineering: one row per customer with total spend, number of
# distinct transactions, and total quantity purchased.
customer_features = df.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'TransactionID': 'nunique',
    'Quantity': 'sum'
}).reset_index()

# Normalize data using StandardScaler (every column except CustomerID) so
# that no single feature dominates the similarity purely because of its scale.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(customer_features.iloc[:, 1:])

# Compute the pairwise cosine similarity matrix, labelled by CustomerID on
# both axes so it can be looked up by customer.
similarity_matrix = cosine_similarity(scaled_features)
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_features['CustomerID'],
    columns=customer_features['CustomerID'],
)

# Recommend top 3 similar customers for first 20 customers
lookalikes = {}
for customer in customer_features['CustomerID'][:20]:
    # Remove the customer's own entry by label instead of assuming it sorts
    # first: another customer can tie at (or, through floating-point rounding,
    # edge above) the self-similarity score, in which case skipping position 0
    # would drop a genuine lookalike and could list the customer as its own
    # lookalike. A stable sort keeps the order of tied scores reproducible.
    similar_customers = (
        similarity_df[customer]
        .drop(labels=customer)
        .sort_values(ascending=False, kind='mergesort')
        .head(3)
    )
    # Each entry is a (lookalike CustomerID, similarity score) pair.
    lookalikes[customer] = list(zip(similar_customers.index, similar_customers.values))

# Convert results to DataFrame: one row per customer, one column per rank,
# each cell holding a (CustomerID, score) tuple.
lookalike_df = pd.DataFrame.from_dict(
    lookalikes,
    orient='index',
    columns=['Lookalike1', 'Lookalike2', 'Lookalike3'],
)
lookalike_df.index.name = 'CustomerID'

# Save results to CSV
lookalike_df.to_csv('../outputs/FirstName_LastName_Lookalike.csv')
