In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [7]:
# Read the product catalogue and the transaction log (in that order) from the
# notebook's working directory.
data_products, data_transactions = (
    pd.read_csv(csv_name) for csv_name in ('Products.csv', 'Transactions.csv')
)

In [8]:
# Read the customer master table from the notebook's working directory.
data_customers = pd.read_csv(filepath_or_buffer='Customers.csv')

In [11]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity


# Attach customer attributes to every transaction. This is an inner join, so
# customers without any transaction do not appear in merged_data.
merged_data = pd.merge(data_transactions, data_customers, on='CustomerID')

# Parse the timestamps once, up front. Taking max() on the raw strings compares
# them lexicographically, which is only chronological for ISO-style dates.
transaction_dates = pd.to_datetime(merged_data['TransactionDate'])

# Measure recency against the newest transaction in the data rather than the
# wall clock, so re-running the notebook on another day (or at another time of
# day) produces exactly the same feature values.
snapshot_date = transaction_dates.max()

# Feature engineering: aggregate to one row per customer.
customer_features = (
    merged_data
    .assign(_TransactionTimestamp=transaction_dates)
    .groupby('CustomerID')
    .agg(
        TotalSpent=('TotalValue', 'sum'),                # Total spending
        TransactionCount=('TransactionID', 'count'),     # Number of transactions
        LastPurchase=('_TransactionTimestamp', 'max'),   # Most recent purchase
        UniqueProducts=('ProductID', 'nunique'),         # Number of unique products purchased
    )
    .reset_index()
)

# Recency = whole days between the snapshot date and the customer's last purchase.
customer_features['Recency'] = (snapshot_date - customer_features['LastPurchase']).dt.days

# Drop the helper column and restore the column order downstream code expects.
customer_features = customer_features[
    ['CustomerID', 'TotalSpent', 'TransactionCount', 'Recency', 'UniqueProducts']
]

# Per-customer product variety, kept as its own frame in case later cells use it.
product_sales = customer_features[['CustomerID', 'UniqueProducts']].copy()

# Columns that describe a customer's behaviour, and how many lookalikes to keep.
FEATURE_COLUMNS = ['TotalSpent', 'TransactionCount', 'Recency', 'UniqueProducts']
TOP_N_LOOKALIKES = 3

# Standardize features so no single column dominates the similarity calculation
scaler = StandardScaler()
scaled_features = scaler.fit_transform(customer_features[FEATURE_COLUMNS])

# Calculate cosine similarity between every pair of customers
similarity_matrix = cosine_similarity(scaled_features)

# Rank on a copy whose diagonal is -inf so a customer can never be selected as
# their own lookalike. Slicing off "the last argsort position" is not enough:
# a tie with another customer (identical feature direction), float rounding, or
# an all-zero standardized vector (self-similarity 0) can put the customer's own
# row anywhere in the ordering.
ranking_scores = similarity_matrix.copy()
np.fill_diagonal(ranking_scores, -np.inf)

customer_ids = customer_features['CustomerID'].tolist()

# Never ask for more lookalikes than there are *other* customers.
n_lookalikes = max(0, min(TOP_N_LOOKALIKES, len(customer_ids) - 1))

# Stable sort on the negated scores gives descending similarity with ties
# broken deterministically by row position.
top_indices = np.argsort(-ranking_scores, axis=1, kind='stable')[:, :n_lookalikes]

# Generate lookalike recommendations: {customer_id: [(lookalike_id, score), ...]}
lookalikes = {
    customer_id: [
        (customer_ids[j], float(similarity_matrix[i, j]))
        for j in top_indices[i]
    ]
    for i, customer_id in enumerate(customer_ids)
}

# Flatten the {customer: [(lookalike, score), ...]} mapping into one row per pair.
lookalike_list = [
    [source_id, match_id, match_score]
    for source_id, matches in lookalikes.items()
    for match_id, match_score in matches
]
lookalike_df = pd.DataFrame(
    lookalike_list,
    columns=['CustomerID', 'LookalikeID', 'SimilarityScore'],
)

# Keep only the rows belonging to the first 20 customers (C0001 to C0020).
first_twenty_ids = [f'C{str(i).zfill(4)}' for i in range(1, 21)]
is_first_twenty = lookalike_df['CustomerID'].isin(first_twenty_ids)
lookalike_df_filtered = lookalike_df[is_first_twenty]

# Write the result next to the notebook and confirm on stdout.
lookalike_df_filtered.to_csv('Lookalike.csv', index=False)
print("Lookalike recommendations saved to 'Lookalike.csv'.")

Lookalike recommendations saved to 'Lookalike.csv'.
