In [13]:
# Required libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

def build_customer_features(customers, transactions):
    """Build one feature row per customer that appears in ``transactions``.

    Args:
        customers: DataFrame with at least ``CustomerID`` and ``Region``.
        transactions: DataFrame with at least ``CustomerID``,
            ``TransactionID`` and ``TotalValue``.

    Returns:
        DataFrame with ``CustomerID``, ``TotalTransactions`` (row count),
        ``TotalSpending`` (sum of ``TotalValue``) and one ``Region_*``
        indicator column per region. Customers without any transaction are
        not included, because the aggregation starts from ``transactions``.
    """
    # Aggregate transaction count and total spend per customer
    features = transactions.groupby('CustomerID').agg(
        TotalTransactions=('TransactionID', 'count'),
        TotalSpending=('TotalValue', 'sum')
    ).reset_index()

    # Left join keeps every transacting customer even if it is missing from
    # `customers`; such rows end up with all Region_* indicators unset.
    features = features.merge(
        customers[['CustomerID', 'Region']], on='CustomerID', how='left'
    )

    # One-hot encode regions
    return pd.get_dummies(features, columns=['Region'], prefix='Region')


def top_lookalikes(similarity_matrix, customer_ids, top_n=3):
    """Return the ``top_n`` most similar other customers for each customer.

    Args:
        similarity_matrix: Square, row-indexable matrix where entry
            ``[i][j]`` is the similarity between customers ``i`` and ``j``.
        customer_ids: Iterable of customer IDs, in the same order as the
            rows of ``similarity_matrix``.
        top_n: Maximum number of recommendations per customer.

    Returns:
        Dict mapping each customer ID to a list of
        ``(other_customer_id, score)`` tuples sorted by descending score,
        with ``score`` a plain ``float`` rounded to 4 decimals.
    """
    ids = list(customer_ids)
    recommendations = {}

    for idx, customer_id in enumerate(ids):
        # Exclude the customer by row index rather than by assuming it sorts
        # first: a customer with an identical feature vector also scores 1.0
        # and could otherwise push "self" into the recommendations.
        # float() turns NumPy scalars into Python floats so that str() of the
        # result is stable across NumPy versions.
        candidates = [
            (other_idx, float(score))
            for other_idx, score in enumerate(similarity_matrix[idx])
            if other_idx != idx
        ]

        # Stable sort: ties keep their original row order
        candidates.sort(key=lambda pair: pair[1], reverse=True)

        recommendations[customer_id] = [
            (ids[other_idx], round(score, 4))
            for other_idx, score in candidates[:top_n]
        ]

    return recommendations


# The guard is true when this runs as a notebook cell or a script, so the
# names below are still defined at module level; it only keeps the file I/O
# from running when the helpers above are imported.
if __name__ == "__main__":
    # Load datasets
    customers = pd.read_csv('Customers.csv')
    products = pd.read_csv('Products.csv')
    transactions = pd.read_csv('Transactions.csv')

    # Preprocessing: customer-level transaction count, spending and region
    customer_features = build_customer_features(customers, transactions)

    # Standardize features before computing cosine similarity
    scaler = StandardScaler()
    features_scaled = scaler.fit_transform(customer_features.drop(columns=['CustomerID']))

    # Calculate cosine similarity
    similarity_matrix = cosine_similarity(features_scaled)

    # Create Lookalike Recommendations (top 3 per customer)
    lookalike_dict = top_lookalikes(similarity_matrix, customer_features['CustomerID'])

    # Generate Lookalike.csv
    lookalike_df = pd.DataFrame({
        'CustomerID': list(lookalike_dict.keys()),
        'Recommendations': [str(rec) for rec in lookalike_dict.values()]
    })

    lookalike_df.to_csv('FirstName_LastName_Lookalike.csv', index=False)
    print("Lookalike recommendations saved as 'FirstName_LastName_Lookalike.csv'")


Lookalike recommendations saved as 'FirstName_LastName_Lookalike.csv'
