In [1]:
import csv

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [2]:
# Read the three input tables; the file names are relative paths.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)

In [3]:
# Attach customer columns, then product columns, to every transaction row.
# how='inner' is the pandas default, spelled out here: rows whose key has no
# match in the other table are dropped.
merged = (
    transactions
    .merge(customers, on='CustomerID', how='inner')
    .merge(products, on='ProductID', how='inner')
)

In [4]:
# Replace the Region and Category values with integer label codes.
# Each column gets its own encoder: refitting a single shared encoder on
# Category would overwrite its classes_, losing the code -> label mapping
# for Region and making inverse_transform on Region impossible.
region_encoder = LabelEncoder()
category_encoder = LabelEncoder()
merged['Region'] = region_encoder.fit_transform(merged['Region'])
merged['Category'] = category_encoder.fit_transform(merged['Category'])

# Backward-compatible alias: `encoder` previously ended up fitted on Category.
encoder = category_encoder

In [8]:
# Aggregate Features by Customer
# Region is taken from the customer's first row; spend and quantity are summed.
customer_totals = merged.groupby('CustomerID').agg({
    'Region': 'first',
    'TotalValue': 'sum',
    'Quantity': 'sum'
})

# Category values are arbitrary label codes, so adding them up yields a number
# with no meaning. Count the customer's transactions in each category instead:
# one column per category. The prefix also makes every column name a string,
# so the frame never has mixed-type column labels.
category_counts = pd.crosstab(merged['CustomerID'], merged['Category']).add_prefix('Category_')

# Both frames are indexed by CustomerID; after reset_index, CustomerID is the
# first column and every remaining column is a numeric feature.
customer_features = customer_totals.join(category_counts).reset_index()

In [6]:
# Normalize Features
# Column 0 is CustomerID; every column after it is treated as a feature.
# The scaler is fitted on those columns and then applied to the same columns.
scaler = StandardScaler().fit(customer_features.iloc[:, 1:])
normalized_features = scaler.transform(customer_features.iloc[:, 1:])

In [9]:
# Calculate Similarity
# Pairwise cosine similarity of the feature rows against themselves:
# entry [i][j] compares the row at position i with the row at position j.
similarity_matrix = cosine_similarity(normalized_features, normalized_features)

In [10]:
# Generate Lookalike Recommendations
# Maps each CustomerID to a list of (other CustomerID, similarity score)
# tuples, best match first.
TOP_N = 3  # number of lookalikes kept per customer

lookalike_results = {}
for i, customer in enumerate(customer_features['CustomerID']):
    # Rank all positions by similarity, highest first.
    ranked = similarity_matrix[i].argsort()[::-1]
    # Remove the customer itself by position rather than assuming it is ranked
    # first: with ties, floating-point rounding, or an all-zero feature vector
    # the self-similarity is not guaranteed to be the unique maximum, and
    # slicing it off blindly could drop a real neighbour and keep the customer
    # as its own lookalike.
    similar_indices = ranked[ranked != i][:TOP_N]
    # .iloc: similar_indices are row positions, not index labels.
    # float(): store plain Python floats instead of NumPy scalars.
    similar_customers = [
        (customer_features['CustomerID'].iloc[j], float(similarity_matrix[i][j]))
        for j in similar_indices
    ]
    lookalike_results[customer] = similar_customers

In [11]:
# Save Lookalike Results
# One row per customer: the CustomerID and the string form of its list of
# (lookalike CustomerID, score) tuples.
# newline="" is what the csv module requires for files handed to csv.writer;
# without it, platforms that translate "\n" write an extra blank line per row.
with open("FirstName_LastName_Lookalike.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["CustomerID", "Lookalikes"])
    for cust_id, lookalikes in lookalike_results.items():
        # Convert scores to plain floats so the cell text shows bare numbers
        # rather than NumPy scalar reprs such as "np.float64(0.98)".
        plain_lookalikes = [(other_id, float(score)) for other_id, score in lookalikes]
        writer.writerow([cust_id, plain_lookalikes])