In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score
import json

In [2]:
# Input files. The customers and products tables are read relative to the
# working directory; the transactions table is read from an absolute
# /content/ path, so it must exist at exactly that location.
CUSTOMERS_CSV = "Customers.csv"
PRODUCTS_CSV = "Products.csv"
TRANSACTIONS_CSV = "/content/Transactions - Transactions.csv"

# Load each table into its own DataFrame.
customers = pd.read_csv(CUSTOMERS_CSV)
products = pd.read_csv(PRODUCTS_CSV)
transactions = pd.read_csv(TRANSACTIONS_CSV)

In [6]:
# Task 2: Lookalike Model
# One row per CustomerID: summed TotalValue, summed Quantity and mean Price
# over that customer's transactions.
spend_summary = (
    transactions
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
        Price=('Price', 'mean'),
    )
    .reset_index()
)

# Attach the customer attributes. The default inner join keeps only customers
# that appear in both tables, i.e. those with at least one transaction.
customer_profile = pd.merge(spend_summary, customers, on='CustomerID')

# Overwrite the Region labels with integer codes so the column can be scaled
# together with the numeric features.
# NOTE(review): the codes impose an arbitrary numeric order on what looks like
# a nominal category -- confirm this is intended for the similarity measure.
le = LabelEncoder()
region_codes = le.fit_transform(customer_profile['Region'])
customer_profile['Region'] = region_codes

# Standardize the four feature columns.
feature_columns = ['TotalValue', 'Quantity', 'Price', 'Region']
scaler = StandardScaler()
normalized_data = scaler.fit_transform(customer_profile[feature_columns])

# Pairwise cosine similarity between the standardized customer rows; entry
# [i][j] compares row i of customer_profile with row j.
similarity_matrix = cosine_similarity(normalized_data)
# Generate Lookalike Recommendations
# Targets are the first N_TARGETS customer IDs of the customers table
# (expected to be C0001 - C0020). They are looked up by ID rather than by row
# position: customer_profile only has rows for customers with at least one
# transaction, so row i is not guaranteed to be the i-th customer, and there
# may be fewer than N_TARGETS rows altogether.
N_TARGETS = 20
TOP_K = 3

profile_ids = customer_profile['CustomerID'].to_numpy()
target_ids = customers['CustomerID'].drop_duplicates().sort_values().head(N_TARGETS)

# Maps each target CustomerID to a list of (lookalike CustomerID, similarity)
# pairs, most similar first.
lookalike_results = {}
for target_id in target_ids:
    matching_rows = np.flatnonzero(profile_ids == target_id)
    if matching_rows.size == 0:
        # No transactions means no feature vector, hence nothing to compare.
        print(f"Skipping {target_id}: no transactions, so no profile to compare")
        continue
    i = matching_rows[0]

    # Rank every customer by descending similarity; the stable sort keeps the
    # order of tied scores deterministic.
    ranked = np.argsort(-similarity_matrix[i], kind="stable")
    # Drop the target itself explicitly. Slicing off position 0 is not safe:
    # a customer with an identical feature vector ties with the target and can
    # sort ahead of it, which would return the target as its own lookalike.
    ranked = ranked[ranked != i][:TOP_K]

    lookalike_results[target_id] = [
        (profile_ids[j], float(similarity_matrix[i, j])) for j in ranked
    ]

# Save Lookalike Results
# One line per target customer: the customer ID, then ", ", then the JSON
# encoding of its lookalike list.
# NOTE(review): the JSON field itself contains commas and is not quoted, so
# the file cannot be split on commas by a plain CSV reader.
with open("Lookalike.csv", "w") as f:
    f.writelines(
        f"{cust_id}, {json.dumps(similars)}\n"
        for cust_id, similars in lookalike_results.items()
    )