In [None]:
import pandas as pd

# Load datasets
# All three CSV files are read from the same folder; edit DATA_DIR to point the
# notebook at a different location instead of changing every path.
DATA_DIR = "/content/drive/MyDrive/datasets"
customers = pd.read_csv(f"{DATA_DIR}/Customers (1).csv")
products = pd.read_csv(f"{DATA_DIR}/Products.csv")
transactions = pd.read_csv(f"{DATA_DIR}/Transactions.csv")

In [None]:
# Attach the customer record and the product record to every transaction.
# Both joins use pandas' default (inner) merge on the shared key column.
merged = transactions.merge(customers, on="CustomerID").merge(products, on="ProductID")

# Collapse the merged rows to one row per customer. Output column names are
# kept identical to the source columns because later cells select them by name.
customer_features = (
    merged
    .groupby('CustomerID')
    .agg(
        # Sum of TotalValue across the customer's merged rows.
        TotalValue=('TotalValue', 'sum'),
        # Count of non-null ProductID entries, i.e. one per merged transaction
        # row (not the number of distinct products).
        ProductID=('ProductID', 'count'),
        # Most frequent category; mode() returns its values sorted, so ties
        # resolve to the first one in that order.
        Category=('Category', lambda cats: cats.mode()[0]),
    )
    .reset_index()
)

print(customer_features.head())


  CustomerID  TotalValue  ProductID     Category
0      C0001     3354.52          5  Electronics
1      C0002     1862.74          4     Clothing
2      C0003     2725.38          4   Home Decor
3      C0004     5354.88          8        Books
4      C0005     2034.24          3  Electronics


In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

# Normalize the numerical features so that revenue and purchase count are on a
# comparable scale. Only TotalValue and ProductID feed the similarity below;
# the Category column is not part of the feature matrix.
scaler = MinMaxScaler()
customer_features_scaled = scaler.fit_transform(customer_features[['TotalValue', 'ProductID']])

# Compute similarity matrix: entry [i][j] is the cosine similarity between the
# scaled feature rows of customer i and customer j.
similarity_matrix = cosine_similarity(customer_features_scaled)

# Find top 3 similar customers for each customer
TOP_N = 3
# Positional list of IDs as native Python objects, aligned with the matrix rows.
customer_ids = customer_features['CustomerID'].tolist()

similar_customers = {}
for i, customer_id in enumerate(customer_ids):
    # Stable sort on the negated scores gives descending similarity with a
    # deterministic order among ties (lower row position first).
    ranked = (-similarity_matrix[i]).argsort(kind="stable")
    # Exclude the customer itself by position rather than assuming it ranks
    # first: tied scores (or an all-zero scaled row, whose self-similarity is
    # 0) can place self anywhere in the ranking.
    similar_indices = [j for j in ranked if j != i][:TOP_N]
    # Cast scores to plain floats so the CSV never contains NumPy scalar reprs
    # such as "np.float64(0.99)".
    similar_customers[customer_id] = [
        (customer_ids[j], float(similarity_matrix[i][j])) for j in similar_indices
    ]

# Save results to Lookalike.csv
# One row per customer; the second column holds the string form of the list of
# (lookalike_id, score) tuples.
import csv
with open("Lookalike.csv", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["CustomerID", "LookalikeCustomers"])
    for customer_id, lookalikes in similar_customers.items():
        writer.writerow([customer_id, lookalikes])
