In [2]:
import csv

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Load the raw customer and transaction tables (paths are relative to the
# notebook's working directory).
customers = pd.read_csv('Customers.csv')
transactions = pd.read_csv('Transactions.csv')

# Behavioural columns that are summed per customer and fed to the similarity model.
FEATURE_COLUMNS = ['TotalValue', 'Quantity']

# Aggregate transactional data: total spend and total units per customer.
customer_features = transactions.groupby('CustomerID').agg(
    {column: 'sum' for column in FEATURE_COLUMNS}
).reset_index()

# Merge with customer demographic data. The left join keeps customers that
# have no transactions; their aggregated features come back as NaN.
# Only those feature columns are zero-filled -- a blanket fillna(0) would also
# overwrite missing demographic fields with the number 0.
customer_profiles = (
    customers
    .merge(customer_features, on='CustomerID', how='left')
    .fillna({column: 0 for column in FEATURE_COLUMNS})
)

# Feature scaling: put both features on a comparable scale before comparing rows.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(customer_profiles[FEATURE_COLUMNS])

# Calculate similarity: entry [i, j] is the cosine similarity between the
# scaled feature rows of customer i and customer j (row order follows
# customer_profiles).
similarity_matrix = cosine_similarity(scaled_data)

# Find top 3 similar customers for each customer.
TOP_N = 3

# Positional list of IDs (built-in Python values), aligned with the rows of
# similarity_matrix, so lookups do not depend on the DataFrame's index labels.
customer_ids = customer_profiles['CustomerID'].tolist()

lookalike_map = {}
for i, customer_id in enumerate(customer_ids):
    # Rank every customer by descending similarity. The stable sort keeps
    # tied scores in their original row order.
    ranked = np.argsort(-similarity_matrix[i], kind='stable')

    # Exclude the customer's own row explicitly. Dropping "whatever ranks
    # first" is not safe: customers with identical feature vectors (e.g. all
    # customers without transactions) tie with the self-similarity, and
    # floating-point rounding can reorder near-1.0 scores, so the first entry
    # may be a different customer while the customer's own row stays in the list.
    neighbours = ranked[ranked != i][:TOP_N]

    # Store built-in floats rather than NumPy scalars so the values print and
    # serialise as plain numbers.
    lookalike_map[customer_id] = [
        (customer_ids[j], float(similarity_matrix[i][j])) for j in neighbours
    ]

# Save results: one row per customer; the `lookalikes` cell holds the text form
# of a list of (customer_id, score) tuples.
# newline='' lets the csv module control line endings; the explicit encoding
# keeps the file identical across platforms.
with open('Shivam_Sharma_Lookalike.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['cust_id', 'lookalikes'])
    for customer_id, lookalikes in lookalike_map.items():
        # The list is written via str(), which uses each element's repr.
        # Cast scores to built-in floats so they appear as plain numbers;
        # NumPy >= 2 would otherwise render them as `np.float64(...)`.
        plain_lookalikes = [(match_id, float(score)) for match_id, score in lookalikes]
        writer.writerow([customer_id, plain_lookalikes])


In [None]:
# Mount Google Drive only when the notebook runs on Google Colab.
# `google.colab` is normally importable only inside the Colab runtime, so a
# failed import is treated as "not on Colab" and the mount is skipped instead
# of aborting a Restart & Run All elsewhere.
try:
    from google.colab import drive
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True
    drive.mount('/content/drive')