<a href="https://colab.research.google.com/github/SBoo9/Zeotap_Assignment/blob/main/Sujoy_Banerjee_Lookalike.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [2]:
# Load datasets
# Both CSVs are read from the current working directory, customers first.
customers, transactions = (
    pd.read_csv(csv_name) for csv_name in ('Customers.csv', 'Transactions.csv')
)

In [3]:
# Merge data
# Inner join on CustomerID: each transaction row is paired with its customer's
# columns, and customers without any transaction do not appear in the result.
data = customers.merge(transactions, on='CustomerID', how='inner')

In [4]:
# Feature engineering
# Collapse the merged rows to one row per customer:
#   Quantity   -> total over the customer's rows
#   TotalValue -> average over the customer's rows
#   Price      -> average over the customer's rows
# reset_index() turns CustomerID back into the first regular column.
features = (
    data.groupby('CustomerID')
    .agg(
        Quantity=('Quantity', 'sum'),
        TotalValue=('TotalValue', 'mean'),
        Price=('Price', 'mean'),
    )
    .reset_index()
)

In [5]:
# Standardize features
# Column 0 of `features` is CustomerID; everything after it is a numeric feature.
# Scaling puts the features on a comparable footing before similarity is computed.
scaler = StandardScaler()
scaler.fit(features.iloc[:, 1:])
scaled_features = scaler.transform(features.iloc[:, 1:])

In [6]:
# Compute similarity
# Pairwise cosine similarity between every pair of customers' scaled feature rows.
similarity_matrix = cosine_similarity(scaled_features)
# Label both axes with CustomerID so scores can be looked up by customer.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=features['CustomerID'],
    columns=features['CustomerID'],
)

In [7]:
# Generate top 3 lookalikes
# `lookalikes` maps each CustomerID to a list of (other CustomerID, similarity)
# pairs, ordered from most to least similar.
lookalikes = {}
for customer in features['CustomerID']:
    # Remove the customer's own entry by label rather than assuming it sorts to
    # position 0: a tie at the top score (e.g. another customer with identical
    # features) or an all-zero scaled vector (self-similarity 0) would otherwise
    # leave the customer in their own lookalike list and push out a real match.
    other_scores = similarity_df[customer].drop(customer)
    # nlargest returns the scores in descending order; ties keep index order.
    similar_customers = other_scores.nlargest(3)
    lookalikes[customer] = list(similar_customers.items())

In [8]:
# Save to CSV
# One record per customer; the list of (CustomerID, score) pairs is stored as
# its Python string representation in the 'Lookalikes' column.
output = [
    {'CustomerID': customer_id, 'Lookalikes': str(matches)}
    for customer_id, matches in lookalikes.items()
]
# NOTE(review): the file name looks like an unfilled template placeholder — confirm.
pd.DataFrame(output).to_csv(
    'FirstName_LastName_Lookalike.csv',
    index=False,
)