<a href="https://colab.research.google.com/github/A-P-Dharanya/Zeotap/blob/main/DHARANYA_AP_EDA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Task 2: Lookalike Model

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Columns of the customer profile that feed the similarity computation.
FEATURE_COLUMNS = ['total_spend', 'purchase_count', 'recency']


def build_customer_profile(transactions, reference_date=None):
    """Aggregate transactions into one feature row per customer.

    Args:
        transactions: DataFrame with the columns ``CustomerID``,
            ``TotalValue`` and ``TransactionDate``.
        reference_date: Date that recency is measured against. Defaults to
            today at midnight, so the result does not depend on the time of
            day the code is run.

    Returns:
        DataFrame with the columns ``CustomerID``, ``total_spend``,
        ``purchase_count`` and ``recency`` (whole days between the
        customer's latest transaction and ``reference_date``).
    """
    # Parse dates *before* aggregating so that 'max' compares timestamps
    # rather than strings (string order is only correct for ISO-like formats).
    dated = transactions.assign(
        TransactionDate=pd.to_datetime(transactions['TransactionDate'])
    )
    profile = dated.groupby('CustomerID').agg(
        total_spend=('TotalValue', 'sum'),
        purchase_count=('TotalValue', 'count'),
        last_purchase_date=('TransactionDate', 'max'),
    ).reset_index()

    if reference_date is None:
        # Midnight-aligned: shifting the reference by whole days shifts every
        # recency by the same constant, which standardisation later removes.
        reference_date = pd.Timestamp.today().normalize()
    else:
        reference_date = pd.Timestamp(reference_date)

    profile['recency'] = (reference_date - profile['last_purchase_date']).dt.days
    return profile.drop(columns=['last_purchase_date'])


def top_lookalikes(customer_ids, similarity, target_ids, top_n=3):
    """Return the ``top_n`` most similar other customers for each target.

    Args:
        customer_ids: Customer IDs in the same order as the rows/columns of
            ``similarity`` (expected to be unique).
        similarity: Square matrix where ``similarity[i][j]`` is the score
            between ``customer_ids[i]`` and ``customer_ids[j]``.
        target_ids: Customer IDs to produce recommendations for.
        top_n: Number of lookalikes to keep per target.

    Returns:
        Dict mapping each target ID to a list of ``(customer_id, score)``
        tuples, highest score first. A target that is absent from
        ``customer_ids`` maps to an empty list instead of raising.
    """
    ids = list(customer_ids)
    row_of = {cust_id: row for row, cust_id in enumerate(ids)}

    lookalikes = {}
    for target in target_ids:
        row = row_of.get(target)
        if row is None:
            lookalikes[target] = []
            continue

        # Exclude the target by ID rather than by dropping the first sorted
        # entry: another customer can tie with the self-similarity score and
        # sort ahead of it, which would recommend the target to itself.
        candidates = [
            (other, float(score))
            for other, score in zip(ids, similarity[row])
            if other != target
        ]
        # list.sort is stable, so equal scores keep their original order.
        candidates.sort(key=lambda pair: pair[1], reverse=True)
        lookalikes[target] = candidates[:top_n]
    return lookalikes


# In a notebook cell or a script run, __name__ is '__main__', so everything
# below executes as before and the names stay module-level for later cells.
# The guard only prevents file I/O when the helpers above are imported.
if __name__ == '__main__':
    customers_df = pd.read_csv('/content/Customers.csv')
    products_df = pd.read_csv('/content/Products.csv')
    transactions_df = pd.read_csv('/content/Transactions.csv')

    # 1. Data Preprocessing
    # Inner joins keep only transactions whose customer and product are known.
    merged_data = pd.merge(transactions_df, customers_df, on='CustomerID', how='inner')
    merged_data = pd.merge(merged_data, products_df, on='ProductID', how='inner')

    # 2. Feature Engineering
    customer_profile = build_customer_profile(merged_data)

    # Standardise so that no single feature dominates the cosine similarity.
    scaler = StandardScaler()
    customer_profile_scaled = scaler.fit_transform(customer_profile[FEATURE_COLUMNS])

    # 3. Calculate Cosine Similarity between customers
    cos_sim_matrix = cosine_similarity(customer_profile_scaled)

    # 4. Build the Lookalike Model for the first 20 customers (C0001 - C0020)
    target_customer_ids = [f'C{number:04d}' for number in range(1, 21)]
    lookalike_map = top_lookalikes(
        customer_profile['CustomerID'],
        cos_sim_matrix,
        target_customer_ids,
        top_n=3,
    )

    without_lookalikes = [cust_id for cust_id, recs in lookalike_map.items() if not recs]
    if without_lookalikes:
        print(f'No lookalikes found for: {without_lookalikes}')

    # 5. Save the Lookalike recommendations to a CSV
    lookalike_rows = [
        [cust_id, rec_id, score]
        for cust_id, recommendations in lookalike_map.items()
        for rec_id, score in recommendations
    ]
    lookalike_df = pd.DataFrame(
        lookalike_rows,
        columns=['CustomerID', 'RecommendedCustomerID', 'SimilarityScore'],
    )
    lookalike_df.to_csv('Lookalike.csv', index=False)

    # Top 3 lookalikes for each customer (C0001 - C0020)
    print(lookalike_map)
