<a href="https://colab.research.google.com/github/Kowsalyasriganesh/zeotap/blob/main/Kowsalya_SriGanesh_Lookalike.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

In [2]:
# Load the three input tables from CSV files in the current working directory,
# in the same order as before: customers, products, transactions.
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

In [4]:
def build_lookalike_model(top_n=3, n_customers=20, output_path='Lookalike.csv'):
    """Build lookalike recommendations and write them to a CSV file.

    Reads the module-level ``customers_df`` and ``transactions_df`` frames.
    Each customer is described by the sum of ``TotalValue`` and the sum of
    ``Quantity`` over their transactions; these two features are standardized
    and compared with cosine similarity.

    Parameters
    ----------
    top_n : int, default 3
        Number of most-similar customers to keep per customer.
    n_customers : int, default 20
        Recommendations are produced for the first ``n_customers`` rows of
        the merged customer table.
    output_path : str, default 'Lookalike.csv'
        Destination of the CSV file.

    Returns
    -------
    pandas.DataFrame
        One row per processed customer with columns ``CustomerID`` and
        ``SimilarCustomers`` (a list of ``(customer_id, score)`` tuples,
        highest score first). The same frame is written to ``output_path``.
    """
    feature_cols = ['TotalValue', 'Quantity']

    # Aggregate spend and quantity per customer.
    customer_transactions = (
        transactions_df
        .groupby('CustomerID')
        .agg({'TotalValue': 'sum', 'Quantity': 'sum'})
        .reset_index()
    )

    # Left merge keeps customers without transactions; give them zero totals.
    # Only the aggregated columns are filled so that missing customer
    # attributes are not silently overwritten with 0.
    customer_profiles = (
        customers_df
        .merge(customer_transactions, on='CustomerID', how='left')
        .fillna({col: 0 for col in feature_cols})
    )

    # Standardize numeric features
    scaler = StandardScaler()
    features = scaler.fit_transform(customer_profiles[feature_cols])

    # Pairwise cosine similarity, labelled by CustomerID on both axes
    similarity = cosine_similarity(features)
    customer_ids = customer_profiles['CustomerID']
    similarity_df = pd.DataFrame(similarity, index=customer_ids, columns=customer_ids)

    records = []
    for customer_id in customer_ids.head(n_customers):
        # Remove the customer's own entry by label. Relying on it being ranked
        # first is unsafe: with two features many customers tie at ~1.0, so
        # the customer could land inside its own recommendation list.
        scores = similarity_df[customer_id].drop(labels=customer_id)
        top_matches = scores.nlargest(top_n)
        records.append({
            'CustomerID': customer_id,
            # Plain Python floats keep the stringified list in the CSV free of
            # NumPy scalar wrappers such as "np.float64(...)".
            'SimilarCustomers': [
                (other_id, float(score)) for other_id, score in top_matches.items()
            ],
        })

    # Save lookalike results
    lookalike_df = pd.DataFrame(records, columns=['CustomerID', 'SimilarCustomers'])
    lookalike_df.to_csv(output_path, index=False)
    return lookalike_df

# Run the model first so that Lookalike.csv is actually written before the
# success message is reported; defining the function alone writes nothing.
build_lookalike_model()
print("Lookalike recommendations saved to Lookalike.csv")

Lookalike recommendations saved to Lookalike.csv
