In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score

In [None]:
# Read the three input tables from CSV files in the working directory,
# in the order: customers, products, transactions.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)

In [None]:
def lookalike_model(n_customers: int = 20, top_n: int = 3,
                    output_path: str = 'Mohit_Chaudhary_Lookalike.csv') -> None:
    """Find the most similar customers for the first ``n_customers`` and save them to CSV.

    Uses the module-level ``transactions``, ``customers`` and ``products``
    frames. Each customer is profiled by total ``TotalValue``, total
    ``Quantity`` and their most frequent ``Region`` (one-hot encoded); profiles
    are compared with cosine similarity.

    Args:
        n_customers: How many customers (taken from the start of the profile
            table, which is in ``groupby`` key order) to produce lookalikes for.
        top_n: How many lookalikes to keep per customer.
        output_path: Destination CSV. It gets two columns: ``CustomerID`` and
            ``Lookalikes`` (a stringified list of ``(customer_id, score)``
            tuples, best match first).

    Raises:
        ValueError: If ``n_customers`` or ``top_n`` is negative.
    """
    if n_customers < 0 or top_n < 0:
        raise ValueError("n_customers and top_n must be non-negative")

    # Merge datasets (merge's default inner join: rows without a matching
    # customer or product are dropped).
    merged_data = transactions.merge(customers, on='CustomerID').merge(products, on='ProductID')

    # Generate customer profiles
    customer_profiles = merged_data.groupby('CustomerID').agg({
        'TotalValue': 'sum',
        'Quantity': 'sum',
        'Region': lambda x: x.mode()[0]  # first mode when several regions tie
    }).reset_index()

    # Encode categorical features; force float so the feature matrix has one
    # numeric dtype regardless of the pandas version's default dummy dtype.
    customer_profiles = pd.get_dummies(customer_profiles, columns=['Region'], dtype=float)

    # Plain Python IDs: positional access that does not depend on the frame's
    # index labels, and no NumPy scalar reprs in the written output.
    customer_ids = customer_profiles['CustomerID'].tolist()
    features = customer_profiles.drop(columns=['CustomerID']).to_numpy(dtype=float)

    # Calculate similarity
    # NOTE(review): features are not scaled, so the large-magnitude columns
    # dominate the cosine similarity and Region contributes very little.
    # Standardizing first would change the rankings, so it is left as is here.
    similarity_matrix = cosine_similarity(features)

    # Get lookalikes
    lookalikes = {}
    for i, customer_id in enumerate(customer_ids[:n_customers]):
        scores = similarity_matrix[i]
        # Stable descending order keeps ties in original row order.
        ranked = np.argsort(-scores, kind='stable')
        ranked = ranked[ranked != i][:top_n]  # a customer is not their own lookalike
        # float() keeps the CSV free of "np.float64(...)" reprs under NumPy >= 2.
        lookalikes[customer_id] = [(customer_ids[j], float(scores[j])) for j in ranked]

    # Save to CSV
    lookalike_df = pd.DataFrame({
        'CustomerID': list(lookalikes.keys()),
        'Lookalikes': [str(matches) for matches in lookalikes.values()]
    })
    lookalike_df.to_csv(output_path, index=False)

# Run the lookalike pipeline; it writes Mohit_Chaudhary_Lookalike.csv to the working directory.
lookalike_model()