In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [2]:
def load_data(data_dir='/content'):
    """Read the three source CSV files and join them at transaction level.

    Parameters
    ----------
    data_dir : str or path-like, default '/content'
        Directory that contains ``Customers.csv``, ``Products.csv`` and
        ``Transactions.csv``. The default is the location the notebook
        originally hard-coded, so ``load_data()`` behaves as before.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(customers, products, transactions, data)``. The first three are the
        CSV files exactly as read; ``data`` is ``transactions`` left-joined
        with ``customers`` on ``CustomerID`` and with ``products`` on
        ``ProductID``.
    """
    data_dir = Path(data_dir)

    customers = pd.read_csv(data_dir / 'Customers.csv')
    products = pd.read_csv(data_dir / 'Products.csv')
    transactions = pd.read_csv(data_dir / 'Transactions.csv')

    # Left joins keep every transaction even when its customer or product row
    # is missing. pandas appends '_x' / '_y' to overlapping non-key columns,
    # so the rename gives the two price columns explicit names.
    # NOTE(review): this assumes both Transactions.csv and Products.csv carry a
    # 'Price' column; if only one does, no suffix is added and the rename is a
    # silent no-op - confirm against the real files.
    data = (
        transactions
        .merge(customers, on='CustomerID', how='left')
        .merge(products, on='ProductID', how='left')
        .rename(columns={'Price_x': 'TransactionPrice', 'Price_y': 'ProductPrice'})
    )

    return customers, products, transactions, data

In [5]:
def lookalike_model(customers, data, n_customers=20, top_n=3,
                    output_path='Suryansh_Jaiswal_Lookalike.csv'):
    """Rank the most similar customers for the first ``n_customers`` profiles.

    Each customer is summarised by total ``TotalValue``, total ``Quantity`` and
    mean ``ProductPrice`` over their rows in ``data``. The three features are
    standardised and compared with cosine similarity.

    Parameters
    ----------
    customers : pandas.DataFrame
        Not used by the computation; kept so existing calls keep working.
    data : pandas.DataFrame
        Transaction-level frame with ``CustomerID``, ``TotalValue``,
        ``Quantity`` and ``ProductPrice`` columns.
    n_customers : int, default 20
        Number of leading rows of the per-customer profile table to produce
        lookalikes for. Clamped to the number of profiles available.
    top_n : int, default 3
        Number of lookalikes per customer. Clamped to the number of *other*
        customers available.
    output_path : str or path-like, default 'Suryansh_Jaiswal_Lookalike.csv'
        Destination of the CSV file.

    Returns
    -------
    pandas.DataFrame
        Indexed by customer id with columns ``Top1`` ... ``Top<k>``; every cell
        is a ``(customer_id, similarity)`` tuple. The same table is written to
        ``output_path``.
    """
    feature_cols = ['TotalValue', 'Quantity', 'ProductPrice']

    # One row per customer; reset_index turns CustomerID back into a column so
    # that row position i lines up with row/column i of the similarity matrix.
    customer_profiles = data.groupby('CustomerID').agg({
        'TotalValue': 'sum',
        'Quantity': 'sum',
        'ProductPrice': 'mean',
    }).reset_index()

    # Standardise so that no feature dominates the similarity purely by scale.
    scaler = StandardScaler()
    profile_matrix = scaler.fit_transform(customer_profiles[feature_cols])
    similarity_matrix = cosine_similarity(profile_matrix)

    customer_ids = customer_profiles['CustomerID'].tolist()
    n_profiles = len(customer_ids)
    # Clamp both limits so small datasets do not raise IndexError / a column
    # count mismatch.
    n_targets = max(min(n_customers, n_profiles), 0)
    n_matches = max(min(top_n, n_profiles - 1), 0)

    lookalikes = {}
    for idx in range(n_targets):
        # Stable sort keeps tie order deterministic (ascending row position).
        ranked = np.argsort(-similarity_matrix[idx], kind='stable')
        # Drop the customer itself explicitly: with ties or rounding at
        # similarity ~1.0 it is not guaranteed to be the first element.
        ranked = ranked[ranked != idx][:n_matches]
        # float() keeps the CSV cells as "('C0002', 0.98)" instead of the
        # "np.float64(0.98)" repr that NumPy >= 2 produces inside tuples.
        lookalikes[customer_ids[idx]] = [
            (customer_ids[j], float(similarity_matrix[idx, j])) for j in ranked
        ]

    # Save results
    columns = [f'Top{rank}' for rank in range(1, n_matches + 1)]
    lookalike_df = pd.DataFrame.from_dict(lookalikes, orient='index', columns=columns)
    lookalike_df.to_csv(output_path, index_label='CustomerID')
    print(f"Lookalike model results saved to {output_path}.")

    return lookalike_df

In [6]:
# Driver cell: load the data, then build and save the lookalike table.
# The guard lets the same code run as a plain script; the recorded output
# below shows that the branch also executes when run as a notebook cell.
if __name__ == "__main__":
    # Unpack the three raw tables plus the merged transaction-level frame.
    customers, products, transactions, data = load_data()
    print("--- Building Lookalike Model ---")
    # Computes the lookalikes and writes them to a CSV file as a side effect.
    lookalike_model(customers, data)

--- Building Lookalike Model ---
Lookalike model results saved to Lookalike.csv.
