Import Required Libraries

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

Load Data

In [2]:
# Read the three input tables from CSV files in the current working directory.
# The generator is consumed left to right, so the files are read in the same
# order as the names they are unpacked into.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)


Merge Data

In [3]:
# Enrich every transaction with its customer's columns, then with its
# product's columns. Both joins are inner joins (the pandas default, spelled
# out here), so a transaction without a matching customer or product is dropped.
merged_data = (
    transactions
    .merge(customers, how="inner", on="CustomerID")
    .merge(products, how="inner", on="ProductID")
)

Create Customer Profiles

In [4]:
# Collapse the transaction-level table to one row per customer.
# Named aggregation gives each output column its final name directly:
#   TotalValue       - total amount across the customer's transactions
#   TransactionCount - number of transactions (non-null TransactionID values)
#   Quantity         - total number of units across the customer's transactions
#   Category         - the category that occurs most often for the customer
customer_profiles = merged_data.groupby('CustomerID').agg(
    TotalValue=('TotalValue', 'sum'),
    TransactionCount=('TransactionID', 'count'),
    Quantity=('Quantity', 'sum'),
    Category=('Category', lambda categories: categories.value_counts().idxmax()),
)


Convert Categorical Features to Dummies

In [5]:
# Replace the single 'Category' column with one indicator column per category
# value (named 'Category_<value>') so the profile is entirely numeric.
# NOTE: this rebinds customer_profiles, so the cell cannot be re-run on its own
# output - the 'Category' column no longer exists after the first run.
customer_profiles = pd.get_dummies(
    data=customer_profiles,
    prefix='Category',
    columns=['Category'],
)

Normalize Numerical Features

In [6]:
# Only the raw-magnitude columns are rescaled; the one-hot category columns
# are left untouched.
numerical_features = ['TotalValue', 'TransactionCount', 'Quantity']

# Standardize each listed column (StandardScaler's default is zero mean and
# unit variance) and write the scaled values back into the profile table.
scaler = StandardScaler()
customer_profiles[numerical_features] = (
    scaler
    .fit(customer_profiles[numerical_features])
    .transform(customer_profiles[numerical_features])
)

Compute Cosine Similarity Matrix

In [7]:
# Pairwise cosine similarity between every pair of customer profile rows.
# With only X supplied, the rows are compared against themselves, giving a
# square matrix whose entry [i, j] is the similarity of customer i to customer j.
similarity_matrix = cosine_similarity(X=customer_profiles)

Convert Similarity Matrix to DataFrame

In [8]:
# Label both axes of the similarity matrix with the profile index (CustomerID)
# so scores can be looked up by customer ID instead of by position.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_profiles.index,
    columns=customer_profiles.index,
)

Identify Top 3 Lookalike Customers for the First 20 Customers

In [9]:
# Build one output record per target customer: the customer's ID plus its most
# similar *other* customers, each rendered as the string "(CustomerID, score)".

# How many customers (taken from the start of the profile index) get a record,
# and how many lookalikes are reported for each of them.
NUM_TARGET_CUSTOMERS = 20
NUM_LOOKALIKES = 3

lookalike_data = []

for customer_id in customer_profiles.index[:NUM_TARGET_CUSTOMERS]:
    # Remove the customer's own entry by label before ranking. Skipping the
    # first row of a sorted column is not reliable: another customer with an
    # identical profile ties at the same score, and floating-point rounding can
    # leave the self-similarity marginally below 1.0, so the customer itself is
    # not guaranteed to sort first.
    similar_customers = (
        similarity_df[customer_id]
        .drop(customer_id)
        .nlargest(NUM_LOOKALIKES)
    )

    # One "(CustomerID, score)" string per lookalike, best match first.
    # Filling the record from whatever nlargest returned avoids an IndexError
    # when fewer than NUM_LOOKALIKES other customers exist; in that case the
    # record simply has fewer Lookalike_<n> keys.
    lookalike_row = {"CustomerID": customer_id}
    for rank, (lookalike_id, score) in enumerate(similar_customers.items(), start=1):
        lookalike_row[f"Lookalike_{rank}"] = f"({lookalike_id}, {score:.4f})"
    lookalike_data.append(lookalike_row)

Create DataFrame for Lookalike Data

In [10]:
# Turn the list of per-customer records into a table: one row per customer,
# one column per record key.
lookalike_df = pd.DataFrame(data=lookalike_data)

Save Lookalike Data to CSV

In [11]:
# Write the lookalike table to a CSV file in the current working directory.
# index=False leaves the DataFrame's positional row index out of the file.
lookalike_df.to_csv(
    path_or_buf="Kuldeep_Lookalike.csv",
    index=False,
)