**Name - Mohammad Saify Sheikh**

This Lookalike Model identifies the top 3 most similar customers based on their purchase behavior, spending habits, and category preferences. It uses a weighted cosine similarity approach combined with Manhattan distance-based clustering to ensure accurate recommendations. The model normalizes features, assigns weights.

In [14]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import RobustScaler, MinMaxScaler
from sklearn.neighbors import NearestNeighbors
from scipy.spatial.distance import cosine

customers = pd.read_csv("Customers.csv")
products = pd.read_csv("Products.csv")
transactions = pd.read_csv("Transactions.csv")

merged = transactions.merge(customers, on="CustomerID").merge(products, on="ProductID")

customer_features = merged.groupby("CustomerID").agg({
    "TotalValue": "sum",
    "TransactionID": "count",
    "Price_x": "mean",
    "Category": lambda x: x.mode()[0] if not x.mode().empty else "Unknown",
    "TransactionDate": lambda x: (pd.to_datetime("2024-12-31") - pd.to_datetime(x)).mean().days,
}).reset_index()

customer_features["Category"] = customer_features["Category"].astype("category").cat.codes

weights = {
    "TotalValue": 1.5,
    "TransactionID": 1.2,
    "Price_x": 1.0,
    "Category": 1.3,
    "TransactionDate": 0.8
}

for feature, weight in weights.items():
    customer_features[feature] *= weight

scaler = MinMaxScaler()
customer_features_scaled = scaler.fit_transform(customer_features.drop(columns=["CustomerID"]))

def weighted_cosine_similarity(a, b):
    return 1 - cosine(a, b)

knn = NearestNeighbors(n_neighbors=4, metric="manhattan")
knn.fit(customer_features_scaled)

def find_lookalikes(cust_id):
    if cust_id not in customer_features["CustomerID"].values:
        print(f"⚠️ Customer ID {cust_id} not found.")
        return

    idx = customer_features[customer_features["CustomerID"] == cust_id].index[0]
    distances, indices = knn.kneighbors([customer_features_scaled[idx]])

    similar_customers = []

    for dist, idx in zip(distances[0][1:], indices[0][1:]):
        sim_cust_id = customer_features.iloc[idx]["CustomerID"]
        sim_score = round(weighted_cosine_similarity(customer_features_scaled[idx], customer_features_scaled[indices[0][0]]), 4)
        similar_customers.append((sim_cust_id, sim_score))

    # Sort
    similar_customers = sorted(similar_customers, key=lambda x: x[1], reverse=True)

    print(f"\n🔍 **Similar Customers to {cust_id}:**")
    for i, (sim_cust_id, score) in enumerate(similar_customers, 1):
        print(f"{i}. Customer ID: {sim_cust_id} | Similarity Score: {score}")

    return similar_customers

cust_id_input = input("Enter Customer ID to find similar customers: ").strip()
find_lookalikes(cust_id_input)


Enter Customer ID to find similar customers: C0001

🔍 **Similar Customers to C0001:**
1. Customer ID: C0069 | Similarity Score: 0.9947
2. Customer ID: C0055 | Similarity Score: 0.994
3. Customer ID: C0072 | Similarity Score: 0.9925


[('C0069', 0.9947), ('C0055', 0.994), ('C0072', 0.9925)]