In [1]:
import pandas as pd

# Read the customer and product tables from the current working directory.
customers, products = (
    pd.read_csv(csv_name) for csv_name in ("Customers.csv", "Products.csv")
)


In [3]:
from datetime import datetime

# Region label -> integer category code.
region_as_category = customers["Region"].astype("category")
customers["RegionEncoded"] = region_as_category.cat.codes

# Parse the signup timestamps, then express account age in whole days.
# NOTE: the age is measured against the wall clock (datetime.now()), so the
# absolute values differ depending on when the cell is executed.
signup_dates = pd.to_datetime(customers["SignupDate"])
customers["SignupDate"] = signup_dates
time_since_signup = datetime.now() - signup_dates
customers["AccountAge"] = time_since_signup.dt.days

# Keep the ID plus the two engineered columns as the base feature table.
feature_columns = ["CustomerID", "RegionEncoded", "AccountAge"]
customer_features = customers[feature_columns]


In [4]:
import random

# Creating a dummy product preference feature for customers.
# NOTE: the preference is synthetic (randomly assigned), not derived from data.
categories = products["Category"].unique()

# Assign a random product preference to each customer. A dedicated, seeded
# generator makes the assignment reproducible across kernel restarts without
# touching the global `random` state.
preference_rng = random.Random(42)
customers["PreferredCategory"] = [
    preference_rng.choice(categories) for _ in range(len(customers))
]

# One-hot encode the product categories
preferred_category_encoded = pd.get_dummies(customers["PreferredCategory"], prefix="Category")

# Merge the encoded categories with customer features. Any "Category_*" columns
# left over from an earlier run of this cell are dropped first, so re-running
# the cell replaces them instead of appending duplicate columns.
base_feature_columns = [
    column
    for column in customer_features.columns
    if not str(column).startswith("Category_")
]
customer_features = pd.concat(
    [customer_features[base_feature_columns], preferred_category_encoded], axis=1
)


In [5]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Standardise every feature column (everything except the ID) before
# measuring similarity.
feature_matrix = customer_features.drop("CustomerID", axis=1)
scaler = StandardScaler()
scaled_features = scaler.fit_transform(feature_matrix)

# Cosine similarity between every pair of rows of the scaled feature matrix.
similarity_matrix = cosine_similarity(scaled_features)


In [6]:
import numpy as np

# Lookup table: CustomerID -> row position in the similarity matrix
customer_ids = customer_features["CustomerID"].values
customer_index_map = dict(zip(customer_ids, range(len(customer_ids))))

# Function to find top N similar customers
def get_top_similar_customers(customer_id, top_n=3):
    """Return the `top_n` customers most similar to `customer_id`.

    Args:
        customer_id: Key of `customer_index_map` identifying the query customer.
        top_n: Maximum number of lookalikes to return; values <= 0 yield an
            empty list.

    Returns:
        A list of `(customer_id, score)` tuples built from `customer_ids` and
        the query customer's row of `similarity_matrix`, ordered from highest
        to lowest score. The query customer itself is never included.

    Raises:
        KeyError: If `customer_id` is not present in `customer_index_map`.
    """
    idx = customer_index_map[customer_id]
    scores = similarity_matrix[idx]
    ranked = np.argsort(scores)[::-1]  # row positions, best score first
    # Remove the query customer by position instead of assuming it sorts
    # first: another customer can tie with the self-similarity score, in which
    # case slicing off element 0 would return the query customer as its own
    # lookalike and drop a genuine match.
    ranked = ranked[ranked != idx][: max(top_n, 0)]
    return [(customer_ids[i], scores[i]) for i in ranked]

# Generate lookalikes for the first 20 customers in file order
# (C0001 to C0020, assuming Customers.csv is sorted by CustomerID).
lookalikes = {}
for customer_id in customer_ids[:20]:  # First 20 customers
    lookalikes[customer_id] = get_top_similar_customers(customer_id)


def _as_builtin(value):
    """Return the plain-Python equivalent of a NumPy scalar; pass others through."""
    return value.item() if isinstance(value, np.generic) else value


# Convert to DataFrame. The (id, score) pairs are converted to built-in types
# before being stringified: under NumPy >= 2 the repr of a NumPy scalar is
# e.g. "np.float64(0.97)", which would otherwise leak into the CSV text.
lookalikes_df = pd.DataFrame(
    [
        {
            "cust_id": key,
            "lookalikes": str(
                [(_as_builtin(cid), _as_builtin(score)) for cid, score in value]
            ),
        }
        for key, value in lookalikes.items()
    ]
)

# Save to CSV
lookalikes_df.to_csv("Sheshu_Enabuthula_Lookalike.csv", index=False)
