In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler


In [2]:
# Load data
# Source CSVs are fetched over HTTP from Google Drive direct-download links,
# so this cell needs network access.
customers_url = "https://drive.google.com/uc?id=1bu_--mo79VdUG9oin4ybfFGRUSXAe-WE"
products_url = "https://drive.google.com/uc?id=1IKuDizVapw-hyktwfpoAoaGtHtTNHfd0"
transactions_url = "https://drive.google.com/uc?id=1saEqdbBB-vuk2hxoAf4TzDEsykdKlzbF"

# Read the three tables in the same order as the URLs are listed.
customers, products, transactions = (
    pd.read_csv(source_url)
    for source_url in (customers_url, products_url, transactions_url)
)

In [3]:
# Merge data
# Parse the transaction timestamp in place, then attach customer and product
# attributes to every transaction row. Left joins keep every transaction even
# when its customer or product has no match (those columns become NaN).
transactions['TransactionDate'] = pd.to_datetime(transactions['TransactionDate'])
merged_data = (
    transactions
    .merge(customers, on='CustomerID', how='left')
    .merge(products, on='ProductID', how='left')
)


In [4]:
# Feature Engineering
# Aggregate transaction data by customer
def _most_frequent_category(categories):
    """Return the most frequent non-null value in ``categories``.

    Returns NaN when the group holds no non-null value. That can happen
    because products are attached with a left join, so a transaction whose
    product has no match carries a NaN category. Calling ``idxmax()`` on the
    resulting empty counts would raise ``ValueError``.
    """
    counts = categories.value_counts()  # NaN entries are excluded by default
    if counts.empty:
        return np.nan
    return counts.idxmax()


# One row per customer: total spend, total units, the category that appears on
# the most transaction rows, plus the first Region / SignupDate seen.
customer_features = merged_data.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'sum',
    'Category': _most_frequent_category,  # Most purchased category
    'Region': 'first',
    'SignupDate': 'first'
}).reset_index()

In [5]:
# convert categorical data to numeric
# Keep every level (drop_first=False). These columns feed a cosine-similarity
# computation, not a regression: dropping the first level would encode that
# level as all zeros, so two customers sharing the dropped Region/Category
# would get no credit for the match while customers sharing any other level
# would. Full one-hot encoding treats all levels symmetrically.
customer_features = pd.get_dummies(
    customer_features,
    columns=['Region', 'Category'],
    drop_first=False,
)


In [6]:
# Standardize numerical features
# Fit the scaler on the numeric columns and write the scaled values back into
# the same columns of customer_features.
numeric_cols = ['TotalValue', 'Quantity']
scaler = StandardScaler()
scaled_numeric = scaler.fit_transform(customer_features[numeric_cols])
customer_features[numeric_cols] = scaled_numeric

In [7]:
# Compute similarity matrix
# Keep the IDs aside (row i of the matrix corresponds to customer_ids.iloc[i]),
# drop the columns that are not used as features, and compare every customer
# with every other customer.
customer_ids = customer_features['CustomerID']
non_feature_cols = ['CustomerID', 'SignupDate']
customer_features_matrix = customer_features.drop(columns=non_feature_cols)
similarity_matrix = cosine_similarity(customer_features_matrix)


In [8]:
# Generate Lookalike Recommendations
def get_top_lookalikes(customer_index, top_n=3):
    """Return the ``top_n`` customers most similar to the one at ``customer_index``.

    Reads the module-level ``similarity_matrix`` (row ``i`` holds the scores of
    customer ``i`` against every customer) and ``customer_ids`` (positional
    lookup of the customer ID for row ``i``).

    Args:
        customer_index: Positional row index of the query customer.
        top_n: Maximum number of lookalikes to return.

    Returns:
        A list of ``(customer_id, similarity_score)`` tuples ordered from most
        to least similar. The query customer itself is never included, so the
        list is shorter than ``top_n`` when fewer other customers exist.
    """
    similarities = similarity_matrix[customer_index]
    # Resolve negative indices to the actual row position so the self-filter
    # below compares like with like.
    self_position = range(len(similarities))[customer_index]

    ranked = similarities.argsort()[::-1]
    # Remove the query customer explicitly instead of assuming it is ranked
    # first: with ties or floating-point rounding another customer can score
    # as high as (or higher than) the self-similarity, and slicing off
    # position 0 would then drop a real lookalike and keep the customer itself.
    ranked = ranked[ranked != self_position][:max(top_n, 0)]

    return [(customer_ids.iloc[i], similarities[i]) for i in ranked]

In [9]:
# Generate recommendations for the first 20 customers
# Clamp to the number of customers actually present so a smaller dataset does
# not raise IndexError on customer_ids.iloc[i].
# NOTE(review): positions refer to rows of customer_features, which is built
# from transactions, so customers without any transaction are not included.
n_target_customers = min(20, len(customer_ids))

lookalike_results = {}
for i in range(n_target_customers):
    cust_id = customer_ids.iloc[i]
    lookalikes = get_top_lookalikes(i)
    lookalike_results[cust_id] = lookalikes


In [10]:
# Save results to Lookalike.csv
# Flatten the {customer: [(similar_customer, score), ...]} mapping into one
# record per (customer, lookalike) pair.
lookalike_data = [
    {'cust_id': cust_id, 'similar_cust_id': similar_cust_id, 'score': score}
    for cust_id, lookalikes in lookalike_results.items()
    for similar_cust_id, score in lookalikes
]

# Write the long-format table without the positional index column.
lookalike_df = pd.DataFrame(lookalike_data)
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike recommendations saved to Lookalike.csv.")


Lookalike recommendations saved to Lookalike.csv.
