In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler


In [4]:
# Read the three raw CSV tables (customers, products, transactions) in one pass.
customers, products, transactions = [
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/drive/MyDrive/CopyofCustomers.csv",
        "/content/drive/MyDrive/CopyofProducts.csv",
        "/content/drive/MyDrive/CopyofTransactions.csv",
    )
]

In [5]:
# Convert the date columns of both frames to pandas datetimes, in place.
for frame, date_column in (
    (customers, 'SignupDate'),
    (transactions, 'TransactionDate'),
):
    frame[date_column] = pd.to_datetime(frame[date_column])


In [6]:
# Attach customer attributes, then product attributes, to every transaction.
# Both joins use the default (inner) merge, so transactions whose CustomerID
# or ProductID has no match are dropped.
merged_data = pd.merge(
    pd.merge(transactions, customers, on="CustomerID"),
    products,
    on="ProductID",
)


In [7]:
# Per-customer spending profile: one row per CustomerID with totals and
# averages computed over that customer's merged transactions.
customer_features = (
    merged_data
    .groupby('CustomerID')
    .agg(
        total_spending=pd.NamedAgg(column='TotalValue', aggfunc='sum'),
        avg_transaction_value=pd.NamedAgg(column='TotalValue', aggfunc='mean'),
        total_transactions=pd.NamedAgg(column='TransactionID', aggfunc='count'),
        avg_quantity=pd.NamedAgg(column='Quantity', aggfunc='mean'),
    )
    .reset_index()
)


In [8]:
# One-hot encode each customer's Region and append the indicator columns to
# the feature table. From here on customer_features is indexed by CustomerID.
region_by_customer = customers.set_index('CustomerID')['Region']
region_dummies = pd.get_dummies(region_by_customer)
customer_features = (
    customer_features
    .set_index('CustomerID')
    .join(region_dummies, how='left')
)



In [9]:
# Normalize features for similarity calculation.
# NOTE(review): total_transactions is excluded from the scaled feature set;
# confirm that leaving it out of the similarity computation is intentional.
scaler = StandardScaler()
features_to_scale = customer_features.drop(columns=['total_transactions'])
normalized_features = scaler.fit_transform(features_to_scale)
# Take the labels from the frame that was actually scaled so every column
# name matches its data. Slicing customer_features.columns positionally does
# not account for which column was dropped and shifts the labels.
customer_features_normalized = pd.DataFrame(normalized_features,
                                            index=features_to_scale.index,
                                            columns=features_to_scale.columns)



In [10]:
# Pairwise cosine similarity between every pair of customers, wrapped in a
# square DataFrame whose rows and columns are both labelled by CustomerID.
customer_index = customer_features.index
similarity_matrix = cosine_similarity(customer_features_normalized)
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_index,
    columns=customer_index,
)


In [11]:
# Generate Lookalike Recommendations
# Maps each of the first 20 customers (in file order) to a dict of
# {similar CustomerID: cosine similarity} for its 3 closest neighbours,
# ordered from most to least similar.
lookalike_map = {}

for customer in customers['CustomerID'].head(20):  # First 20 customers
    # Customers with no rows in the merged transaction data have no feature
    # vector and therefore no entry in similarity_df; they are skipped.
    if customer in similarity_df.index:
        # Remove the customer's own entry by label rather than assuming it
        # sorts to position 0: when another customer ties at the top score,
        # a positional [1:4] slice can keep the customer itself and drop a
        # genuine neighbour.
        neighbour_scores = similarity_df.loc[customer].drop(labels=customer)
        lookalike_map[customer] = neighbour_scores.nlargest(3).to_dict()



In [12]:
# Write the lookalike table to Lookalike.csv: one row per customer, with the
# customer's id and its {neighbour id: similarity score} mapping.
lookalike_ids = list(lookalike_map)
lookalike_recommendations = [dict(lookalike_map[cid]) for cid in lookalike_ids]
output_df = pd.DataFrame({
    "cust_id": lookalike_ids,
    "recommendations": lookalike_recommendations,
})
output_df.to_csv("Lookalike.csv", index=False)

print("Lookalike recommendations saved to Lookalike.csv")

Lookalike recommendations saved to Lookalike.csv
