In [69]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import numpy as np
from google.colab import files

In [70]:
# Load the customers, products and transactions tables from their Google Drive
# download links, in that order.
customers, products, transactions = (
    pd.read_csv(csv_url)
    for csv_url in (
        'https://drive.google.com/uc?id=1bu_--mo79VdUG9oin4ybfFGRUSXAe-WE',
        'https://drive.google.com/uc?id=1IKuDizVapw-hyktwfpoAoaGtHtTNHfd0',
        'https://drive.google.com/uc?id=1saEqdbBB-vuk2hxoAf4TzDEsykdKlzbF',
    )
)

In [71]:
# Parse the date columns with pd.to_datetime so they carry a datetime dtype.
customers = customers.assign(SignupDate=pd.to_datetime(customers['SignupDate']))
transactions = transactions.assign(
    TransactionDate=pd.to_datetime(transactions['TransactionDate'])
)

In [72]:
# Merge datasets: attach customer attributes to each transaction on CustomerID,
# then product attributes on ProductID (pandas' default inner joins).
merged_data = pd.merge(
    pd.merge(transactions, customers, on='CustomerID'),
    products,
    on='ProductID',
)

In [73]:
# Feature engineering: Customer Lifetime Value (CLV).
# CLV is computed here as the sum of TotalValue over each customer's merged rows.
customer_clv = (
    merged_data
    .groupby('CustomerID')['TotalValue']
    .sum()
    .reset_index()
    .rename(columns={'TotalValue': 'CLV'})
)


In [74]:
# Purchase frequency per customer.
# Despite the column name 'AvgPurchaseFrequency', this is the raw number of
# merged transaction rows per customer (groupby size), not an average.
avg_purchase_freq = (
    merged_data
    .groupby('CustomerID')
    .size()
    .rename('AvgPurchaseFrequency')
    .reset_index()
)

In [75]:
# Per-customer category profile: total Quantity bought in each product Category
# (one row per CustomerID, one column per Category, 0 where nothing was bought).
# NOTE(review): the pivot is presumably all-numeric, in which case pd.get_dummies
# passes it through unchanged and this is a quantity matrix rather than a true
# one-hot encoding -- confirm before relying on the "encoded" name.
category_encoded_df = pd.get_dummies(
    pd.pivot_table(
        merged_data,
        values='Quantity',
        index='CustomerID',
        columns='Category',
        aggfunc='sum',
        fill_value=0,
    )
)


In [76]:
# One-hot encode each customer's Region into 'Region_<value>' indicator columns,
# indexed by CustomerID.
region_encoded_df = (
    customers
    .set_index('CustomerID')['Region']
    .pipe(pd.get_dummies, prefix='Region')
)


In [77]:
# Combine features: place the four CustomerID-indexed feature blocks side by side.
# Rows are aligned on the index; a customer missing from a block gets NaN there.
customer_features = pd.concat(
    [
        customer_clv.set_index('CustomerID'),
        avg_purchase_freq.set_index('CustomerID'),
        category_encoded_df,
        region_encoded_df,
    ],
    axis='columns',
)

In [78]:
# Handle missing values: replace any NaN in the combined feature table with 0.
customer_features = customer_features.fillna(0)


In [79]:
# Scale numerical features: standardise CLV and purchase frequency with
# StandardScaler. The remaining feature columns are left as they are.
numerical_columns = ['CLV', 'AvgPurchaseFrequency']
scaler = StandardScaler()
scaler.fit(customer_features[numerical_columns])
customer_features[numerical_columns] = scaler.transform(
    customer_features[numerical_columns]
)

In [80]:
# Similarity calculation: pairwise cosine similarity between customers over the
# scaled numeric, category and region columns. Row/column i of the matrix
# corresponds to row i of customer_features.
features_for_similarity = [
    *numerical_columns,
    *category_encoded_df.columns,
    *region_encoded_df.columns,
]
similarity_matrix = cosine_similarity(customer_features[features_for_similarity])

In [81]:
# Find lookalikes
def find_top_lookalikes(customer_id, similarity_matrix, customer_ids, top_n=3):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Parameters
    ----------
    customer_id
        Identifier of the customer to find lookalikes for.
    similarity_matrix
        Square, row-indexable matrix of pairwise similarity scores, where
        row/column ``i`` corresponds to ``customer_ids[i]``.
    customer_ids : list
        Customer identifiers in the same order as the matrix rows.
    top_n : int, default 3
        Maximum number of lookalikes to return.

    Returns
    -------
    list of tuple
        ``(other_customer_id, score)`` pairs, highest score first, with each
        score rounded to two decimals. The queried customer is never included.

    Raises
    ------
    ValueError
        If ``customer_id`` is not present in ``customer_ids``.
    """
    customer_index = customer_ids.index(customer_id)

    # Exclude the queried customer by position instead of assuming it sorts
    # first: when another customer ties with (or outranks) the self-score,
    # dropping "element 0" would discard a real match and return the customer
    # as its own lookalike.
    candidate_scores = [
        (other_index, score)
        for other_index, score in enumerate(similarity_matrix[customer_index])
        if other_index != customer_index
    ]

    # list.sort is stable, so equally similar customers keep their input order.
    candidate_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Clamp so a negative top_n yields an empty result instead of a tail slice.
    limit = max(top_n, 0)
    return [
        (customer_ids[other_index], round(score, 2))
        for other_index, score in candidate_scores[:limit]
    ]


In [82]:
# Generate lookalikes for the first 20 customers in the feature table:
# maps each CustomerID to its list of (lookalike CustomerID, score) pairs.
customer_ids = customer_features.index.tolist()
lookalike_data = {
    customer_id: find_top_lookalikes(customer_id, similarity_matrix, customer_ids)
    for customer_id in customer_ids[:20]
}

In [83]:
# Save lookalike recommendations to CSV: one row per customer, with the list of
# (lookalike CustomerID, score) pairs stored as its string representation.
lookalike_df = pd.DataFrame({
    'CustomerID': list(lookalike_data),
    'SimilarCustomers': [str(matches) for matches in lookalike_data.values()],
})

# Build the confirmation message from the path actually written, so the
# reported filename cannot drift from the file on disk.
lookalike_csv_path = 'Lookalike.csv'
lookalike_df.to_csv(lookalike_csv_path, index=False)
print(f"Lookalike recommendations saved to {lookalike_csv_path}.")

Lookalike recommendations saved to Aryaka_Agarwal_Lookalike.csv.


In [84]:
# Save lookalike recommendations under the submission filename and trigger a
# browser download. A single variable holds the filename so the written file,
# the printed message and the download all refer to the same path.
submission_csv_path = 'Aryaka_Agarwal_Lookalike.csv'
lookalike_df.to_csv(submission_csv_path, index=False)
print(f"Lookalike recommendations saved to {submission_csv_path}.")

# Provide a download link for the file. `files` is the google.colab helper
# imported in the notebook's first cell, so it is not re-imported here.
files.download(submission_csv_path)


Lookalike recommendations saved to Aryaka_Agarwal_Lookalike.csv.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>