In [5]:
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer, StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Lookalike pipeline: build one feature vector per customer from their
# transactions, compare customers by cosine similarity, and export the three
# closest matches for the first 20 customers.

N_TARGET_CUSTOMERS = 20  # how many customers (in profile order) get recommendations
N_LOOKALIKES = 3         # how many lookalikes are reported per customer
NUMERIC_COLUMNS = ['TotalValue', 'Quantity']


def _top_lookalikes(scores, customer_id, k=N_LOOKALIKES):
    """Return the ``k`` highest-scoring customers other than ``customer_id``.

    Args:
        scores: Series of similarity scores indexed by CustomerID.
        customer_id: The customer the scores were computed for; their own
            entry is removed by label before ranking.
        k: Maximum number of lookalikes to return.

    Returns:
        A Series of at most ``k`` scores, sorted descending, indexed by the
        lookalike CustomerID.
    """
    # Drop by label instead of skipping position 0 after sorting: when another
    # customer ties with the self-similarity score, the customer's own row is
    # not guaranteed to sort first.
    return scores.drop(labels=customer_id).nlargest(k)


customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')

# Inner joins: transactions without a matching customer or product are dropped.
data = transactions.merge(customers, on='CustomerID').merge(products, on='ProductID')

# One row per customer: total spend, total quantity, the list of purchased
# product IDs, the region from the customer's first row, and the number of
# transactions.
customer_profiles = data.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'sum',
    'ProductID': lambda x: list(x),
    'Region': 'first',
    'TransactionID': 'count'}).reset_index()

# Multi-hot encode the purchased products (one 0/1 column per product ID).
mlb = MultiLabelBinarizer()
product_encoded = pd.DataFrame(
    mlb.fit_transform(customer_profiles['ProductID']),
    columns=mlb.classes_,
    index=customer_profiles.index,
)

region_encoded = pd.get_dummies(customer_profiles['Region'], prefix='Region')

# Standardize the numeric totals before combining them with the 0/1 indicator
# columns. Left on their raw scale, the totals dominate every vector and the
# cosine similarity effectively ignores region and product overlap.
scaled_numeric = pd.DataFrame(
    StandardScaler().fit_transform(customer_profiles[NUMERIC_COLUMNS]),
    columns=NUMERIC_COLUMNS,
    index=customer_profiles.index,
)
features = pd.concat([scaled_numeric, region_encoded, product_encoded], axis=1)

similarity_matrix = cosine_similarity(features)
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_profiles['CustomerID'],
    columns=customer_profiles['CustomerID'],
)

recommendations = []
for customer_id in customer_profiles['CustomerID'][:N_TARGET_CUSTOMERS]:
    similar_customers = _top_lookalikes(similarity_df[customer_id], customer_id)
    row = {'CustomerID': customer_id}
    for rank in range(N_LOOKALIKES):
        # With fewer than N_LOOKALIKES other customers the remaining slots
        # stay empty instead of raising IndexError.
        found = rank < len(similar_customers)
        row[f'Lookalike{rank + 1}'] = similar_customers.index[rank] if found else None
        row[f'Score{rank + 1}'] = similar_customers.iloc[rank] if found else None
    recommendations.append(row)

recommendations_df = pd.DataFrame(recommendations)
recommendations_df.to_csv('Lookalike.csv', index=False)
# Exports the feature matrix exactly as it was fed to cosine_similarity
# (numeric columns standardized).
features.to_csv('ProcessedFeatures.csv', index=False)
# NOTE(review): the message names 'Sankalp_Parajuli_Lookalike.csv', but the
# file written above is 'Lookalike.csv' - confirm which name is intended.
print("Sankalp_Parajuli_Lookalike.csv Generated!")

Sankalp_Parajuli_Lookalike.csv Generated!
