In [14]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the three input tables from the current working directory.
customers_shivani, products_shivani, transactions_shivani = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

# Attach the product columns to every transaction row. The join is an inner
# join (the pandas default), so transactions whose ProductID is absent from
# the products table are dropped.
transactions_shivani = transactions_shivani.merge(
    products_shivani,
    how='inner',
    on='ProductID',
)

# One groupby object is shared by all the per-customer aggregates below.
_by_customer_shivani = transactions_shivani.groupby('CustomerID')

# Sum and mean of TotalValue per customer.
total_spending_shivani = (
    _by_customer_shivani['TotalValue'].sum().rename('TotalSpending').reset_index()
)
avg_transaction_value_shivani = (
    _by_customer_shivani['TotalValue'].mean().rename('AvgTransactionValue').reset_index()
)

# Number of transaction rows and summed Quantity per customer.
num_transactions_shivani = (
    _by_customer_shivani.size().rename('NumTransactions').reset_index()
)
total_quantity_shivani = (
    _by_customer_shivani['Quantity'].sum().rename('TotalQuantity').reset_index()
)

# Favorite category = the category with the most transaction rows for the
# customer. idxmax returns the first row holding the maximum, so a tie goes
# to whichever category comes first in the grouped counts.
_category_counts_shivani = (
    transactions_shivani
    .groupby(['CustomerID', 'Category'])
    .size()
    .reset_index(name='Count')
)
_top_rows_shivani = _category_counts_shivani.groupby('CustomerID')['Count'].idxmax()
favorite_category_shivani = (
    _category_counts_shivani
    .loc[_top_rows_shivani, ['CustomerID', 'Category']]
    .rename(columns={'Category': 'FavoriteCategory'})
)

# Left-join every per-customer aggregate onto the customer table so that
# customers with no matching aggregate rows are kept; their aggregate columns
# come back as NaN from the join.
customer_features_shivani = customers_shivani
for _aggregate_shivani in (
    total_spending_shivani,
    avg_transaction_value_shivani,
    favorite_category_shivani,
    num_transactions_shivani,
    total_quantity_shivani,
):
    customer_features_shivani = customer_features_shivani.merge(
        _aggregate_shivani, on='CustomerID', how='left'
    )

# Fill missing values per column kind instead of a blanket fillna(0):
# writing the integer 0 into a text column such as FavoriteCategory creates a
# mixed-type category level, which makes the level removed by drop_first
# depend on row order rather than on a stable sorted baseline.
_numeric_columns_shivani = customer_features_shivani.select_dtypes(include='number').columns
customer_features_shivani[_numeric_columns_shivani] = (
    customer_features_shivani[_numeric_columns_shivani].fillna(0)
)

_categorical_columns_shivani = ['FavoriteCategory', 'Region']
customer_features_shivani[_categorical_columns_shivani] = (
    customer_features_shivani[_categorical_columns_shivani].fillna('Unknown')
)

# One-hot encode the categorical columns. drop_first=True removes the first
# level of each encoded column, so that level is represented by all zeros.
# dtype=float keeps the encoded columns numeric alongside the aggregates.
customer_features_shivani = pd.get_dummies(
    customer_features_shivani,
    columns=_categorical_columns_shivani,
    drop_first=True,
    dtype=float,
)

# Standardise the four numeric aggregate columns in place; the remaining
# columns are left as they are.
numerical_features_shivani = ['TotalSpending', 'AvgTransactionValue', 'NumTransactions', 'TotalQuantity']
scaler_shivani = StandardScaler()
_scaled_values_shivani = scaler_shivani.fit_transform(
    customer_features_shivani[numerical_features_shivani]
)
customer_features_shivani[numerical_features_shivani] = _scaled_values_shivani

# Key the feature table by CustomerID and remove the two columns that are
# not used as features.
customer_features_shivani = (
    customer_features_shivani
    .set_index('CustomerID')
    .drop(columns=['CustomerName', 'SignupDate'])
)

# Pairwise cosine similarity between all customer feature rows, wrapped in a
# DataFrame whose rows and columns are both labelled by CustomerID.
similarity_matrix_shivani = cosine_similarity(customer_features_shivani)
_customer_ids_shivani = customer_features_shivani.index
similarity_df_shivani = pd.DataFrame(
    similarity_matrix_shivani,
    index=_customer_ids_shivani,
    columns=_customer_ids_shivani,
)

def bhdouria(customer_id, similarity_df_shivani, top_n=3):
    """Return the ``top_n`` highest-scoring other entries for ``customer_id``.

    Args:
        customer_id: Label of the column to read from ``similarity_df_shivani``;
            the same label is removed from the result.
        similarity_df_shivani: DataFrame of similarity scores in which
            ``customer_id`` is both a column label and a row label.
        top_n: Number of entries to keep after sorting.

    Returns:
        A Series of scores indexed by the remaining row labels, sorted from
        highest to lowest and cut to the first ``top_n`` entries.
    """
    scores = similarity_df_shivani[customer_id]
    # A customer should not be reported as similar to itself.
    other_scores = scores.drop(customer_id)
    ranked_scores = other_scores.sort_values(ascending=False)
    return ranked_scores.head(top_n)

# Build the lookalike map for the first 20 customers of the feature table:
# customer id -> list of [similar customer id, similarity score] pairs.
shivani_map = {}
for customer_id in customer_features_shivani.index[:20]:
    similar_customers_shivani = bhdouria(customer_id, similarity_df_shivani)
    shivani_map[customer_id] = [
        [neighbour_id, float(neighbour_score)]
        for neighbour_id, neighbour_score in similar_customers_shivani.items()
    ]

# Two unnamed columns: the customer id and its list of lookalike pairs.
shivani_df = pd.DataFrame(list(shivani_map.items()))

# Write the table to CSV with neither the row index nor a header row.
shivani_df.to_csv('Shivani_Bhadouria_Lookalike.csv', header=False, index=False)

print(shivani_df)

        0                                                  1
0   C0001  [[C0190, 0.9449796330312132], [C0048, 0.943297...
1   C0002  [[C0088, 0.9567950735454178], [C0077, 0.908192...
2   C0003  [[C0052, 0.9054432967127763], [C0152, 0.837458...
3   C0004  [[C0165, 0.9764434526284471], [C0169, 0.928839...
4   C0005  [[C0186, 0.9780999104551312], [C0146, 0.962233...
5   C0006  [[C0168, 0.9735222664202972], [C0187, 0.941447...
6   C0007  [[C0140, 0.9807378835067342], [C0115, 0.930148...
7   C0008  [[C0109, 0.8455416700232936], [C0084, 0.815627...
8   C0009  [[C0198, 0.9643397739032491], [C0062, 0.929478...
9   C0010  [[C0111, 0.9230243277923992], [C0062, 0.904731...
10  C0011  [[C0137, 0.9378688431670885], [C0126, 0.920115...
11  C0012  [[C0104, 0.9736188526206875], [C0113, 0.939148...
12  C0013  [[C0099, 0.9830898690500509], [C0108, 0.927453...
13  C0014  [[C0060, 0.9784678112789252], [C0128, 0.958410...
14  C0015  [[C0131, 0.9767477723513618], [C0036, 0.971650...
15  C0016  [[C0183, 0.97