In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime
import os

In [2]:
# Load the merged transactions/customers/products table and preview it.
merged_csv_path = "../../ShoppersSmart/merged_dataset.csv"
df = pd.read_csv(merged_csv_path)
df.head()

Unnamed: 0,transaction_id,customer_id,product_id,purchase_date,name_x,email,phone_number,location,name_y,description,price
0,1,42,84,2025-06-19,Aadhya Nair,aadhya.nair@example.com,9025501427,Lucknow,Ceiling Fan - Premium,Ceiling Fan - Premium - Essential household item,1397.48
1,2,55,83,2025-05-24,Ishaan Nair,ishaan.nair@example.com,9768045202,Mumbai,Stapler Pack,Stapler Pack - Essential household item,997.22
2,3,87,214,2025-06-12,Myra Chatterjee,myra.chatterjee@example.com,9779426304,Patna,Scissors (Combo),Scissors (Combo) - Essential household item,1171.84
3,4,15,169,2025-06-25,Riya Nair,riya.nair@example.com,9101612239,Mumbai,Memory Card XL,Memory Card XL - Essential household item,1922.17
4,5,78,179,2025-05-23,Myra Nair,myra.nair@example.com,9319718430,Delhi,Broom (Combo),Broom (Combo) - Essential household item,683.59


In [3]:
# List the column labels available for the steps below.
column_labels = df.columns
print(column_labels)

Index(['transaction_id', 'customer_id', 'product_id', 'purchase_date',
       'name_x', 'email', 'phone_number', 'location', 'name_y', 'description',
       'price'],
      dtype='object')


In [4]:
# Keep only rows that carry every field the recommender relies on.
required_columns = ['customer_id', 'name_y', 'location', 'purchase_date']
df = df.dropna(subset=required_columns).copy()

# A numeric id column that contained NaN is read as float64, so a plain
# astype(str) would turn 42 into "42.0". Cast whole-number floats to int
# first so the string keys come out as "42".
customer_ids = df['customer_id']
if pd.api.types.is_float_dtype(customer_ids) and (customer_ids % 1 == 0).all():
    customer_ids = customer_ids.astype('int64')
df['customer_id'] = customer_ids.astype(str)

# Unparseable dates are coerced to NaT and those rows are then discarded.
df['purchase_date'] = pd.to_datetime(df['purchase_date'], errors='coerce')
df = df.dropna(subset=['purchase_date']).copy()


In [5]:
# Each transaction row counts as one purchased unit.
df['quantity'] = 1

# Exponential recency decay: a purchase's weight falls to 1/e after this many days.
decay_days = 30

now = datetime.now()
# Purchases dated after `now` would give a negative age and a weight above 1
# that grows without bound, so the age is clamped at zero (weight capped at 1).
df['days_ago'] = (now - df['purchase_date']).dt.days.clip(lower=0)
df['recency_weight'] = np.exp(-df['days_ago'] / decay_days)
df['weighted_quantity'] = df['quantity'] * df['recency_weight']

In [6]:
# Customer-by-product matrix: rows are customer ids, columns are product names,
# cells hold the summed 'quantity' (0 where a customer never bought the product).
# NOTE(review): the 'weighted_quantity' column is not used here - confirm that
# unweighted counts are what the similarity step should see.
pivot_table = pd.pivot_table(
    df,
    values='quantity',
    index='customer_id',
    columns='name_y',
    aggfunc='sum',
    fill_value=0,
)

In [7]:
# Pairwise cosine similarity between the customer rows of the pivot table,
# wrapped in a DataFrame labelled by customer id on both axes.
customer_index = pivot_table.index
similarity_matrix = cosine_similarity(pivot_table)
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_index,
    columns=customer_index,
)

In [8]:
# Map each customer id to the list of product names found in that customer's rows.
customer_history = {
    customer: list(products)
    for customer, products in df.groupby('customer_id')['name_y']
}

In [9]:
# Total quantity per (location, product) pair, then ordered so that within
# each location the highest totals come first.
location_product_totals = df.groupby(['location', 'name_y'])['quantity'].sum().reset_index()
top_products_by_location = location_product_totals.sort_values(
    by=['location', 'quantity'],
    ascending=[True, False],
)

In [10]:
# For every location keep the product names of its first 10 rows
# (rows arrive already ordered by descending quantity).
location_recommendations = {
    place: rows.head(10)['name_y'].tolist()
    for place, rows in top_products_by_location.groupby('location')
}

In [11]:
# Create the output directory for the pickled artefacts; no error if it already exists.
recommender_dir = "../../ShoppersSmart/recommender"
os.makedirs(recommender_dir, exist_ok=True)

In [12]:
# Persist the customer-by-product matrix.
pivot_table_path = "../../ShoppersSmart/recommender/pivot_table.pkl"
with open(pivot_table_path, mode="wb") as out_file:
    pickle.dump(pivot_table, out_file)

In [13]:
# Persist the customer-to-customer similarity table.
similarity_path = "../../ShoppersSmart/recommender/similarity.pkl"
with open(similarity_path, mode="wb") as out_file:
    pickle.dump(similarity_df, out_file)

In [14]:
# Persist the per-customer purchase history dictionary.
customer_history_path = "../../ShoppersSmart/recommender/customer_history.pkl"
with open(customer_history_path, mode="wb") as out_file:
    pickle.dump(customer_history, out_file)

In [15]:
# Persist the per-location product lists.
location_recommendations_path = "../../ShoppersSmart/recommender/location_recommendations.pkl"
with open(location_recommendations_path, mode="wb") as out_file:
    pickle.dump(location_recommendations, out_file)

In [16]:
# Final confirmation once all four pickle files have been written.
completion_message = "All recommender files saved successfully: collaborative, history, and location-based."
print(completion_message)

All recommender files saved successfully: collaborative, history, and location-based.


KeyError: 'product_name'