In [3]:
# Required libraries
import os

import numpy as np
import pandas as pd
from pymongo import MongoClient
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Embedding, Dot, Flatten
from tensorflow.keras.models import Model

In [4]:
# Connect to MongoDB.
# The URI is read from the MONGO_URI environment variable when it is set, so the
# notebook can be pointed at another server without editing code; otherwise it
# falls back to the local default instance.
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://localhost:27017/")
client = MongoClient(MONGO_URI)

In [5]:
# Handles to the "AICommerceDB" database and its "user_interactions" collection.
db = client.get_database("AICommerceDB")
collection = db.get_collection("user_interactions")

In [7]:
# Fetch every document from the user_interactions collection (an empty filter
# matches all documents) and materialize the cursor into a list.
records = list(collection.find({}))

In [8]:
# Build a pandas DataFrame from the fetched documents so they can be processed
# in tabular form.
df = pd.DataFrame(data=records)

In [9]:
# Inspect the freshly loaded frame (pandas renders a truncated preview).
df

Unnamed: 0,_id,user_id,product_id,event_type,timestamp
0,67df0db09b6d56326c835a88,U035,P105,view,05-03-2025 11:38
1,67df0db09b6d56326c835a89,U042,P110,add_to_cart,05-03-2025 06:30
2,67df0db09b6d56326c835a8a,U031,P108,add_to_cart,14-03-2025 09:50
3,67df0db09b6d56326c835a8b,U018,P104,view,06-03-2025 11:59
4,67df0db09b6d56326c835a8c,U017,P102,view,19-02-2025 19:35
...,...,...,...,...,...
1995,67df0db09b6d56326c836253,U035,P111,view,09-03-2025 02:21
1996,67df0db09b6d56326c836254,U040,P106,view,12-03-2025 21:30
1997,67df0db09b6d56326c836255,U031,P105,add_to_cart,10-03-2025 16:33
1998,67df0db09b6d56326c836256,U032,P104,view,11-03-2025 21:19


In [11]:
# Count how many rows there are for each event type.
df["event_type"].value_counts()

event_type
view           1175
add_to_cart     524
purchase        301
Name: count, dtype: int64

In [12]:
# Preprocessing: score assigned to each event type
# (view < add_to_cart < purchase).
event_score_map = {"view": 1, "add_to_cart": 3, "purchase": 5}

In [13]:
# Display the mapping to confirm its contents.
event_score_map

{'view': 1, 'add_to_cart': 3, 'purchase': 5}

In [14]:
# Keep only the rows whose event type has a score in event_score_map.
# .copy() makes the result an independent DataFrame, so the column assignments
# in the following cells write to this frame directly rather than to a filtered
# selection of the original (which raises pandas' SettingWithCopyWarning).
df = df[df["event_type"].isin(list(event_score_map))].copy()

In [17]:
# Number of missing values in each column.
df.isna().sum()

_id           0
user_id       0
product_id    0
event_type    0
timestamp     0
dtype: int64

In [18]:
# Translate each row's event_type into its score from event_score_map and
# store the result as the "interaction" column.
interaction_scores = df["event_type"].map(event_score_map)
df["interaction"] = interaction_scores

In [24]:
# Check the frame now that the "interaction" column has been added.
df

Unnamed: 0,_id,user_id,product_id,event_type,timestamp,interaction
0,67df0db09b6d56326c835a88,U035,P105,view,05-03-2025 11:38,1
1,67df0db09b6d56326c835a89,U042,P110,add_to_cart,05-03-2025 06:30,3
2,67df0db09b6d56326c835a8a,U031,P108,add_to_cart,14-03-2025 09:50,3
3,67df0db09b6d56326c835a8b,U018,P104,view,06-03-2025 11:59,1
4,67df0db09b6d56326c835a8c,U017,P102,view,19-02-2025 19:35,1
...,...,...,...,...,...,...
1995,67df0db09b6d56326c836253,U035,P111,view,09-03-2025 02:21,1
1996,67df0db09b6d56326c836254,U040,P106,view,12-03-2025 21:30,1
1997,67df0db09b6d56326c836255,U031,P105,add_to_cart,10-03-2025 16:33,3
1998,67df0db09b6d56326c836256,U032,P104,view,11-03-2025 21:19,1


In [25]:
# Encode user_id as an integer: convert the column to a categorical and keep
# the per-row category codes as "user_code".
user_categories = df["user_id"].astype("category")
df["user_code"] = user_categories.cat.codes

In [26]:
# Check the frame now that the "user_code" column has been added.
df

Unnamed: 0,_id,user_id,product_id,event_type,timestamp,interaction,user_code
0,67df0db09b6d56326c835a88,U035,P105,view,05-03-2025 11:38,1,34
1,67df0db09b6d56326c835a89,U042,P110,add_to_cart,05-03-2025 06:30,3,41
2,67df0db09b6d56326c835a8a,U031,P108,add_to_cart,14-03-2025 09:50,3,30
3,67df0db09b6d56326c835a8b,U018,P104,view,06-03-2025 11:59,1,17
4,67df0db09b6d56326c835a8c,U017,P102,view,19-02-2025 19:35,1,16
...,...,...,...,...,...,...,...
1995,67df0db09b6d56326c836253,U035,P111,view,09-03-2025 02:21,1,34
1996,67df0db09b6d56326c836254,U040,P106,view,12-03-2025 21:30,1,39
1997,67df0db09b6d56326c836255,U031,P105,add_to_cart,10-03-2025 16:33,3,30
1998,67df0db09b6d56326c836256,U032,P104,view,11-03-2025 21:19,1,31


In [28]:
# Encode product_id as an integer: convert the column to a categorical and keep
# the per-row category codes as "product_code".
product_categories = df["product_id"].astype("category")
df["product_code"] = product_categories.cat.codes

In [29]:
# Check the frame now that the "product_code" column has been added.
df

Unnamed: 0,_id,user_id,product_id,event_type,timestamp,interaction,user_code,product_code
0,67df0db09b6d56326c835a88,U035,P105,view,05-03-2025 11:38,1,34,5
1,67df0db09b6d56326c835a89,U042,P110,add_to_cart,05-03-2025 06:30,3,41,10
2,67df0db09b6d56326c835a8a,U031,P108,add_to_cart,14-03-2025 09:50,3,30,8
3,67df0db09b6d56326c835a8b,U018,P104,view,06-03-2025 11:59,1,17,4
4,67df0db09b6d56326c835a8c,U017,P102,view,19-02-2025 19:35,1,16,2
...,...,...,...,...,...,...,...,...
1995,67df0db09b6d56326c836253,U035,P111,view,09-03-2025 02:21,1,34,11
1996,67df0db09b6d56326c836254,U040,P106,view,12-03-2025 21:30,1,39,6
1997,67df0db09b6d56326c836255,U031,P105,add_to_cart,10-03-2025 16:33,3,30,5
1998,67df0db09b6d56326c836256,U032,P104,view,11-03-2025 21:19,1,31,4


In [30]:
# Lookup from numeric user_code back to the original user_id, kept so that
# numeric predictions can be converted back to real identifiers.
user_id_map = {code: uid for code, uid in zip(df["user_code"], df["user_id"])}

In [31]:
 product_id_map = dict(zip(df["product_code"], df["product_id"]))

In [34]:
# Re-display the frame; the mapping cells shown above only read from df, so
# this is expected to match the previous preview.
df

Unnamed: 0,_id,user_id,product_id,event_type,timestamp,interaction,user_code,product_code
0,67df0db09b6d56326c835a88,U035,P105,view,05-03-2025 11:38,1,34,5
1,67df0db09b6d56326c835a89,U042,P110,add_to_cart,05-03-2025 06:30,3,41,10
2,67df0db09b6d56326c835a8a,U031,P108,add_to_cart,14-03-2025 09:50,3,30,8
3,67df0db09b6d56326c835a8b,U018,P104,view,06-03-2025 11:59,1,17,4
4,67df0db09b6d56326c835a8c,U017,P102,view,19-02-2025 19:35,1,16,2
...,...,...,...,...,...,...,...,...
1995,67df0db09b6d56326c836253,U035,P111,view,09-03-2025 02:21,1,34,11
1996,67df0db09b6d56326c836254,U040,P106,view,12-03-2025 21:30,1,39,6
1997,67df0db09b6d56326c836255,U031,P105,add_to_cart,10-03-2025 16:33,3,30,5
1998,67df0db09b6d56326c836256,U032,P104,view,11-03-2025 21:19,1,31,4


In [35]:
# Number of distinct user codes. Per the original note this count (together
# with the product count) is meant for sizing the embeddings; the model itself
# is not defined in this part of the notebook.
n_users = len(df["user_code"].unique())

In [37]:
# Number of distinct product codes.
n_products = len(df["product_code"].unique())

In [40]:
# List the distinct user codes, in order of first appearance in the frame.
df["user_code"].unique()

array([34, 41, 30, 17, 16,  9,  4,  6, 22, 38, 29, 24, 20,  2, 21, 28, 36,
       25, 19, 13, 39,  7, 44, 32, 31, 23, 33, 26, 18, 43,  0, 47,  3,  5,
       49, 10, 48,  1, 27,  8, 35, 12, 40, 45, 37, 14, 42, 15, 46, 11],
      dtype=int8)

In [39]:
# Display the number of distinct users computed earlier.
n_users

50