## Test the model with your own dataset of the same distribution.
- Just change the file path in cell 2.

In [1]:
# Packages
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Embedding, Input, Dot, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import keras
import seaborn as sns
from sklearn import preprocessing as prep
import pickle

In [2]:
# Load the data of your choice
# Load the dataset
file_path = '/content/drive/MyDrive/dataquest_dataset.csv'
dataquest_data = pd.read_csv(file_path)

# Display the first few rows of the dataset
dataquest_data.head(5)

Unnamed: 0,idcol,interaction,int_date,item,page,tod,item_type,item_descrip,segment,beh_segment,active_ind
0,755,DISPLAY,17JAN2023,NONE,Screen1,Afternoon,ALL,,segment3,B01,Semi Active
1,4521,DISPLAY,27FEB2023,NONE,Screen1,Afternoon,ALL,,segment1,B07,Semi Active
2,4521,DISPLAY,18FEB2023,NONE,Screen1,Afternoon,ALL,,segment1,B07,Semi Active
3,4521,DISPLAY,30JAN2023,NONE,Screen1,Morning,ALL,,segment1,B07,Semi Active
4,4521,CLICK,05FEB2023,IBAB,Screen1,Afternoon,INSURE,GENERIC MESSAGE,segment1,B07,Semi Active


In [3]:
# Convert interaction dates to datetime
dataquest_data['int_date'] = pd.to_datetime(dataquest_data['int_date'], format='%d%b%Y')
dataquest_data.head(5)

Unnamed: 0,idcol,interaction,int_date,item,page,tod,item_type,item_descrip,segment,beh_segment,active_ind
0,755,DISPLAY,2023-01-17,NONE,Screen1,Afternoon,ALL,,segment3,B01,Semi Active
1,4521,DISPLAY,2023-02-27,NONE,Screen1,Afternoon,ALL,,segment1,B07,Semi Active
2,4521,DISPLAY,2023-02-18,NONE,Screen1,Afternoon,ALL,,segment1,B07,Semi Active
3,4521,DISPLAY,2023-01-30,NONE,Screen1,Morning,ALL,,segment1,B07,Semi Active
4,4521,CLICK,2023-02-05,IBAB,Screen1,Afternoon,INSURE,GENERIC MESSAGE,segment1,B07,Semi Active


In [4]:
# Encoding of user IDs and item types.
user_label_encoder, item_label_encoder = LabelEncoder(), LabelEncoder()
dataquest_data['user_id'] = user_label_encoder.fit_transform(dataquest_data['idcol']) # Fit label encoder and return encoded labels.
dataquest_data['item_id'] = item_label_encoder.fit_transform(dataquest_data['item_type']) # Fit label encoder and return encoded labels.
dataquest_data.head(10)

Unnamed: 0,idcol,interaction,int_date,item,page,tod,item_type,item_descrip,segment,beh_segment,active_ind,user_id,item_id
0,755,DISPLAY,2023-01-17,NONE,Screen1,Afternoon,ALL,,segment3,B01,Semi Active,0,0
1,4521,DISPLAY,2023-02-27,NONE,Screen1,Afternoon,ALL,,segment1,B07,Semi Active,1,0
2,4521,DISPLAY,2023-02-18,NONE,Screen1,Afternoon,ALL,,segment1,B07,Semi Active,1,0
3,4521,DISPLAY,2023-01-30,NONE,Screen1,Morning,ALL,,segment1,B07,Semi Active,1,0
4,4521,CLICK,2023-02-05,IBAB,Screen1,Afternoon,INSURE,GENERIC MESSAGE,segment1,B07,Semi Active,1,2
5,4521,CHECKOUT,2023-02-05,IBAB,Screen1,Afternoon,INSURE,GENERIC MESSAGE,segment1,B07,Semi Active,1,2
6,6145,DISPLAY,2023-02-26,NONE,Screen1,Evening,ALL,,segment3,B01,Cold Start,2,0
7,6145,DISPLAY,2023-01-27,NONE,Screen1,Early,ALL,,segment3,B01,Cold Start,2,0
8,6145,DISPLAY,2023-02-10,NONE,Screen1,Morning,ALL,,segment3,B01,Cold Start,2,0
9,6145,DISPLAY,2023-01-10,NONE,Screen1,Afternoon,ALL,,segment3,B01,Cold Start,2,0


In [5]:
# Create a spreadsheet-style pivot table as a DataFrame
interaction_matrix = dataquest_data.pivot_table(index='user_id', columns='item_id', values='interaction', aggfunc='count', fill_value=0)
interaction_matrix.head()

item_id,0,1,2,3,4,5,6
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,1,0,0,0,0,0,0
1,3,0,2,0,0,0,0
2,4,0,0,0,0,0,0
3,1,0,0,0,0,0,0
4,3,0,0,0,0,0,0


# Load the trained model and make predictions on unseen data

In [18]:
loaded_model = pickle.load(open('/content/drive/MyDrive/collaborative_recommender.pkl', 'rb'))
X, W, Y = loaded_model
X, W, Y = np.array(X), np.array(W), np.array(Y)

# Make predictions.

In [19]:
preds = np.matmul(X, np.transpose(W)) + Y

for i in range(preds.shape[0])[:10]:
  for j in range(preds.shape[1]):
    if Y[i, j] != 0:
      print("For user", user_label_encoder.inverse_transform([i]), "predicted", preds[i, j], "for ", Y[i, j], " which is ", item_label_encoder.inverse_transform([Y[i, j]]))

For user [755] predicted 1.181225280182541 for  1  which is  ['CONNECT']
For user [4521] predicted 3.0247833708208107 for  3  which is  ['INVEST']
For user [4521] predicted 1.9574345325178917 for  2  which is  ['INSURE']
For user [6145] predicted 4.18108969739321 for  4  which is  ['LEND']
For user [7125] predicted 1.1792485029364976 for  1  which is  ['CONNECT']
For user [8469] predicted 3.181283784928689 for  3  which is  ['INVEST']
For user [13768] predicted 1.179343027133289 for  1  which is  ['CONNECT']
For user [14454] predicted 0.6744006673446157 for  1  which is  ['CONNECT']
For user [14454] predicted 2.283750377356596 for  2  which is  ['INSURE']
For user [15000] predicted 0.6585199163348812 for  1  which is  ['CONNECT']
For user [15000] predicted 2.3530974228430024 for  2  which is  ['INSURE']
For user [15784] predicted 1.1795377360504793 for  1  which is  ['CONNECT']
For user [22210] predicted 2.181160537997758 for  2  which is  ['INSURE']
