# Practical Work in AI: Enhancing item relevance scores with psychology-based interest over time for music recommender systems

Author: Laura Legat

Matriculation ID: 51868012

In [1]:
# Mount Google Drive so that the project files stored there are reachable from this Colab runtime

from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [3]:
# import needed libraries

import os
import torch
import sys
import pandas as pd
import importlib

!pip install optuna
import optuna

!pip install tensorboardX

print('\nAll imports successful!')


All imports successful!


In [9]:
# Select the compute device: use CUDA when it is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

if device.type == 'cuda':
    # Report which GPU was picked up
    device_name = torch.cuda.get_device_name(device)
    print(f'Using GPU: {device_name}')
else:
    print('Using CPU')

print(f'Current device: {device}')

Using CPU
Current device: cpu


In [5]:
# Run the preprocessing script: it creates the train/validation/test sets for Ex2Vec training
# as well as the sequence files for GRU4Rec training.
# NOTE(review): the meaning of -sl, -st and -sm is defined in preprocess.py and is not visible
# from this notebook — TODO document the flags here.
!python /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/preprocess.py -sl 50 -st 1 -sm Y

Pre-processing dataset for Ex2Vec...
Saved processed.csv
Pre-processing dataset for GRU4Rec...
Saved sequenced files for GRU4Rec


In [6]:
# import custom code

# Directories on Google Drive that hold the project modules and the GRU4Rec fork
PROJECT_DIR = '/content/drive/MyDrive/JKU/practical_work/Practical-Work-AI'
GRU4REC_DIR = PROJECT_DIR + '/GRU4Rec_PyTorch_Fork'

# Add the directory containing 'data_sampler' and 'ex2vec' to Python's search path.
# The membership check keeps sys.path free of duplicate entries when this cell is re-run.
if PROJECT_DIR not in sys.path:
    sys.path.append(PROJECT_DIR)

# modules for preparing data and for training/evaluating the Ex2Vec model
import data_sampler
from ex2vec import Ex2VecEngine

# Add the GRU4Rec fork to Python's search path so that the GRU4Rec model class is
# importable, which is needed to load a saved GRU4Rec model
if GRU4REC_DIR not in sys.path:
    sys.path.append(GRU4REC_DIR)
import gru4rec_pytorch
import evaluation as GRUeval

The size of the training set is: 18061
The size of the validation set is: 2562
The size of the test set is: 5213


In [80]:
# training + hyperparameter optimization with seq_train.csv and seq_val.csv on GRU4Rec
#
# Arguments as passed below (flag meanings are inferred from the values and file names
# given here — TODO confirm against optuna_paropt.py):
#   seq_train.csv  positional argument, the training data
#   -opf           JSON file with the parameter space that is searched
#   -nt 2          presumably the number of Optuna trials
#   -p             the script that is launched for training (GRU4Rec's run.py)
#   -o             JSON file that presumably receives the best parameters found
#   -mo gru4rec    selects the model
#   -t             seq_val.csv, the evaluation data
#   -pf            GRU4Rec parameter file
#   -m "1 5 20"    list lengths at which the metrics are computed
#   -ik / -tk      item key and timestamp key
# NOTE(review): -d is hard-coded to cpu, independent of the `device` variable computed in this notebook.

!python /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/optuna_paropt.py /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/data/seq_train.csv -opf /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/GRU4Rec_PyTorch_Fork/paramspaces/deezer_parspace_GRU4Rec.json -nt 2 -p /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/GRU4Rec_PyTorch_Fork/run.py -o /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/optim/best_params_gru4rec.json -mo gru4rec -t /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/data/seq_val.csv -pf /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/GRU4Rec_PyTorch_Fork/paramfiles/deezer_paramfile.py -d cpu -m "1 5 20" -ik "itemId" -tk "timestamp"

Loaded parameters from file: /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/GRU4Rec_PyTorch_Fork/paramfiles/deezer_paramfile.py
--------------------------------------------------------------------------------
PARAMETER SPACE:
	PARAMETER name=loss 	 type=categorical 	 options: [bpr-max,cross-entropy]
	PARAMETER name=constrained_embedding 	 type=categorical 	 options: [False,True]
	PARAMETER name=embedding 	 type=categorical 	 options: [0,64]
	PARAMETER name=elu_param 	 type=categorical 	 options: [0.5,1,0]
	PARAMETER name=layers 	 type=int 	 range=[64..64] (step=1) 	 UNIFORM scale
	PARAMETER name=n_epochs 	 type=categorical 	 options: [2,3]
	PARAMETER name=batch_size 	 type=int 	 range=[32..256] (step=16) 	 UNIFORM scale
	PARAMETER name=dropout_p_embed 	 type=float 	 range=[0.0..0.5] (step=0.05) 	 UNIFORM scale
	PARAMETER name=dropout_p_hidden 	 type=float 	 range=[0.0..0.7] (step=0.05) 	 UNIFORM scale
	PARAMETER name=learning_rate 	 type=float 	 range=[0.01..0.25] (step=0.

In [None]:
# Training + hyperparameter optimization for Ex2Vec via optuna_paropt.py.
# The search uses the parameter space in deezer_parspace_Ex2Vec.json, launches train.py (-p),
# runs with -nt 2 (presumably two Optuna trials — TODO confirm) and targets
# optim/best_params_ex2vec.json (-o, presumably the output file for the best parameters).
# NOTE(review): the positional dataset argument is seq_train.csv, the same file that the GRU4Rec
# optimization cell uses — confirm that this is the intended input for Ex2Vec.

!python /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/optuna_paropt.py /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/data/seq_train.csv -opf /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/GRU4Rec_PyTorch_Fork/paramspaces/deezer_parspace_Ex2Vec.json -nt 2 -p /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/train.py -o /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/optim/best_params_ex2vec.json -mo ex2vec

In [None]:
# Fail early with a readable message if the Deezer parameter file for GRU4Rec is missing
param_file_path = (
    '/content/drive/MyDrive/JKU/practical_work/Practical-Work-AI'
    '/GRU4Rec_PyTorch_Fork/paramfiles/deezer_paramfile.py'
)
param_file_found = os.path.isfile(param_file_path)
assert param_file_found, f'Parameter file not found at {param_file_path}'

In [None]:
# Train the baseline Ex2Vec model with Ex2Vec's own item embeddings, without hyperparameter tuning.
# train.py is invoked without any arguments, i.e. it runs with its built-in default settings.
!python /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/train.py

The size of the training set is: 1106989
The size of the validation set is: 156748
The size of the test set is: 320078
Ex2Vec(
  (user_lamb): Embedding(3623, 1)
  (user_bias): Embedding(3623, 1)
  (item_bias): Embedding(879, 1)
  (embedding_user): Embedding(3623, 64)
  (embedding_item): Embedding(879, 64)
  (logistic): Sigmoid()
)
global_lamb <class 'torch.Tensor'> torch.Size([])
alpha <class 'torch.Tensor'> torch.Size([])
beta <class 'torch.Tensor'> torch.Size([])
gamma <class 'torch.Tensor'> torch.Size([])
cutoff <class 'torch.Tensor'> torch.Size([])
user_lamb.weight <class 'torch.Tensor'> torch.Size([3623, 1])
user_bias.weight <class 'torch.Tensor'> torch.Size([3623, 1])
item_bias.weight <class 'torch.Tensor'> torch.Size([879, 1])
embedding_user.weight <class 'torch.Tensor'> torch.Size([3623, 64])
embedding_item.weight <class 'torch.Tensor'> torch.Size([879, 64])
Using validation set for evaluation

Started training of model:  ex2vec_baseline_BS512LR5e-05L_DIM64
Epoch 0 starts !
100

In [None]:
# Train the baseline Ex2Vec model with Ex2Vec's own item embeddings, with hyperparameter tuning.
# `-o Y` switches on the Optuna hyperparameter search in train.py (the cell output reports
# "Starting hyperparameter optimization with Optuna...").
!python /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/train.py -o Y

The size of the training set is: 1106989
The size of the validation set is: 156748
The size of the test set is: 320078
Starting hyperparameter optimization with Optuna...
[32m[I 2024-07-26 00:25:39,874][0m A new study created in memory with name: no-name-389d2525-6798-487f-a8cf-ef36be329ce5[0m
  LR_optim = trial.suggest_loguniform('LR', 1e-5, 1e-3)
  l2_regularization_optim = trial.suggest_loguniform('l2_regularization', 1e-5, 1e-2)
Ex2Vec(
  (user_lamb): Embedding(3623, 1)
  (user_bias): Embedding(3623, 1)
  (item_bias): Embedding(879, 1)
  (embedding_user): Embedding(3623, 64)
  (embedding_item): Embedding(879, 64)
  (logistic): Sigmoid()
)
global_lamb <class 'torch.Tensor'> torch.Size([])
alpha <class 'torch.Tensor'> torch.Size([])
beta <class 'torch.Tensor'> torch.Size([])
gamma <class 'torch.Tensor'> torch.Size([])
cutoff <class 'torch.Tensor'> torch.Size([])
user_lamb.weight <class 'torch.Tensor'> torch.Size([3623, 1])
user_bias.weight <class 'torch.Tensor'> torch.Size([3623, 

In [None]:
# TODO: retrain ex2vec with the best parameters such that we can compare it to tuned Ex2Vec + GRU4Rec combo

In [26]:
# train GRU4Rec without hyperparameter tuning
#
# Runs the GRU4Rec fork's run.py on seq_train.csv with seq_val.csv as the test set, using the
# settings from deezer_paramfile.py, and saves the state dict to models/GRU4Rec.pt.
# The flags used in the command are explained in the string below.
# NOTE(review): -d is hard-coded to cpu, independent of the `device` variable computed in this notebook.

"""
params: (from https://github.com/hidasib/GRU4Rec/blob/master/README.md)
  -t    Testset path
  -pf   Parameter file path
  -s    Path to save the state dict to
  -m    Calculate recall, MRR etc. at the given list length
  -ik   Item key
  -tk   Timestamp key
  -d    Device
"""

!python /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/GRU4Rec_PyTorch_Fork/run.py /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/data/seq_train.csv -t /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/data/seq_val.csv -pf /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/GRU4Rec_PyTorch_Fork/paramfiles/deezer_paramfile.py -s /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/models/GRU4Rec.pt -m 1 5 10 20 -d cpu -ik "itemId" -tk "timestamp"

hello1
hello2
type:  <class 'list'>
Loaded parameters from file: /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/GRU4Rec_PyTorch_Fork/paramfiles/deezer_paramfile.py
Creating GRU4Rec model on device "cpu"
SET   loss                    TO   bpr-max   (type: <class 'str'>)
SET   constrained_embedding   TO   True      (type: <class 'bool'>)
SET   embedding               TO   0         (type: <class 'int'>)
SET   elu_param               TO   1.0       (type: <class 'float'>)
SET   layers                  TO   [64]      (type: <class 'list'>)
SET   n_epochs                TO   2         (type: <class 'int'>)
SET   batch_size              TO   50        (type: <class 'int'>)
SET   dropout_p_embed         TO   0.4       (type: <class 'float'>)
SET   dropout_p_hidden        TO   0.2       (type: <class 'float'>)
SET   learning_rate           TO   0.05      (type: <class 'float'>)
SET   momentum                TO   0.3       (type: <class 'float'>)
SET   n_sample                TO   

In [None]:
#TODO: train GRU4Rec with hyperparameter tuning to get better item representation

In [None]:
# Re-train Ex2Vec with GRU4Rec item embeddings, with hyperparameter tuning.
# `-ep` is given models/GRU4Rec.pt, the same path the GRU4Rec training cell saves its state dict to
# (presumably the source of the item embeddings — TODO confirm in train.py).
# `-o Y` switches on the Optuna hyperparameter search, as the cell output reports.
!python /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/train.py -ep /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/models/GRU4Rec.pt -o Y

The size of the training set is: 1106989
The size of the validation set is: 156748
The size of the test set is: 320078
Starting hyperparameter optimization with Optuna...
[32m[I 2024-07-26 00:59:51,047][0m A new study created in memory with name: no-name-823705ee-b080-4154-88b7-a2c9faaa4b9e[0m
  LR_optim = trial.suggest_loguniform('LR', 1e-5, 1e-3)
  l2_regularization_optim = trial.suggest_loguniform('l2_regularization', 1e-5, 1e-2)
Ex2Vec(
  (user_lamb): Embedding(3623, 1)
  (user_bias): Embedding(3623, 1)
  (item_bias): Embedding(879, 1)
  (embedding_user): Embedding(3623, 64)
  (embedding_item): Embedding(879, 64)
  (logistic): Sigmoid()
)
global_lamb <class 'torch.Tensor'> torch.Size([])
alpha <class 'torch.Tensor'> torch.Size([])
beta <class 'torch.Tensor'> torch.Size([])
gamma <class 'torch.Tensor'> torch.Size([])
cutoff <class 'torch.Tensor'> torch.Size([])
user_lamb.weight <class 'torch.Tensor'> torch.Size([3623, 1])
user_bias.weight <class 'torch.Tensor'> torch.Size([3623, 

Best Ex2Vec with hyperparameter tuning: ACC = 0.5329, B_ACC = 0.5291, RECALL = 0.6017, F1 = 0.5306

Best Ex2Vec + GRU4Rec item embeddings with hyperparameter tuning: ACC = 0.5721, B_ACC = 0.5666, RECALL = 0.6743, F1 = 0.5670