In [1]:
# Mount Google Drive into the Colab filesystem so the files stored on Drive
# can be reached through regular paths below the mount point.

from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
# import needed libraries

import os
import torch
import sys
import pandas as pd
import importlib

In [None]:
# create train-val-test sets for Ex2Vec training, as well as sequences for GRU4Rec training
!python /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/preprocess.py

Pre-processing dataset for Ex2Vec...
Traceback (most recent call last):
  File "/content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/preprocess.py", line 26, in <module>
    df = pd.read_csv(orig_dataset)
  File "/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py", line 912, in read_csv
    return _read(filepath_or_buffer, kwds)
  File "/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py", line 577, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
  File "/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py", line 1407, in __init__
    self._engine = self._make_engine(f, self.engine)
  File "/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py", line 1679, in _make_engine
    return mapping[engine](f, **self.options)
  File "/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/c_parser_wrapper.py", line 93, in __init__
    self._reader = parsers.TextReader(src, **kwds)
  File "pandas/_libs/pa

In [3]:
!pip install tensorboardX

Collecting tensorboardX
  Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl (101 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/101.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.7/101.7 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorboardX
Successfully installed tensorboardX-2.6.2.2


In [4]:
# Make the project's own code importable and load it.
# The order matters: each directory has to be on sys.path before the modules
# that live in it are imported.
# Per the recorded output, running this cell prints the sizes of the
# training/validation/test sets as an import side effect.

# Project directory on Drive that contains the 'data_sampler' and 'ex2vec' modules.
PROJECT_DIR = '/content/drive/MyDrive/JKU/practical_work/Practical-Work-AI'
sys.path.append(PROJECT_DIR)

# Helpers for preparing the data, and the engine for training/evaluating the Ex2Vec model.
import data_sampler
from ex2vec import Ex2VecEngine

# Directory of the gru4rec_pytorch module. The GRU4Rec model class has to be
# importable so that a saved GRU4Rec model can be loaded later on.
GRU4REC_FORK_DIR = '/content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/GRU4Rec_PyTorch_Fork'
sys.path.append(GRU4REC_FORK_DIR)
from gru4rec_pytorch import GRU4RecModel
import evaluation as GRUeval

The size of the training set is: 1106989
The size of the validation set is: 156748
The size of the test set is: 320078


In [5]:
# Select the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
has_gpu = torch.cuda.is_available()
device = torch.device('cuda' if has_gpu else 'cpu')

if has_gpu:
    # Report which GPU model the runtime provides.
    device_name = torch.cuda.get_device_name(device)
    print(f'Using GPU: {device_name}')
else:
    print('Using CPU')

print(f'Current device: {device}')

Using GPU: Tesla T4
Current device: cuda


In [6]:
# Stop early if the Deezer parameter file for GRU4Rec is missing; the same
# file is handed to the GRU4Rec training run through its -pf option.
param_file_path = '/content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/GRU4Rec_PyTorch_Fork/paramfiles/deezer_paramfile.py'
param_file_exists = os.path.isfile(param_file_path)
assert param_file_exists, f'Parameter file not found at {param_file_path}'

In [7]:
# train Ex2Vec
!python /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/train.py

The size of the training set is: 1106989
The size of the validation set is: 156748
The size of the test set is: 320078
Ex2Vec(
  (user_lamb): Embedding(3623, 1)
  (user_bias): Embedding(3623, 1)
  (item_bias): Embedding(879, 1)
  (embedding_user): Embedding(3623, 64)
  (embedding_item): Embedding(879, 64)
  (logistic): Sigmoid()
)
global_lamb <class 'torch.Tensor'> torch.Size([])
alpha <class 'torch.Tensor'> torch.Size([])
beta <class 'torch.Tensor'> torch.Size([])
gamma <class 'torch.Tensor'> torch.Size([])
cutoff <class 'torch.Tensor'> torch.Size([])
user_lamb.weight <class 'torch.Tensor'> torch.Size([3623, 1])
user_bias.weight <class 'torch.Tensor'> torch.Size([3623, 1])
item_bias.weight <class 'torch.Tensor'> torch.Size([879, 1])
embedding_user.weight <class 'torch.Tensor'> torch.Size([3623, 64])
embedding_item.weight <class 'torch.Tensor'> torch.Size([879, 64])
Using validation set for evaluation

started training model:  ex2vec_BS512LR5e-05L_DIM64
Epoch 0 starts !
100% 2163/2163 

In [None]:
# train GRU4Rec

"""
params: (from https://github.com/hidasib/GRU4Rec/blob/master/README.md)
  -t    Testset path
  -pf   Parameter file path
  -s    Path to save the state dict to
  -m    Calculate recall, MRR etc. at the given list length
  -ik   Item key
  -tk   Timestamp key
  -d    Device
"""

!python /content/drive/MyDrive/JKU/practical_work/GRU4Rec/run.py /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/data/seq_train.csv -t /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/data/seq_val.csv -pf /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/GRU4Rec_PyTorch_Fork/paramfiles/deezer_paramfile.py -s /content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/models/GRU4Rec.pt -m 1 5 10 20 -ik "itemId" -tk "timestamp" -d cpu

In [None]:
# Extract the item embeddings from the trained GRU4Rec model.

GRU4REC_CHECKPOINT = '/content/drive/MyDrive/JKU/practical_work/Practical-Work-AI/models/GRU4Rec.pt'

# The checkpoint is read back as a whole pickled object (its .model attribute is
# used below), not as a plain state dict. Therefore:
#   * weights_only=False is passed explicitly, because PyTorch versions that
#     default to weights-only loading refuse to unpickle arbitrary objects;
#   * unpickling executes code, so only load checkpoints produced by this project.
# map_location=device remaps the stored tensors onto the device selected earlier
# in the notebook, so loading also works when the checkpoint was written on a
# device that the current runtime does not have.
model_loaded = torch.load(GRU4REC_CHECKPOINT, map_location=device, weights_only=False)

# Wy is presumably the item embedding matrix with one row per item — TODO confirm
# against the gru4rec_pytorch model definition.
# detach() returns the weights without autograd history (preferred over .data).
item_embds = model_loaded.model.Wy.weight.detach()
item_embds.shape

In [None]:
# TODO: retrain Ex2Vec with GRU4Rec item embeddings

In [None]:
# TODO: compare performance