In [14]:
import os
from experiment_setup import setups
from model_tracker import track_training_C02_emissions

In [15]:
# Experiment selection.
# Supported datasets: coveo, diginetica, rees46, retailrocket, yoochoose
dataset = "yoochoose"
# Supported loss functions: bprmax, xe
loss_function = "xe"

# Relative locations of the dataset folder and of the folder where models are saved.
dataset_path = "../datasets/" + dataset
model_path = "../trained_models"

## Run the preprocessing script specific to the dataset you chose

- In general, the preprocessing script executes the following steps:
    - Loads the raw data, with correct types
    - Creates the sessions
    - Removes duplicated items. An item is considered as a duplicate if the preceding (based on time) event in the same session contains the exact same item.
    - Performs iterative support filtering
        - Removes sessions with only one event
        - Removes items with less than 5 events
        - Repeats until the size of the dataset no longer changes


In [None]:
%run coveo_preproc.py --path $dataset_path

## Use a specific setup for your dataset

In [16]:
# Look up the hyperparameter set registered for the chosen dataset / loss-function pair.
params = setups[dataset]["params_" + loss_function]

In [17]:
# Expected locations of the processed train/test splits inside the dataset folder.
train_file = f"{dataset}_processed_view_train_full.tsv"
test_file = f"{dataset}_processed_view_test.tsv"
train_path = os.path.join(dataset_path, train_file)
test_path = os.path.join(dataset_path, test_file)

In [18]:
def create_torch_gru4rec_script(model_name, train_path, test_path, model_path, loss, optim, final_act, layers, batch_size, dropout_p_embed, dropout_p_hidden, learning_rate, sample_alpha, bpreg, n_epochs, n_sample, m):
    """Build the shell commands that train and then test a Torch-GRU4Rec model.

    Nothing is executed here; the function only formats two command strings
    for ``../Torch-GRU4Rec/main.py``.

    Args:
        model_name: Sub-folder name under ``model_path`` used for this model.
        train_path: Path of the training file (passed as ``--train_path``).
        test_path: Path of the test file (passed as ``--test_path``).
        model_path: Root folder for saved models.
        loss: Loss name; ``'cross-entropy'`` is passed on as ``nll``, any
            other value is forwarded unchanged.
        optim: Value for ``--optimizer``.
        final_act: Final activation; ``'softmax'`` is passed on as
            ``softmaxlogit``, any other value is forwarded unchanged.
        layers: Used for both ``--embedding_size`` and ``--hidden_size``.
        batch_size, dropout_p_embed, dropout_p_hidden, learning_rate,
        sample_alpha, bpreg, n_sample: Forwarded to the matching CLI flags.
        n_epochs: Number of training epochs (integer). Also determines which
            checkpoint the test command loads: the one with index
            ``n_epochs - 1``.
        m: Value forwarded verbatim to ``--m`` in the test command.

    Returns:
        A ``(train_command, test_command)`` tuple of strings.
    """
    # Translate the setup's names into the ones used on the command line.
    cli_loss = 'nll' if loss == 'cross-entropy' else loss
    cli_final_act = 'softmaxlogit' if final_act == 'softmax' else final_act

    save_dir = f"{model_path}/{model_name}"
    # Zero-pad the last epoch index to five digits so the file name stays
    # correct for 10+ epochs (a fixed "0000" prefix only works for indices 0-9).
    # NOTE(review): assumes the trainer names checkpoints model_<5-digit epoch>.pt,
    # which is what the previous single-digit naming implied — confirm in main.py.
    last_checkpoint = f"{save_dir}/model_{n_epochs - 1:05d}.pt"

    s_train_full = (
        f"python ../Torch-GRU4Rec/main.py --save_path {save_dir} --train_path {train_path}"
        f" --loss {cli_loss} --optimizer {optim} --n_epochs {n_epochs}"
        f" --embedding_size {layers} --hidden_size {layers} --n_layers {1}"
        f" --final_act {cli_final_act} --batch_size {batch_size}"
        f" --dropout_p_embed {dropout_p_embed} --dropout_p_hidden {dropout_p_hidden}"
        f" --lr {learning_rate} --n_sample {n_sample} --sample_alpha {sample_alpha} --bpreg {bpreg}"
    )
    s_test_full = (
        f"python ../Torch-GRU4Rec/main.py --train_path {train_path} --test_path {test_path}"
        f" --model_path {last_checkpoint} --test  --m {m}"
    )
    return s_train_full, s_test_full

In [19]:
# Unpack the selected hyperparameters into individual names
# (targets and keys are listed in the same order).
(
    loss, optim, const_emb, embed,
    final_act, layers, batch_size,
    dropout_p_embed, dropout_p_hidden,
    learning_rate, momentum,
    sample_alpha, bpreg, logq, hidden_act, n_sample,
) = (
    params[key]
    for key in (
        "loss", "optim", "constrained_embedding", "embedding",
        "final_act", "layers", "batch_size",
        "dropout_p_embed", "dropout_p_hidden",
        "learning_rate", "momentum",
        "sample_alpha", "bpreg", "logq", "hidden_act", "n_sample",
    )
)

# Settings fixed here rather than read from the setup.
n_epochs = 5
# Forwarded verbatim to the test command's --m flag
# (presumably the evaluation cutoffs — confirm against Torch-GRU4Rec).
m = '1 5 10 20'

## Train & test (out-of-the-box model)

In [20]:
# Build the train/test commands for the out-of-the-box model from the unpacked hyperparameters.
train_script_oob, test_script_oob = create_torch_gru4rec_script(
    model_name=f'torch_gru4rec_{loss_function}',
    train_path=train_path,
    test_path=test_path,
    model_path=model_path,
    loss=loss,
    optim=optim,
    final_act=final_act,
    layers=layers,
    batch_size=batch_size,
    dropout_p_embed=dropout_p_embed,
    dropout_p_hidden=dropout_p_hidden,
    learning_rate=learning_rate,
    sample_alpha=sample_alpha,
    bpreg=bpreg,
    n_epochs=n_epochs,
    n_sample=n_sample,
    m=m,
)

### Train

In [21]:
# Hand the training command to the emissions-tracking helper; the call's
# return value is shown as the cell output.
track_training_C02_emissions(
    train_script_oob,
    f"torch_gru4rec_{loss_function}",
    dataset,
)

[codecarbon INFO @ 17:36:54] [setup] RAM Tracking...
[codecarbon INFO @ 17:36:54] [setup] GPU Tracking...
[codecarbon INFO @ 17:36:54] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 17:36:54] [setup] CPU Tracking...
[codecarbon INFO @ 17:36:56] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i9-13900HX
[codecarbon INFO @ 17:36:56] >>> Tracker's metadata:
[codecarbon INFO @ 17:36:56]   Platform system: Windows-11-10.0.22631-SP0
[codecarbon INFO @ 17:36:56]   Python version: 3.12.3
[codecarbon INFO @ 17:36:56]   CodeCarbon version: 2.4.2
[codecarbon INFO @ 17:36:56]   Available RAM : 31.746 GB
[codecarbon INFO @ 17:36:56]   CPU count: 32
[codecarbon INFO @ 17:36:56]   CPU model: 13th Gen Intel(R) Core(TM) i9-13900HX
[codecarbon INFO @ 17:36:56]   GPU count: 1
[codecarbon INFO @ 17:36:56]   GPU model: 1 x NVIDIA GeForce RTX 4090 Laptop GPU
[codecarbon INFO @ 17:37:14] Energy consumed for RAM : 0.000050 kWh. RAM Power : 11.904736518859863 W
[codecarbon INFO @ 17:37:14

Salida de STDOUT:                   Args                                             Values
0            save_path                 ../trained_models/torch_gru4rec_xe
1           train_path  ../datasets/yoochoose\yoochoose_processed_view...
2           valid_path                                                   
3            test_path                                                   
4                 test                                              False
5                    m                                               [20]
6           model_path                                                   
7             n_epochs                                                  5
8                 loss                                                nll
9            optimizer                                            adagrad
10                  lr                                               0.07
11      embedding_size                                                480
12         hidden_si

0.16770314975238754

### Test

In [22]:
# Show the evaluation command so it can be inspected or run manually in a shell.
print(test_script_oob)
# Uncomment the next line to execute the evaluation command from the notebook instead:
# os.system(test_script_oob)

python ../Torch-GRU4Rec/main.py --train_path ../datasets/yoochoose\yoochoose_processed_view_train_full.tsv --test_path ../datasets/yoochoose\yoochoose_processed_view_test.tsv --model_path ../trained_models/torch_gru4rec_xe/model_00004.pt --test  --m 1 5 10 20
