In [1]:
import os
from experiment_setup import setups

In [2]:
# Download the dataset first; the links can be found in the README.
dataset_path = "../datasets/coveo_ecommerce"  # directory that holds the Coveo e-commerce data files
model_path = "../trained_models"  # parent directory under which each model's checkpoint folder is created

## Run the preprocess script, specific to the dataset you chose

- In general, the preprocessing script executes the following steps:
    - Loads the raw data, with correct types
    - Creates the sessions
    - Removes duplicated items. An item is considered as a duplicate if the preceding (based on time) event in the same session contains the exact same item.
    - Performs iterative support filtering
        - Removes sessions with only one event
        - Removes items with fewer than 5 events
        - Repeats both steps until the size of the dataset no longer changes


In [None]:
# Run the Coveo preprocessing script inside this kernel; IPython expands $dataset_path
# to the value of the notebook variable before passing it as --path.
%run ../Preprocess/coveo_ecommerce_preproc.py --path $dataset_path

## Use a specific setup for your dataset

In [3]:
# Pick the hyperparameter dictionary stored under the "coveo" / "params_bprmax" keys of
# experiment_setup.setups (presumably the BPR-max configuration tuned for Coveo — confirm there).
params = setups["coveo"]["params_bprmax"]

In [4]:
# Locations of the train and test TSV files inside the dataset directory
# (presumably written by the preprocessing script — verify the file names against its output).
train_path = os.path.join(dataset_path,"coveo_processed_view_train_full.tsv")
test_path = os.path.join(dataset_path,"coveo_processed_view_test.tsv")

In [5]:
def create_gru4rec_pytorch_script(
    model_name,
    train_folder,
    train_data,
    test_data,
    model_path,
    loss,
    optim,
    final_act,
    layers,
    batch_size,
    dropout_p_embed,
    dropout_p_hidden,
    learning_rate,
    n_epochs,
    m,
    eval_hidden_reset,
    use_correct_loss,
    use_correct_mask_reset,
):
    """Build the shell commands that train and evaluate a GRU4REC-pytorch model.

    Args:
        model_name: Name of the sub-folder of ``model_path`` used as checkpoint directory.
        train_folder: Value passed as ``--data_folder``.
        train_data: Value passed as ``--train_data``.
        test_data: Value passed as ``--valid_data``.
        model_path: Parent directory of the checkpoint folder.
        loss: ``'bpr-max'`` selects ``BPR-max``; any other value selects ``CrossEntropy``.
        optim: ``'adagrad'`` selects ``Adagrad``; see the review note in the body for other values.
        final_act: Value passed as ``--final_act``.
        layers: Used for both ``--embedding_dim`` and ``--hidden_size``.
        batch_size: Value passed as ``--batch_size``.
        dropout_p_embed: Value passed as ``--dropout_input``.
        dropout_p_hidden: Value passed as ``--dropout_hidden``.
        learning_rate: Value passed as ``--lr``.
        n_epochs: Integer number of epochs; also determines which checkpoint is evaluated.
        m: Value passed as ``--m`` in the evaluation command only.
        eval_hidden_reset: When truthy, append ``--eval_hidden_reset``.
        use_correct_loss: When truthy, append ``--use_correct_loss``.
        use_correct_mask_reset: When truthy, append ``--use_correct_mask_reset``.

    Returns:
        A ``(train_command, test_command)`` tuple of strings. The test command is the
        train command followed by ``--is_eval --load_model <checkpoint> --m <m>``.
    """
    # Join paths with os.path.join instead of hard-coded backslashes: on Windows the
    # result is unchanged, while on POSIX shells the backslashes would be consumed as
    # escape characters and corrupt both the script path and the checkpoint path.
    checkpoint_dir = os.path.join(model_path, model_name)
    main_script = os.path.join("..", "GRU4REC-pytorch", "main.py")

    loss_type = 'BPR-max' if loss == 'bpr-max' else 'CrossEntropy'
    # NOTE(review): any optimizer other than 'adagrad' produces an empty
    # --optimizer_type value, exactly as before; decide how other optimizers should map.
    optimizer_type = 'Adagrad' if optim == 'adagrad' else ''

    s_train_full = (
        f"python {main_script} --data_folder {train_folder} "
        f"--train_data {train_data} --valid_data {test_data} --checkpoint_dir {checkpoint_dir} "
        f"--num_layers 1 --embedding_dim {layers} --hidden_size {layers} "
        f"--loss_type {loss_type} --final_act {final_act} "
        f"--n_epochs {n_epochs} --batch_size {batch_size} --dropout_input {dropout_p_embed} "
        f"--dropout_hidden {dropout_p_hidden} --lr {learning_rate} --momentum 0.0 "
        f"--optimizer_type {optimizer_type}"
        f"{' --eval_hidden_reset' if eval_hidden_reset else ''}"
        f"{' --use_correct_loss' if use_correct_loss else ''}"
        f"{' --use_correct_mask_reset' if use_correct_mask_reset else ''}"
    )

    # The final epoch has index n_epochs - 1. Pad it to five digits so the name stays
    # "model_000NN.pt" beyond ten epochs; the previous fixed "0000" prefix only produced
    # a five-digit index for n_epochs <= 10.
    # Assumes the trainer names checkpoints with a 5-digit zero-padded epoch — TODO confirm.
    last_checkpoint = os.path.join(checkpoint_dir, f"model_{n_epochs - 1:05d}.pt")
    s_test_full = s_train_full + f" --is_eval --load_model {last_checkpoint} --m {m}"
    return s_train_full, s_test_full

In [6]:
# Hyperparameters that the script-building cells below pass to create_gru4rec_pytorch_script.
loss = params["loss"]
optim = params["optim"]
final_act = params["final_act"]
layers = params["layers"]
batch_size = params["batch_size"]
dropout_p_embed = params["dropout_p_embed"]
dropout_p_hidden = params["dropout_p_hidden"]
learning_rate = params["learning_rate"]

# Remaining entries of the setup. No visible cell forwards these to the script builder
# (it hard-codes "--momentum 0.0", for instance); they are kept available as notebook globals.
const_emb = params["constrained_embedding"]
embed = params["embedding"]
momentum = params["momentum"]
sample_alpha = params["sample_alpha"]
bpreg = params["bpreg"]
logq = params["logq"]
hidden_act = params["hidden_act"]

# Run settings fixed in this notebook rather than read from the setup.
n_epochs = 5
m = '1 5 10 20'  # passed verbatim as the evaluation command's --m option

## Train & test the out-of-the-box model

In [7]:
# Split each dataset path into (folder, file name) for the --data_folder and
# --train_data / --valid_data options. os.path.split understands the host's path
# separators, whereas splitting on a literal '/' mis-splits paths that
# os.path.join built with a different separator (e.g. on Windows).
train_folder, train_data = os.path.split(train_path)
test_folder, test_data = os.path.split(test_path)

In [8]:
# Out-of-the-box configuration: both dropout rates forced to 0.0 and all three fix flags disabled.
train_script_oob, test_script_oob = create_gru4rec_pytorch_script(
    model_name='gru4rec_pytorch_oob_bprmax',
    train_folder=train_folder,
    train_data=train_data,
    test_data=test_data,
    model_path=model_path,
    loss=loss,
    optim=optim,
    final_act=final_act,
    layers=layers,
    batch_size=batch_size,
    dropout_p_embed=0.0,
    dropout_p_hidden=0.0,
    learning_rate=learning_rate,
    n_epochs=n_epochs,
    m=m,
    eval_hidden_reset=False,
    use_correct_loss=False,
    use_correct_mask_reset=False,
)

In [10]:
# Notebook-level copies of the out-of-the-box checkpoint folder and fix flags.
# NOTE(review): no cell visible in this notebook reads these four names; the script
# builder receives its own values as keyword arguments. Confirm whether this cell is still needed.
checkpoint_dir = "../trained_models/gru4rec_pytorch_oob_bprmax"
eval_hidden_reset=False
use_correct_loss=False
use_correct_mask_reset=False

In [12]:
import subprocess

# Training command: run it through the shell and capture both output streams.
process_train = subprocess.Popen(
    train_script_oob,
    shell=True,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)
stdout_train, stderr_train = process_train.communicate()

# Each heading is followed by the decoded stream on the next line.
print("Salida de STDOUT (Entrenamiento):", stdout_train.decode(), sep="\n")
print("Salida de STDERR (Entrenamiento):", stderr_train.decode(), sep="\n")

# Evaluation command: same procedure; it is started regardless of how training exited.
process_test = subprocess.Popen(
    test_script_oob,
    shell=True,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)
stdout_test, stderr_test = process_test.communicate()

print("Salida de STDOUT (Evaluación):", stdout_test.decode(), sep="\n")
print("Salida de STDERR (Evaluación):", stderr_test.decode(), sep="\n")


In [9]:
from model_tracker import track_training_C02_emissions

# Hand the out-of-the-box training command to the project's emissions tracker.
# NOTE(review): the two string arguments look like a model label and a dataset label, and the
# recorded output shows codecarbon and carbontracker logs — confirm the exact contract in model_tracker.
track_training_C02_emissions(train_script_oob, "gru4rec_pytorch_oob_bprmax", "coveo")

[codecarbon INFO @ 18:07:42] [setup] RAM Tracking...
[codecarbon INFO @ 18:07:42] [setup] GPU Tracking...
[codecarbon INFO @ 18:07:43] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 18:07:43] [setup] CPU Tracking...
[codecarbon INFO @ 18:07:46] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i9-13900HX
[codecarbon INFO @ 18:07:46] >>> Tracker's metadata:
[codecarbon INFO @ 18:07:46]   Platform system: Windows-11-10.0.22631-SP0
[codecarbon INFO @ 18:07:46]   Python version: 3.12.3
[codecarbon INFO @ 18:07:46]   CodeCarbon version: 2.4.2
[codecarbon INFO @ 18:07:46]   Available RAM : 31.746 GB
[codecarbon INFO @ 18:07:46]   CPU count: 32
[codecarbon INFO @ 18:07:46]   CPU model: 13th Gen Intel(R) Core(TM) i9-13900HX
[codecarbon INFO @ 18:07:46]   GPU count: 1
[codecarbon INFO @ 18:07:46]   GPU model: 1 x NVIDIA GeForce RTX 4090 Laptop GPU


CarbonTracker: The following components were found: GPU with device(s) NVIDIA GeForce RTX 4090 Laptop GPU.
The training of models in this work is estimated to use 0.000 kWh of electricity contributing to 0.000 kg of CO2eq. Measured by carbontracker (https://github.com/lfwa/carbontracker).


[codecarbon INFO @ 18:08:05] Energy consumed for RAM : 0.000050 kWh. RAM Power : 11.904736518859863 W
[codecarbon INFO @ 18:08:05] Energy consumed for all GPUs : 0.000075 kWh. Total GPU Power : 18.102358800362374 W
[codecarbon INFO @ 18:08:05] Energy consumed for all CPUs : 0.000177 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 18:08:05] 0.000302 kWh of electricity used since the beginning.
[codecarbon INFO @ 18:08:20] Energy consumed for RAM : 0.000099 kWh. RAM Power : 11.904736518859863 W
[codecarbon INFO @ 18:08:20] Energy consumed for all GPUs : 0.000323 kWh. Total GPU Power : 59.311941401261564 W
[codecarbon INFO @ 18:08:20] Energy consumed for all CPUs : 0.000354 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 18:08:20] 0.000776 kWh of electricity used since the beginning.
[codecarbon INFO @ 18:08:35] Energy consumed for RAM : 0.000149 kWh. RAM Power : 11.904736518859863 W
[codecarbon INFO @ 18:08:35] Energy consumed for all GPUs : 0.000562 kWh. Total GPU Power : 57.400719123

CarbonTracker: Average carbon intensity during training was 374.46 gCO2/kWh at detected location: Santiago, Santiago Metropolitan, CL.
CarbonTracker: 
Actual consumption for 1 epoch(s):
	Time:	0:03:54
	Energy:	0.005298935666 kWh
	CO2eq:	1.984256564879 g
	This is equivalent to:
	0.018458200604 km travelled by car
CarbonTracker: Live carbon intensity could not be fetched at detected location: Santiago, Santiago Metropolitan, CL. Defaulted to average carbon intensity for CL in 2021 of 374.46 gCO2/kWh. at detected location: Santiago, Santiago Metropolitan, CL.
CarbonTracker: 
Predicted consumption for 1 epoch(s):
	Time:	0:03:54
	Energy:	0.005298935666 kWh
	CO2eq:	1.984256564879 g
	This is equivalent to:
	0.018458200604 km travelled by car
CarbonTracker: Finished monitoring.
Salida de STDOUT:                       Args                                             Values
0              hidden_size                                                512
1               num_layers                   

0.0024018610206306937

### Train the out-of-the-box model

In [17]:
# Run the out-of-the-box training command through the system shell. The cell's value is
# os.system's exit status; a non-zero value means the command did not succeed.
os.system(train_script_oob)

sh: 1: python: not found


32512

### Test the out-of-the-box model

In [None]:
# Run the out-of-the-box evaluation command through the system shell; the cell's value is the exit status.
os.system(test_script_oob)

## Train & test inference fix model

In [None]:
# Inference-fix configuration: identical to the out-of-the-box call except that
# eval_hidden_reset is enabled; dropout stays at 0.0 and the other two fix flags stay off.
train_script_inffix, test_script_inffix = create_gru4rec_pytorch_script(
    model_name='gru4rec_pytorch_inffix_bprmax',
    train_folder=train_folder,
    train_data=train_data,
    test_data=test_data,
    model_path=model_path,
    loss=loss,
    optim=optim,
    final_act=final_act,
    layers=layers,
    batch_size=batch_size,
    dropout_p_embed=0.0,
    dropout_p_hidden=0.0,
    learning_rate=learning_rate,
    n_epochs=n_epochs,
    m=m,
    eval_hidden_reset=True,
    use_correct_loss=False,
    use_correct_mask_reset=False,
)

### Train the out-of-the-box eval fix model

In [None]:
# Run the inference-fix training command through the system shell; the cell's value is the exit status.
os.system(train_script_inffix)

### Test the out-of-the-box eval fix model

In [None]:
# Run the inference-fix evaluation command through the system shell; the cell's value is the exit status.
os.system(test_script_inffix)

## Train & test the major fix model

In [None]:
# Major-fix configuration: dropout rates come from the selected setup and all three
# fix flags (eval_hidden_reset, use_correct_loss, use_correct_mask_reset) are enabled.
train_script_majorfix, test_script_majorfix = create_gru4rec_pytorch_script(
    model_name='gru4rec_pytorch_majorfix_bprmax',
    train_folder=train_folder,
    train_data=train_data,
    test_data=test_data,
    model_path=model_path,
    loss=loss,
    optim=optim,
    final_act=final_act,
    layers=layers,
    batch_size=batch_size,
    dropout_p_embed=dropout_p_embed,
    dropout_p_hidden=dropout_p_hidden,
    learning_rate=learning_rate,
    n_epochs=n_epochs,
    m=m,
    eval_hidden_reset=True,
    use_correct_loss=True,
    use_correct_mask_reset=True,
)

### Train the major fix model

In [None]:
# Run the major-fix training command through the system shell; the cell's value is the exit status.
os.system(train_script_majorfix)

### Test the major fix model

In [None]:
# Run the major-fix evaluation command through the system shell; the cell's value is the exit status.
os.system(test_script_majorfix)