In [1]:
import os
from experiment_setup import setups
import subprocess
import torch
from codecarbon import EmissionsTracker
import json
from IPython.display import display, HTML
from datetime import datetime




In [19]:
# download the dataset, the links can be fund in the README
dataset_path = "../datasets/diginetica"
model_path = "../trained_models"

In [3]:
# Función para entrenar un modelo, rastrear las emisiones de CO2 y guardar la información de entrenamiento
def track_training_C02_emissions(command, trained_model_folder, lossFunction, dataset):

    # Inicializamos el tracker
    tracker = EmissionsTracker()
    
    try:
        # Obtenemos la fecha y hora de inicio
        start_time = datetime.now()

        #iniciamos el tracker
        tracker.start()
        
        # Ejecutamos el comando de entrenamiento
        training_process = subprocess.run(command, shell=True, capture_output=True, text=True)

        # Detenemos el tracker y obtenemos las emisiones finales
        emissions = tracker.stop()

        # Obtenemos la fecha y hora de finalización
        end_time = datetime.now()

        # Imprimimos la salida de la ejecución
        print(f"Salida de STDOUT: {training_process.stdout}")

    except FileNotFoundError as e:
        print(f"Error: {e}")

    except Exception as e:
        print(f"Unexpected Error: {e}")
    
    

    # Ruta del archivo JSON
    json_file_path = os.path.join("..", "trained_models", trained_model_folder ,"trainingData.json")
    
    # Leer el archivo JSON existente
    existing_data = []
    if os.path.exists(json_file_path):
        try:
            with open(json_file_path, 'r') as f:
                existing_data = json.load(f)
        except json.JSONDecodeError:
            print(f"El archivo {json_file_path} está vacío o contiene datos inválidos, se inicializará como una lista vacía.")
            existing_data = []

    # Preparar la información del entrenamiento
    training_info = {
        "training_iteration": len(existing_data) + 1,  # Número de iteración basado en el tamaño del dataset existente
        "date": start_time.strftime("%Y-%m-%d %H:%M:%S"),
        "execution_time_seconds": (end_time - start_time).total_seconds(),
        "CO2_emissions_kg": emissions,
        "LossFunction": lossFunction,
        "dataset": dataset

    }

    # Agregar la nueva información del entrenamiento
    existing_data.append(training_info)

    # Escribir los datos actualizados al archivo JSON
    with open(json_file_path, 'w') as f:
        json.dump(existing_data, f, indent=4)

    # Finalmente, retornamos las emisiones de CO2
    return emissions

## Run the preprocess script, specific to the dataset you chose

- The preprocessing script in general, executes the following steps:
    - Loads the raw data, with correct types
    - Creates the sessions
    - Removes duplicated items. An item is considered as a duplicate if the preceding (based on time) event in the same session contains the exact same item.
    - Performes iterative support filtering
        - Removes sessions with only one event
        - Removes items with less than 5 events
        - Until the size of the dataset changes


In [None]:
%run coveo_preproc.py --path $dataset_path

## Use a specific setup for your dataset

In [20]:
params = setups["diginetica"]["params_bprmax"]
params2 = setups["diginetica"]["params_xe"]

In [28]:
train_path = os.path.join(dataset_path,"diginetica_processed_view_train_full.tsv")
test_path = os.path.join(dataset_path,"diginetica_processed_view_test.tsv")

In [22]:
def create_torch_gru4rec_script(model_name, train_path, test_path, model_path, loss, optim, final_act, layers, batch_size, dropout_p_embed, dropout_p_hidden, learning_rate, sample_alpha, bpreg, n_epochs, n_sample, m):
    s_train_full = f" python ../Torch-GRU4Rec/main.py --save_path {model_path}/{model_name} --train_path {train_path} --loss {'nll' if loss =='cross-entropy' else loss} --optimizer {optim} --n_epochs {n_epochs} --embedding_size {layers} --hidden_size {layers} --n_layers {1} --final_act {'softmaxlogit' if final_act=='softmax' else final_act} --batch_size {batch_size} --dropout_p_embed {dropout_p_embed} --dropout_p_hidden {dropout_p_hidden} --lr {learning_rate} --n_sample {n_sample} --sample_alpha {sample_alpha} --bpreg {bpreg}"
    s_test_full = f" python ../Torch-GRU4Rec/main.py --train_path {train_path} --test_path {test_path} --model_path {model_path}/{model_name}/model_0000{n_epochs-1}.pt --test  --m {m}"
    return s_train_full, s_test_full

In [23]:
loss = params["loss"]
optim = params["optim"]
const_emb = params["constrained_embedding"]
embed = params["embedding"]
final_act = params["final_act"]
layers = params["layers"]
batch_size = params["batch_size"]
dropout_p_embed = params["dropout_p_embed"]
dropout_p_hidden = params["dropout_p_hidden"]
learning_rate = params["learning_rate"]
momentum = params["momentum"]
sample_alpha = params["sample_alpha"]
bpreg = params["bpreg"]
logq = params["logq"]
hidden_act = params["hidden_act"]
n_sample = params["n_sample"]
n_epochs = 5
m = '1 5 10 20'

In [11]:
loss2 = params2["loss"]
optim2 = params2["optim"]
const_emb2 = params2["constrained_embedding"]
embed2 = params2["embedding"]
final_act2 = params2["final_act"]
layers2 = params2["layers"]
batch_size2 = params2["batch_size"]
dropout_p_embed2 = params2["dropout_p_embed"]
dropout_p_hidden2 = params2["dropout_p_hidden"]
learning_rate2 = params2["learning_rate"]
momentum2 = params2["momentum"]
sample_alpha2 = params2["sample_alpha"]
bpreg2 = params2["bpreg"]
logq2 = params2["logq"]
hidden_act2 = params2["hidden_act"]
n_sample2 = params2["n_sample"]
n_epochs2 = 5
m2 = '1 5 10 20'

## Train & test the out-of-the-box model

In [29]:
train_script_oob_bpr, test_script_oob_bpr = create_torch_gru4rec_script(model_name='torch_gru4rec_oob_bprmax', train_path=train_path, test_path=test_path, model_path=model_path, loss=loss, optim=optim, final_act=final_act, layers=layers, batch_size=batch_size, dropout_p_embed=dropout_p_embed, dropout_p_hidden=dropout_p_hidden, learning_rate=learning_rate, sample_alpha=sample_alpha, bpreg=bpreg, n_epochs=n_epochs, n_sample=n_sample, m=m)

In [30]:
train_script_oob_xl, test_script_oob_xl = create_torch_gru4rec_script(model_name='torch_gru4rec_oob_bprmax', train_path=train_path, test_path=test_path, model_path=model_path, loss=loss2, optim=optim2, final_act=final_act2, layers=layers2, batch_size=batch_size2, dropout_p_embed=dropout_p_embed2, dropout_p_hidden=dropout_p_hidden2, learning_rate=learning_rate2, sample_alpha=sample_alpha2, bpreg=bpreg2, n_epochs=n_epochs2, n_sample=n_sample2, m=m2)

In [31]:
print(train_script_oob_bpr)
print(test_script_oob_bpr)
print("\n")
print(train_script_oob_xl)
print(test_script_oob_xl)

 python ../Torch-GRU4Rec/main.py --save_path ../trained_models/torch_gru4rec_oob_bprmax --train_path ../datasets/diginetica\diginetica_processed_view_train_full.tsv --loss bpr-max --optimizer adagrad --n_epochs 5 --embedding_size 512 --hidden_size 512 --n_layers 1 --final_act elu-1 --batch_size 128 --dropout_p_embed 0.5 --dropout_p_hidden 0.3 --lr 0.05 --n_sample 2048 --sample_alpha 0.3 --bpreg 0.9
 python ../Torch-GRU4Rec/main.py --train_path ../datasets/diginetica\diginetica_processed_view_train_full.tsv --test_path ../datasets/diginetica\diginetica_processed_view_test.tsv --model_path ../trained_models/torch_gru4rec_oob_bprmax/model_00004.pt --test  --m 1 5 10 20


 python ../Torch-GRU4Rec/main.py --save_path ../trained_models/torch_gru4rec_oob_bprmax --train_path ../datasets/diginetica\diginetica_processed_view_train_full.tsv --loss nll --optimizer adagrad --n_epochs 5 --embedding_size 192 --hidden_size 192 --n_layers 1 --final_act softmaxlogit --batch_size 240 --dropout_p_embed 0.

### Train the out-of-the-box model

In [32]:
for i in range(1):
    emissions = track_training_C02_emissions(train_script_oob_bpr, "torch_gru4rec_oob_bprmax", "BPR-Max", "Diginetica")
    # Imprimimos las emisiones de carbono con estilo
    if emissions is not None:
        display(HTML(f"<h2 style='color: green;'>Emisiones de CO2: {emissions} kg</h2>"))
    else:
        display(HTML("<h2 style='color: red;'>Hubo un error durante la ejecución del comando.</h2>"))

[codecarbon INFO @ 15:41:04] [setup] RAM Tracking...
[codecarbon INFO @ 15:41:04] [setup] GPU Tracking...
[codecarbon INFO @ 15:41:04] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 15:41:04] [setup] CPU Tracking...
[codecarbon INFO @ 15:41:06] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i7-13700HX
[codecarbon INFO @ 15:41:06] >>> Tracker's metadata:
[codecarbon INFO @ 15:41:06]   Platform system: Windows-11-10.0.22631-SP0
[codecarbon INFO @ 15:41:06]   Python version: 3.12.3
[codecarbon INFO @ 15:41:06]   CodeCarbon version: 2.4.2
[codecarbon INFO @ 15:41:06]   Available RAM : 29.701 GB
[codecarbon INFO @ 15:41:06]   CPU count: 24
[codecarbon INFO @ 15:41:06]   CPU model: 13th Gen Intel(R) Core(TM) i7-13700HX
[codecarbon INFO @ 15:41:06]   GPU count: 1
[codecarbon INFO @ 15:41:06]   GPU model: 1 x NVIDIA GeForce RTX 4060 Laptop GPU
[codecarbon INFO @ 15:41:21] Energy consumed for RAM : 0.000046 kWh. RAM Power : 11.1377534866333 W
[codecarbon INFO @ 15:41:21] 

Salida de STDOUT:                   Args                                             Values
0            save_path         ../trained_models/torch_gru4rec_oob_bprmax
1           train_path  ../datasets/diginetica\diginetica_processed_vi...
2           valid_path                                                   
3            test_path                                                   
4                 test                                              False
5                    m                                               [20]
6           model_path                                                   
7             n_epochs                                                  5
8                 loss                                            bpr-max
9            optimizer                                            adagrad
10                  lr                                               0.05
11      embedding_size                                                512
12         hidden_si

### Test the out-of-the-box model

In [33]:
test_process = subprocess.run(test_script_oob_bpr, shell=True, capture_output=True, text=True)
print(f"Salida de STDOUT: {test_process.stdout}")


Exception in thread Thread-63 (_readerthread):
Traceback (most recent call last):
  File "c:\Users\Juanc\AppData\Local\Programs\Python\Python312\Lib\threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "c:\Users\Juanc\AppData\Local\Programs\Python\Python312\Lib\site-packages\ipykernel\ipkernel.py", line 766, in run_closure
    _threading_Thread_run(self)
  File "c:\Users\Juanc\AppData\Local\Programs\Python\Python312\Lib\threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "c:\Users\Juanc\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 1599, in _readerthread
    buffer.append(fh.read())
                  ^^^^^^^^^
  File "c:\Users\Juanc\AppData\Local\Programs\Python\Python312\Lib\encodings\cp1252.py", line 23, in decode
    return codecs.charmap_decode(input,self.errors,decoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeDecodeError: 'charmap' codec can't decode byte 0x8f in positio

Salida de STDOUT:                   Args                                             Values
0            save_path                                                   
1           train_path  ../datasets/diginetica\diginetica_processed_vi...
2           valid_path                                                   
3            test_path  ../datasets/diginetica\diginetica_processed_vi...
4                 test                                               True
5                    m                                     [1, 5, 10, 20]
6           model_path  ../trained_models/torch_gru4rec_oob_bprmax/mod...
7             n_epochs                                                  5
8                 loss                                                nll
9            optimizer                                            adagrad
10                  lr                                               None
11      embedding_size                                                 -1
12         hidden_si