In [1]:
# One-time CodeCarbon setup: creates an experiment id and stores it in ./.codecarbon.config
! codecarbon init


Welcome to CodeCarbon, here is your experiment id:
8c5eece3-1309-4aea-a822-e8f636025071

CodeCarbon automatically added this id to your local config: ./.codecarbon.config



In [79]:
import json
import os
import subprocess
from datetime import datetime

import torch
from codecarbon import EmissionsTracker
from IPython.display import display, HTML

from experiment_setup import setups


In [102]:
def track_training_C02_emissions(command, trained_model_folder):
    """Run a training command while tracking its CO2 emissions, then log the run.

    The command is executed through the shell with its output captured. When it
    finishes successfully, one record (iteration number, start date, wall-clock
    duration in seconds and the tracker's emissions value) is appended to
    ``../trained_models/<trained_model_folder>/trainingData.json``.

    Args:
        command: Shell command line (a single string) that launches the training.
        trained_model_folder: Name of the sub-folder of ``../trained_models``
            where ``trainingData.json`` is kept. It is created if missing.

    Returns:
        The value returned by ``EmissionsTracker.stop()`` for a successful run,
        or ``None`` when the tracker or the command could not be run, or when
        the command exited with a non-zero status. Failed runs are not logged.
    """
    tracker = EmissionsTracker()

    try:
        start_time = datetime.now()
        tracker.start()
        try:
            # NOTE: shell=True executes `command` verbatim; only pass trusted,
            # notebook-generated command strings.
            training_process = subprocess.run(command, shell=True, capture_output=True, text=True)
        finally:
            # Always stop a started tracker, even if launching the command
            # failed, so it does not keep measuring in the background.
            emissions = tracker.stop()
        end_time = datetime.now()
    except FileNotFoundError as e:
        print(f"Error: {e}")
        return None
    except Exception as e:
        print(f"Unexpected Error: {e}")
        return None

    print(f"Salida de STDOUT: {training_process.stdout}")

    # A non-zero exit status means training did not complete: surface stderr
    # (it is captured, so it would otherwise be lost) and do not log the run.
    if training_process.returncode != 0:
        print(f"Error: el comando terminó con código de salida {training_process.returncode}")
        print(f"Salida de STDERR: {training_process.stderr}")
        return None

    # Make sure the destination folder exists before reading/writing the log.
    model_dir = os.path.join("..", "trained_models", trained_model_folder)
    os.makedirs(model_dir, exist_ok=True)
    json_file_path = os.path.join(model_dir, "trainingData.json")

    # Load the previous records, falling back to an empty list if the file is
    # missing, empty or not valid JSON.
    existing_data = []
    if os.path.exists(json_file_path):
        try:
            with open(json_file_path, 'r') as f:
                existing_data = json.load(f)
        except json.JSONDecodeError:
            print(f"El archivo {json_file_path} está vacío o contiene datos inválidos, se inicializará como una lista vacía.")
            existing_data = []

    training_info = {
        # Iteration number is derived from how many runs are already logged.
        "training_iteration": len(existing_data) + 1,
        "date": start_time.strftime("%Y-%m-%d %H:%M:%S"),
        "execution_time_seconds": (end_time - start_time).total_seconds(),
        "CO2_emissions_kg": emissions
    }
    existing_data.append(training_info)

    with open(json_file_path, 'w') as f:
        json.dump(existing_data, f, indent=4)

    return emissions

In [5]:
# Download the dataset first; the links can be found in the README.
# dataset_path: folder that holds the raw and preprocessed Coveo files.
dataset_path = "../datasets/coveo_ecommerce"
# model_path: parent folder under which each model's checkpoint directory is created.
model_path = "../trained_models"

In [45]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device: ", device)

Using device:  cuda


## Run the preprocess script, specific to the dataset you chose

- The preprocessing script in general, executes the following steps:
    - Loads the raw data, with correct types
    - Creates the sessions
    - Removes duplicated items. An item is considered as a duplicate if the preceding (based on time) event in the same session contains the exact same item.
    - Performs iterative support filtering
        - Removes sessions with only one event
        - Removes items with fewer than 5 events
        - Repeats until the size of the dataset stops changing


In [8]:
# Create the emissions tracker
tracker = EmissionsTracker()

# Start tracking
tracker.start()

# Run the Coveo preprocessing script on the dataset folder ($dataset_path is expanded by IPython)
%run ../Preprocess/coveo_preproc.py --path $dataset_path

# Preprocessing is done: stop the tracker and get the CO2 emitted
emissions = tracker.stop()

# Print the emissions
print(f"Emisiones de CO2: {emissions} kg")


[codecarbon INFO @ 12:38:54] [setup] RAM Tracking...
[codecarbon INFO @ 12:38:54] [setup] GPU Tracking...
[codecarbon INFO @ 12:38:54] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 12:38:54] [setup] CPU Tracking...
[codecarbon INFO @ 12:38:56] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i7-13700HX
[codecarbon INFO @ 12:38:56] >>> Tracker's metadata:
[codecarbon INFO @ 12:38:56]   Platform system: Windows-11-10.0.22631-SP0
[codecarbon INFO @ 12:38:56]   Python version: 3.12.3
[codecarbon INFO @ 12:38:56]   CodeCarbon version: 2.4.2
[codecarbon INFO @ 12:38:56]   Available RAM : 31.701 GB
[codecarbon INFO @ 12:38:56]   CPU count: 24
[codecarbon INFO @ 12:38:56]   CPU model: 13th Gen Intel(R) Core(TM) i7-13700HX
[codecarbon INFO @ 12:38:56]   GPU count: 1
[codecarbon INFO @ 12:38:56]   GPU model: 1 x NVIDIA GeForce RTX 4060 Laptop GPU


1566074 274797 11365


[codecarbon INFO @ 12:39:11] Energy consumed for RAM : 0.000050 kWh. RAM Power : 11.8877534866333 W


1464757 173480 11344
1463706 173480 10869
1463649 173423 10869
1463645 173423 10868
1463645 173423 10868


[codecarbon INFO @ 12:39:12] Energy consumed for all GPUs : 0.000018 kWh. Total GPU Power : 4.1905319409242345 W
[codecarbon INFO @ 12:39:12] Energy consumed for all CPUs : 0.000186 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 12:39:12] 0.000253 kWh of electricity used since the beginning.


1463645 173423 10868


  dt.datetime.utcfromtimestamp(data.Time.min() / 1000).strftime(
  dt.datetime.utcfromtimestamp(data.Time.max() / 1000).strftime(
  dt.datetime.utcfromtimestamp(data.Time.min() / 1000).strftime(
  dt.datetime.utcfromtimestamp(data.Time.max() / 1000).strftime(
  dt.datetime.utcfromtimestamp(data.Time.min() / 1000).strftime(
  dt.datetime.utcfromtimestamp(data.Time.max() / 1000).strftime(
  dt.datetime.utcfromtimestamp(data.Time.min() / 1000).strftime(
  dt.datetime.utcfromtimestamp(data.Time.max() / 1000).strftime(
  dt.datetime.utcfromtimestamp(data.Time.min() / 1000).strftime(
  dt.datetime.utcfromtimestamp(data.Time.max() / 1000).strftime(
[codecarbon INFO @ 12:39:18] Energy consumed for RAM : 0.000070 kWh. RAM Power : 11.8877534866333 W
[codecarbon INFO @ 12:39:18] Energy consumed for all GPUs : 0.000033 kWh. Total GPU Power : 9.08076534645388 W
[codecarbon INFO @ 12:39:18] Energy consumed for all CPUs : 0.000258 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 12:39:18] 0.000361 kW

                                             Dataset  NumEvents  NumSessions  \
0      coveo_ecommerce\coveo_processed_view_full.tsv    1463645       173423   
1      coveo_ecommerce\coveo_processed_view_test.tsv      52501         7748   
2  coveo_ecommerce\coveo_processed_view_train_ful...    1411113       165673   
3  coveo_ecommerce\coveo_processed_view_train_tr.tsv    1368003       159766   
4  coveo_ecommerce\coveo_processed_view_train_val...      43032         5905   

   NumItems    NumDays                   StartTime  \
0     10868  17.999833  2018-12-08 00:00:11.994000   
1      8230   0.998696  2018-12-25 00:01:50.223000   
2     10868  16.999566  2018-12-08 00:00:11.994000   
3     10868  15.999713  2018-12-08 00:00:11.994000   
4      8014   0.997503  2018-12-24 00:03:10.240000   

                      EndTime  AvgItemViews  MinSessionLength  \
0  2018-12-25 23:59:57.577000    134.674733                 2   
1  2018-12-25 23:59:57.577000      6.379222                 2   

## Use a specific setup for your dataset

In [12]:
# Hyperparameter set for the Coveo dataset, taken from experiment_setup.setups
# (presumably the BPR-max loss configuration, judging by the key name — confirm in experiment_setup)
params = setups["coveo"]["params_bprmax"]

In [13]:
# Paths of the preprocessed train/test splits inside the dataset folder
train_path = os.path.join(dataset_path,"coveo_processed_view_train_full.tsv")
test_path = os.path.join(dataset_path,"coveo_processed_view_test.tsv")

In [14]:
def create_gru4rec_pytorch_script(model_name, train_folder, train_data, test_data, model_path, loss, optim, final_act, layers, batch_size, dropout_p_embed, dropout_p_hidden, learning_rate, n_epochs, m, eval_hidden_reset, use_correct_loss, use_correct_mask_reset):
    """Build the training and evaluation command lines for GRU4REC-pytorch.

    Args:
        model_name: Name of the checkpoint sub-folder created under ``model_path``.
        train_folder: Value passed as ``--data_folder``.
        train_data: Value passed as ``--train_data``.
        test_data: Value passed as ``--valid_data``.
        model_path: Parent folder of the checkpoint directory.
        loss: ``'bpr-max'`` selects ``BPR-max``; any other value selects ``CrossEntropy``.
        optim: ``'adagrad'`` adds ``--optimizer_type Adagrad``; for any other
            value the flag is omitted so the script's own default applies.
        final_act: Value passed as ``--final_act``.
        layers: Used for both ``--embedding_dim`` and ``--hidden_size``.
        batch_size: Value passed as ``--batch_size``.
        dropout_p_embed: Value passed as ``--dropout_input``.
        dropout_p_hidden: Value passed as ``--dropout_hidden``.
        learning_rate: Value passed as ``--lr``.
        n_epochs: Number of training epochs; also determines which checkpoint
            the evaluation command loads (the one of the last epoch).
        m: Value passed as ``--m`` in the evaluation command (e.g. ``'1 5 10 20'``).
        eval_hidden_reset: Adds ``--eval_hidden_reset`` when truthy.
        use_correct_loss: Adds ``--use_correct_loss`` when truthy.
        use_correct_mask_reset: Adds ``--use_correct_mask_reset`` when truthy.

    Returns:
        A ``(train_command, test_command)`` tuple of strings. The test command
        is the train command plus ``--is_eval``, ``--load_model`` and ``--m``.
    """
    # Paths are assembled with backslashes, i.e. the commands target a Windows shell.
    checkpoint_dir = f"{model_path}\\{model_name}"
    loss_type = 'BPR-max' if loss == 'bpr-max' else 'CrossEntropy'
    # Emitting "--optimizer_type" with an empty value would make the next flag
    # be parsed as its argument, so drop the flag when there is no known mapping.
    optimizer_arg = " --optimizer_type Adagrad" if optim == 'adagrad' else ""

    s_train_full = (
        f"python ..\\GRU4REC-pytorch\\main.py --data_folder {train_folder} "
        f"--train_data {train_data} --valid_data {test_data} --checkpoint_dir {checkpoint_dir} "
        f"--num_layers 1 --embedding_dim {layers} --hidden_size {layers} "
        f"--loss_type {loss_type} --final_act {final_act} "
        f"--n_epochs {n_epochs} --batch_size {batch_size} --dropout_input {dropout_p_embed} "
        f"--dropout_hidden {dropout_p_hidden} --lr {learning_rate} --momentum 0.0"
        f"{optimizer_arg}"
        f"{' --eval_hidden_reset' if eval_hidden_reset else ''}"
        f"{' --use_correct_loss' if use_correct_loss else ''}"
        f"{' --use_correct_mask_reset' if use_correct_mask_reset else ''}"
    )

    # Checkpoint of the last epoch (0-based). Zero-padding to 5 digits matches
    # the previously hard-coded "0000<epoch>" for up to 10 epochs and keeps the
    # width fixed beyond that — assumes main.py pads to 5 digits; TODO confirm.
    last_epoch = int(n_epochs) - 1
    s_test_full = s_train_full + f" --is_eval --load_model {checkpoint_dir}\\model_{last_epoch:05d}.pt --m {m}"
    return s_train_full, s_test_full

In [77]:
# Unpack the hyperparameter configuration used for training.
# Only some of these are passed to create_gru4rec_pytorch_script in this notebook
# (loss, optim, final_act, layers, batch_size, both dropouts, learning_rate).
loss = params["loss"]
optim = params["optim"]
const_emb = params["constrained_embedding"]
embed = params["embedding"]
final_act = params["final_act"]
layers = params["layers"]
batch_size = params["batch_size"]
dropout_p_embed = params["dropout_p_embed"]
dropout_p_hidden = params["dropout_p_hidden"]
learning_rate = params["learning_rate"]
momentum = params["momentum"]
sample_alpha = params["sample_alpha"]
bpreg = params["bpreg"]
logq = params["logq"]
hidden_act = params["hidden_act"]
# Hard-coded here rather than read from params: number of epochs and the
# space-separated list given to --m in the evaluation command.
n_epochs = 5
m = '1 5 10 20'

## Train & test the out-of-the-box model

In [16]:
# Split each path at its last '/' into (folder, file name); rpartition yields
# (head, sep, tail), so [::2] keeps head and tail. With no '/' the folder is ''.
train_folder, train_data = train_path.rpartition('/')[::2]
test_folder, test_data = test_path.rpartition('/')[::2]

In [17]:
# Out-of-the-box configuration: no dropout and none of the fix flags enabled
train_script_oob, test_script_oob = create_gru4rec_pytorch_script(
    model_name='gru4rec_pytorch_oob_bprmax',
    train_folder=train_folder,
    train_data=train_data,
    test_data=test_data,
    model_path=model_path,
    loss=loss,
    optim=optim,
    final_act=final_act,
    layers=layers,
    batch_size=batch_size,
    dropout_p_embed=0.0,
    dropout_p_hidden=0.0,
    learning_rate=learning_rate,
    n_epochs=n_epochs,
    m=m,
    eval_hidden_reset=False,
    use_correct_loss=False,
    use_correct_mask_reset=False,
)

### Train the out-of-the-box model

In [103]:
emissions = track_training_C02_emissions(train_script_oob, "gru4rec_pytorch_oob_bprmax")
# Show the outcome as a styled banner: red when no emissions value came back, green otherwise
if emissions is None:
    display(HTML("<h2 style='color: red;'>Hubo un error durante la ejecución del comando.</h2>"))
else:
    display(HTML(f"<h2 style='color: green;'>Emisiones de CO2: {emissions} kg</h2>"))

[codecarbon INFO @ 17:00:48] [setup] RAM Tracking...
[codecarbon INFO @ 17:00:48] [setup] GPU Tracking...
[codecarbon INFO @ 17:00:48] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 17:00:49] [setup] CPU Tracking...
[codecarbon INFO @ 17:00:51] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i7-13700HX
[codecarbon INFO @ 17:00:51] >>> Tracker's metadata:
[codecarbon INFO @ 17:00:51]   Platform system: Windows-11-10.0.22631-SP0
[codecarbon INFO @ 17:00:51]   Python version: 3.12.3
[codecarbon INFO @ 17:00:51]   CodeCarbon version: 2.4.2
[codecarbon INFO @ 17:00:51]   Available RAM : 31.701 GB
[codecarbon INFO @ 17:00:51]   CPU count: 24
[codecarbon INFO @ 17:00:51]   CPU model: 13th Gen Intel(R) Core(TM) i7-13700HX
[codecarbon INFO @ 17:00:51]   GPU count: 1
[codecarbon INFO @ 17:00:51]   GPU model: 1 x NVIDIA GeForce RTX 4060 Laptop GPU
[codecarbon INFO @ 17:01:06] Energy consumed for RAM : 0.000050 kWh. RAM Power : 11.8877534866333 W
[codecarbon INFO @ 17:01:06] 

Salida de STDOUT: Using CUDA: True
                      Args                                             Values
0              hidden_size                                                512
1               num_layers                                                  1
2               batch_size                                                144
3            dropout_input                                                0.0
4           dropout_hidden                                                0.0
5                 n_epochs                                                  5
6                        m                                               [20]
7           optimizer_type                                            Adagrad
8                final_act                                              elu-1
9                       lr                                               0.05
10            weight_decay                                                  0
11                momentum   

### Test the out-of-the-box model

In [81]:
try:
    # Run the evaluation command through the shell, capturing its output
    result = subprocess.run(test_script_oob, shell=True, capture_output=True, text=True)
    # Print the standard output
    print(result.stdout)

    # stderr is captured as well; show it on failure so the error is not lost
    if result.returncode != 0:
        print(result.stderr)

    # Print the return code
    print(result.returncode)
except FileNotFoundError as e:
    print(f"Error: {e}")
except Exception as e:
    print(f"Unexpected error: {e}")


Using CUDA: True
                      Args                                             Values
0              hidden_size                                                512
1               num_layers                                                  1
2               batch_size                                                144
3            dropout_input                                                0.0
4           dropout_hidden                                                0.0
5                 n_epochs                                                  5
6                        m                                     [1, 5, 10, 20]
7           optimizer_type                                            Adagrad
8                final_act                                              elu-1
9                       lr                                               0.05
10            weight_decay                                                  0
11                momentum                     

## Train & test inference fix model

In [82]:
# Inference-fix configuration: same as out-of-the-box but with eval_hidden_reset enabled
train_script_inffix, test_script_inffix = create_gru4rec_pytorch_script(
    model_name='gru4rec_pytorch_inffix_bprmax',
    train_folder=train_folder,
    train_data=train_data,
    test_data=test_data,
    model_path=model_path,
    loss=loss,
    optim=optim,
    final_act=final_act,
    layers=layers,
    batch_size=batch_size,
    dropout_p_embed=0.0,
    dropout_p_hidden=0.0,
    learning_rate=learning_rate,
    n_epochs=n_epochs,
    m=m,
    eval_hidden_reset=True,
    use_correct_loss=False,
    use_correct_mask_reset=False,
)

### Train the out-of-the-box eval fix model

In [92]:
emissions = track_training_C02_emissions(train_script_inffix, "gru4rec_pytorch_inffix_bprmax")
# Show the outcome as a styled banner: red when no emissions value came back, green otherwise
if emissions is None:
    display(HTML("<h2 style='color: red;'>Hubo un error durante la ejecución del comando.</h2>"))
else:
    display(HTML(f"<h2 style='color: green;'>Emisiones de CO2: {emissions} kg</h2>"))

[codecarbon INFO @ 16:21:39] [setup] RAM Tracking...
[codecarbon INFO @ 16:21:39] [setup] GPU Tracking...
[codecarbon INFO @ 16:21:39] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 16:21:40] [setup] CPU Tracking...
[codecarbon INFO @ 16:21:42] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i7-13700HX
Traceback (most recent call last):
  File "c:\Users\Juanc\AppData\Local\Programs\Python\Python312\Lib\site-packages\codecarbon\core\gpu.py", line 238, in get_gpu_details
    devices_info.append(gpu_device.get_gpu_details())
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Juanc\AppData\Local\Programs\Python\Python312\Lib\site-packages\codecarbon\core\gpu.py", line 75, in get_gpu_details
    "power_usage": self._get_power_usage(),
                   ^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Juanc\AppData\Local\Programs\Python\Python312\Lib\site-packages\codecarbon\core\gpu.py", line 127, in _get_power_usage
    return pynvml.nvmlDeviceGetPowerUsag

Salida de STDOUT: Using CUDA: True
                      Args                                             Values
0              hidden_size                                                512
1               num_layers                                                  1
2               batch_size                                                144
3            dropout_input                                                0.0
4           dropout_hidden                                                0.0
5                 n_epochs                                                  5
6                        m                                               [20]
7           optimizer_type                                            Adagrad
8                final_act                                              elu-1
9                       lr                                               0.05
10            weight_decay                                                  0
11                momentum   

### Test the out-of-the-box eval fix model

In [75]:
try:
    # Run the evaluation command through the shell, capturing its output
    result = subprocess.run(test_script_inffix, shell=True, capture_output=True, text=True)
    # Print the standard output
    print(result.stdout)

    # stderr is captured as well; show it with the exit code on failure so the error is not lost
    if result.returncode != 0:
        print(result.stderr)
        print(result.returncode)
except FileNotFoundError as e:
    print(f"Error: {e}")
except Exception as e:
    print(f"Unexpected error: {e}")


Using CUDA: True
                      Args                                             Values
0              hidden_size                                                512
1               num_layers                                                  1
2               batch_size                                                144
3            dropout_input                                                0.0
4           dropout_hidden                                                0.0
5                 n_epochs                                                  5
6                        m                                     [1, 5, 10, 20]
7           optimizer_type                                            Adagrad
8                final_act                                              elu-1
9                       lr                                               0.05
10            weight_decay                                                  0
11                momentum                     

## Train & test the major fix model

In [104]:
# Major-fix configuration: dropout taken from params and all three fix flags enabled
train_script_majorfix, test_script_majorfix = create_gru4rec_pytorch_script(
    model_name='gru4rec_pytorch_majorfix_bprmax',
    train_folder=train_folder,
    train_data=train_data,
    test_data=test_data,
    model_path=model_path,
    loss=loss,
    optim=optim,
    final_act=final_act,
    layers=layers,
    batch_size=batch_size,
    dropout_p_embed=dropout_p_embed,
    dropout_p_hidden=dropout_p_hidden,
    learning_rate=learning_rate,
    n_epochs=n_epochs,
    m=m,
    eval_hidden_reset=True,
    use_correct_loss=True,
    use_correct_mask_reset=True,
)

### Train the major fix model

In [105]:
emissions = track_training_C02_emissions(train_script_majorfix, "gru4rec_pytorch_majorfix_bprmax")
# Show the outcome as a styled banner: red when no emissions value came back, green otherwise
if emissions is None:
    display(HTML("<h2 style='color: red;'>Hubo un error durante la ejecución del comando.</h2>"))
else:
    display(HTML(f"<h2 style='color: green;'>Emisiones de CO2: {emissions} kg</h2>"))

[codecarbon INFO @ 17:05:53] [setup] RAM Tracking...
[codecarbon INFO @ 17:05:53] [setup] GPU Tracking...
[codecarbon INFO @ 17:05:53] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 17:05:54] [setup] CPU Tracking...
[codecarbon INFO @ 17:05:56] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i7-13700HX
Traceback (most recent call last):
  File "c:\Users\Juanc\AppData\Local\Programs\Python\Python312\Lib\site-packages\codecarbon\core\gpu.py", line 238, in get_gpu_details
    devices_info.append(gpu_device.get_gpu_details())
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Juanc\AppData\Local\Programs\Python\Python312\Lib\site-packages\codecarbon\core\gpu.py", line 75, in get_gpu_details
    "power_usage": self._get_power_usage(),
                   ^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Juanc\AppData\Local\Programs\Python\Python312\Lib\site-packages\codecarbon\core\gpu.py", line 127, in _get_power_usage
    return pynvml.nvmlDeviceGetPowerUsag

Salida de STDOUT: Using CUDA: True
                      Args                                             Values
0              hidden_size                                                512
1               num_layers                                                  1
2               batch_size                                                144
3            dropout_input                                               0.35
4           dropout_hidden                                                0.0
5                 n_epochs                                                  5
6                        m                                               [20]
7           optimizer_type                                            Adagrad
8                final_act                                              elu-1
9                       lr                                               0.05
10            weight_decay                                                  0
11                momentum   

### Test the major fix model

In [106]:
try:
    # Run the evaluation command through the shell, capturing its output
    result = subprocess.run(test_script_majorfix, shell=True, capture_output=True, text=True)
    # Print the standard output
    print(result.stdout)

    # stderr is captured as well; show it with the exit code on failure so the error is not lost
    if result.returncode != 0:
        print(result.stderr)
        print(result.returncode)
except FileNotFoundError as e:
    print(f"Error: {e}")
except Exception as e:
    print(f"Unexpected error: {e}")


Using CUDA: True
                      Args                                             Values
0              hidden_size                                                512
1               num_layers                                                  1
2               batch_size                                                144
3            dropout_input                                               0.35
4           dropout_hidden                                                0.0
5                 n_epochs                                                  5
6                        m                                     [1, 5, 10, 20]
7           optimizer_type                                            Adagrad
8                final_act                                              elu-1
9                       lr                                               0.05
10            weight_decay                                                  0
11                momentum                     