In [1]:
import os
from experiment_setup import setups

In [2]:
# Download the dataset first; the links can be found in the README.
# dataset_path is the folder that the path-building and preprocessing cells read from.
# model_path is presumably where trained models are stored — it is not referenced
# again in the visible cells; verify before relying on it.
dataset_path = "../datasets/diginetica"
model_path = "../trained_models"

## Run the preprocess script, specific to the dataset you chose

- In general, the preprocessing script executes the following steps:
    - Loads the raw data, with correct types
    - Creates the sessions
    - Removes duplicated items. An item is considered as a duplicate if the preceding (based on time) event in the same session contains the exact same item.
    - Performs iterative support filtering
        - Removes sessions with only one event
        - Removes items with less than 5 events
        - Repeats the two steps above until the size of the dataset stops changing


In [None]:
# Runs the preprocessing script on the folder given by dataset_path.
# NOTE(review): dataset_path points at the diginetica folder, yet this invokes the
# coveo preprocessing script — confirm this is the intended script for this dataset.
%run coveo_preproc.py --path $dataset_path

## Use a specific setup for your dataset

In [3]:
# Pick the hyperparameter set stored under "params_bprmax" for the diginetica entry of setups.
params = setups["diginetica"]["params_bprmax"]

In [4]:
# Train/test TSV files located inside dataset_path
# (presumably written by the preprocessing step — verify the file names against its output).
train_path = os.path.join(dataset_path,"diginetica_processed_view_train_full.tsv")
test_path = os.path.join(dataset_path,"diginetica_processed_view_test.tsv")

In [5]:
def create_recpack_script(model_name, train_path, test_path, loss, optim, layers, embedding, hidden_size, batch_size, dropout_p_embed, dropout_p_hidden, learning_rate, bpreg, n_epochs, n_sample, m, use_correct_weight_init):
    """Assemble the shell command that launches ``../recpack/main.py``.

    Every argument except ``model_name`` is rendered into the command as
    ``--<option> <value>``. ``model_name`` is accepted but does not appear in
    the generated command.

    Optional parts, appended in this order after ``--learning_rate``:
      * ``--n_sample <n_sample>``      only when ``loss == 'bpr-max'``
      * ``--use_correct_weight_init``  only when ``use_correct_weight_init`` is truthy
      * ``--bpreg <bpreg>``            only when ``loss == 'bpr-max'``

    Returns:
        The full command line as a single space-separated string.
    """
    bpr_max_selected = loss == 'bpr-max'

    # Options that are always present, in the order they appear in the command.
    tokens = [
        "python ../recpack/main.py",
        f"--train_path {train_path}",
        f"--test_path {test_path}",
        f"--m {m}",
        f"--n_epochs {n_epochs}",
        f"--loss {loss}",
        f"--optim {optim}",
        f"--layers {layers}",
        f"--embedding {embedding}",
        f"--hidden_size {hidden_size}",
        f"--batch_size {batch_size}",
        f"--dropout_p_embed {dropout_p_embed}",
        f"--dropout_p_hidden {dropout_p_hidden}",
        f"--learning_rate {learning_rate}",
    ]

    # Conditional options; the weight-init flag sits between the two bpr-max ones.
    if bpr_max_selected:
        tokens.append(f"--n_sample {n_sample}")
    if use_correct_weight_init:
        tokens.append("--use_correct_weight_init")
    if bpr_max_selected:
        tokens.append(f"--bpreg {bpreg}")

    return " ".join(tokens)

In [7]:
# Hyperparameters read from the selected setup, grouped by apparent role.
# Several of them (const_emb, embed, final_act, momentum, sample_alpha, logq,
# hidden_act) are not used by the visible command-building cells.

# Loss / optimiser settings
loss = params["loss"]
optim = params["optim"]
learning_rate = params["learning_rate"]
momentum = params["momentum"]
bpreg = params["bpreg"]
n_sample = params["n_sample"]
sample_alpha = params["sample_alpha"]
logq = params["logq"]

# Network settings
layers = params["layers"]
embed = params["embedding"]
const_emb = params["constrained_embedding"]
hidden_act = params["hidden_act"]
final_act = params["final_act"]

# Batching and dropout
batch_size = params["batch_size"]
dropout_p_embed = params["dropout_p_embed"]
dropout_p_hidden = params["dropout_p_hidden"]

# Run settings fixed in this notebook rather than read from the setup.
n_epochs = 5
m = '1 5 10 20'  # forwarded verbatim to the --m option of the command

## Train & test the out-of-the-box model

In [8]:
# Command for the out-of-the-box configuration.
# Differences from the setup values: bpreg is hard-coded to 1.0, the hidden
# dropout reuses dropout_p_embed, and the corrected weight init is switched off.
# NOTE(review): presumably these mirror the library defaults — confirm.
train_test_script_oob = create_recpack_script(
    model_name='recpack_oob_bprmax',
    train_path=train_path,
    test_path=test_path,
    loss=loss,
    optim=optim,
    layers=1,  # a single layer; the setup's `layers` value sizes it instead
    embedding=layers,
    hidden_size=layers,
    batch_size=batch_size,
    dropout_p_embed=dropout_p_embed,
    dropout_p_hidden=dropout_p_embed,
    learning_rate=learning_rate,
    bpreg=1.0,
    n_epochs=n_epochs,
    n_sample=n_sample,
    m=m,
    use_correct_weight_init=False,
)

In [9]:
from model_tracker import track_training_C02_emissions

# Hand the out-of-the-box command to the emissions tracker. The two string
# arguments look like a run label and the dataset name — verify against model_tracker.
track_training_C02_emissions(train_test_script_oob, "recpack_oob_bprmax", "diginetica")

[codecarbon INFO @ 21:59:22] [setup] RAM Tracking...
[codecarbon INFO @ 21:59:22] [setup] GPU Tracking...
[codecarbon INFO @ 21:59:23] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 21:59:23] [setup] CPU Tracking...
[codecarbon INFO @ 21:59:25] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i9-13900HX
[codecarbon INFO @ 21:59:25] >>> Tracker's metadata:
[codecarbon INFO @ 21:59:25]   Platform system: Windows-11-10.0.22631-SP0
[codecarbon INFO @ 21:59:25]   Python version: 3.12.3
[codecarbon INFO @ 21:59:25]   CodeCarbon version: 2.4.2
[codecarbon INFO @ 21:59:25]   Available RAM : 31.746 GB
[codecarbon INFO @ 21:59:25]   CPU count: 32
[codecarbon INFO @ 21:59:25]   CPU model: 13th Gen Intel(R) Core(TM) i9-13900HX
[codecarbon INFO @ 21:59:25]   GPU count: 1
[codecarbon INFO @ 21:59:25]   GPU model: 1 x NVIDIA GeForce RTX 4090 Laptop GPU


CarbonTracker: The following components were found: GPU with device(s) NVIDIA GeForce RTX 4090 Laptop GPU.


[codecarbon INFO @ 21:59:44] Energy consumed for RAM : 0.000051 kWh. RAM Power : 11.904736518859863 W
[codecarbon INFO @ 21:59:44] Energy consumed for all GPUs : 0.000235 kWh. Total GPU Power : 54.88129198857531 W
[codecarbon INFO @ 21:59:44] Energy consumed for all CPUs : 0.000182 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 21:59:44] 0.000468 kWh of electricity used since the beginning.
[codecarbon INFO @ 21:59:59] Energy consumed for RAM : 0.000101 kWh. RAM Power : 11.904736518859863 W
[codecarbon INFO @ 21:59:59] Energy consumed for all GPUs : 0.000524 kWh. Total GPU Power : 69.43805317571696 W
[codecarbon INFO @ 21:59:59] Energy consumed for all CPUs : 0.000359 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 21:59:59] 0.000984 kWh of electricity used since the beginning.
[codecarbon INFO @ 22:00:14] Energy consumed for RAM : 0.000150 kWh. RAM Power : 11.904736518859863 W
[codecarbon INFO @ 22:00:14] Energy consumed for all GPUs : 0.000818 kWh. Total GPU Power : 70.35206997298



[codecarbon INFO @ 22:14:29] Energy consumed for RAM : 0.002977 kWh. RAM Power : 11.904736518859863 W
[codecarbon INFO @ 22:14:29] Energy consumed for all GPUs : 0.017554 kWh. Total GPU Power : 70.53790364565674 W
[codecarbon INFO @ 22:14:29] Energy consumed for all CPUs : 0.010632 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 22:14:29] 0.031163 kWh of electricity used since the beginning.
[codecarbon INFO @ 22:14:44] Energy consumed for RAM : 0.003026 kWh. RAM Power : 11.904736518859863 W
[codecarbon INFO @ 22:14:44] Energy consumed for all GPUs : 0.017847 kWh. Total GPU Power : 70.2434426482984 W
[codecarbon INFO @ 22:14:44] Energy consumed for all CPUs : 0.010809 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 22:14:44] 0.031683 kWh of electricity used since the beginning.
[codecarbon INFO @ 22:14:59] Energy consumed for RAM : 0.003076 kWh. RAM Power : 11.904736518859863 W
[codecarbon INFO @ 22:14:59] Energy consumed for all GPUs : 0.018140 kWh. Total GPU Power : 70.397284202523



[codecarbon INFO @ 22:29:29] Energy consumed for RAM : 0.005952 kWh. RAM Power : 11.904736518859863 W
[codecarbon INFO @ 22:29:29] Energy consumed for all GPUs : 0.035017 kWh. Total GPU Power : 38.15110380122733 W
[codecarbon INFO @ 22:29:29] Energy consumed for all CPUs : 0.021261 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 22:29:29] 0.062230 kWh of electricity used since the beginning.
Exception in thread Thread-11 (_readerthread):
Traceback (most recent call last):
  File "c:\Users\EVILAB\AppData\Local\Programs\Python\Python312\Lib\threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "C:\Users\EVILAB\AppData\Roaming\Python\Python312\site-packages\ipykernel\ipkernel.py", line 766, in run_closure
    _threading_Thread_run(self)
  File "c:\Users\EVILAB\AppData\Local\Programs\Python\Python312\Lib\threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "c:\Users\EVILAB\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 1599,

CarbonTracker: Average carbon intensity during training was 374.46 gCO2/kWh at detected location: Santiago, Santiago Metropolitan, CL.
CarbonTracker: 
Actual consumption for 1 epoch(s):
	Time:	0:30:05
	Energy:	0.056582255519 kWh
	CO2eq:	21.187974162433 g
	This is equivalent to:
	0.197097434069 km travelled by car
CarbonTracker: Live carbon intensity could not be fetched at detected location: Santiago, Santiago Metropolitan, CL. Defaulted to average carbon intensity for CL in 2021 of 374.46 gCO2/kWh. at detected location: Santiago, Santiago Metropolitan, CL.
CarbonTracker: 
Predicted consumption for 1 epoch(s):
	Time:	0:30:05
	Energy:	0.056582255519 kWh
	CO2eq:	21.187974162433 g
	This is equivalent to:
	0.197097434069 km travelled by car
CarbonTracker: Finished monitoring.
Salida de STDOUT:                        Args                                             Values
0               cuda_dev_id                                                  0
1                train_path  ../datasets/

0.020728771553344297

## Train & test the minor fix model

In [None]:
# Command for the minor-fix configuration: bpreg comes from the setup and the
# corrected weight init is enabled; the hidden dropout still reuses dropout_p_embed.
train_test_script_minorfix = create_recpack_script(
    model_name='recpack_minorfix_bprmax',
    train_path=train_path,
    test_path=test_path,
    loss=loss,
    optim=optim,
    layers=1,  # a single layer; the setup's `layers` value sizes it instead
    embedding=layers,
    hidden_size=layers,
    batch_size=batch_size,
    dropout_p_embed=dropout_p_embed,
    dropout_p_hidden=dropout_p_embed,
    learning_rate=learning_rate,
    bpreg=bpreg,
    n_epochs=n_epochs,
    n_sample=n_sample,
    m=m,
    use_correct_weight_init=True,
)

In [None]:
# Run the minor-fix command through the system shell; the cell displays os.system's return value.
# NOTE(review): unlike the out-of-the-box run, this is not wrapped in
# track_training_C02_emissions — confirm that is intended.
os.system(train_test_script_minorfix)

## Train & test the major fix model

In [None]:
# Command for the major-fix configuration: bpreg and both dropout values come
# from the setup, and the corrected weight init is enabled.
train_test_script_majorfix = create_recpack_script(
    model_name='recpack_majorfix_bprmax',
    train_path=train_path,
    test_path=test_path,
    loss=loss,
    optim=optim,
    layers=1,  # a single layer; the setup's `layers` value sizes it instead
    embedding=layers,
    hidden_size=layers,
    batch_size=batch_size,
    dropout_p_embed=dropout_p_embed,
    dropout_p_hidden=dropout_p_hidden,
    learning_rate=learning_rate,
    bpreg=bpreg,
    n_epochs=n_epochs,
    n_sample=n_sample,
    m=m,
    use_correct_weight_init=True,
)

In [None]:
# Run the major-fix command through the system shell; the cell displays os.system's return value.
# NOTE(review): unlike the out-of-the-box run, this is not wrapped in
# track_training_C02_emissions — confirm that is intended.
os.system(train_test_script_majorfix)