In [24]:
import os
from experiment_setup import setups
from model_tracker import track_training_C02_emissions

In [43]:
# Download the dataset first; the links can be found in the README.
# `dataset` selects the entry of `setups` and the file names used in the cells below.
dataset = "coveo"
# Folder of the selected dataset: passed to the preprocessing script and read for the train/test files.
dataset_path = f"../datasets/{dataset}"
# Base folder handed to the training/evaluation commands as the model location.
model_path = "../trained_models"

## Run the preprocessing script specific to the dataset you chose

- In general, the preprocessing script executes the following steps:
    - Loads the raw data with the correct types
    - Creates the sessions
    - Removes duplicated items. An item is considered a duplicate if the preceding (based on time) event in the same session contains the exact same item.
    - Performs iterative support filtering
        - Removes sessions with only one event
        - Removes items with fewer than 5 events
        - Repeats until the size of the dataset no longer changes


In [None]:
# Run the preprocessing script on the folder given by `dataset_path`.
# NOTE: the script name is hard-coded for the coveo dataset; switch to the matching
# preprocessing script if `dataset` is changed above.
%run ../Preprocess/coveo_preproc.py --path $dataset_path

## Use a specific setup for your dataset

In [44]:
# Hyperparameters of the selected dataset, read from the "params_xe" entry of the imported `setups` mapping.
params = setups[dataset]["params_xe"]

In [45]:
# Locations of the preprocessed train and test files inside the dataset folder.
train_path, test_path = (
    os.path.join(dataset_path, f"{dataset}_processed_view_train_full.tsv"),
    os.path.join(dataset_path, f"{dataset}_processed_view_test.tsv"),
)

In [46]:
def create_keras_gru4rec_script(model_name, train_path, test_path, model_path, loss, optim, layers, batch_size, dropout_p_hidden, learning_rate, n_epochs, m, use_correct_mask_reset):
    """Build the shell commands that train and evaluate a KerasGRU4Rec model.

    Both commands invoke ``../KerasGRU4Rec/model/gru4rec.py`` and are returned
    as plain strings; nothing is executed here.

    Args:
        model_name: Appended to ``model_path`` to form the ``--save_path`` of
            the training command. It is not used by the evaluation command.
        train_path: Value of ``--train_path`` in both commands.
        test_path: Value of ``--test_path`` in both commands.
        model_path: Base folder used for ``--save_path`` and ``--resume``.
        loss: Accepted for interface compatibility; it is not inserted into
            either command.
        optim: Value of ``--optim`` (training only).
        layers: Value of ``--hidden_size`` (training only).
        batch_size: Value of ``--batch_size`` in both commands.
        dropout_p_hidden: Value of ``--dropout_p_hidden`` (training only).
        learning_rate: Value of ``--lr`` (training only).
        n_epochs: Value of ``--epochs``; the evaluation command resumes from
            the checkpoint numbered ``n_epochs - 1``.
        m: Value of ``--m`` in the evaluation command.
        use_correct_mask_reset: When truthy, ``--use_correct_mask_reset`` is
            appended to the training command.

    Returns:
        A ``(train_command, test_command)`` tuple of strings.
    """
    # NOTE(review): the --resume path is fixed to
    # {model_path}/KerasGRU4Rec/temp and does not depend on model_name or on
    # --save_path -- confirm against gru4rec.py that checkpoints land there.
    entry_point = "python ../KerasGRU4Rec/model/gru4rec.py"
    data_args = f"--train_path {train_path} --test_path {test_path}"

    if use_correct_mask_reset:
        mask_flag = '--use_correct_mask_reset'
    else:
        mask_flag = ''

    train_command = (
        f"{entry_point} {data_args}"
        f" --save_path {model_path}/{model_name}"
        f" --lr {learning_rate}"
        f" --hidden_size {layers}"
        f" --dropout_p_hidden {dropout_p_hidden}"
        # The trailing space keeps the double space before --optim.
        f" --batch_size {batch_size} "
        f" --optim {optim}"
        f" --epochs {n_epochs}"
        # A separating space is always emitted, even when the flag is empty.
        f" {mask_flag}"
    )

    last_epoch = n_epochs - 1
    test_command = (
        f"{entry_point} {data_args}"
        f" --resume {model_path}/KerasGRU4Rec/temp/GRU4REC_{last_epoch}.h5"
        f" --batch_size {batch_size}"
        f" --eval_only True"
        f" --m {m}"
    )
    return train_command, test_command

In [47]:
# Unpack the hyperparameters of the selected setup into individual variables.
# Forwarded to create_keras_gru4rec_script in the cells below:
#   loss, optim, layers, batch_size, dropout_p_hidden, learning_rate.
# The remaining values are read here but are not referenced by the other cells of this notebook.
loss = params["loss"]
optim = params["optim"]
const_emb = params["constrained_embedding"]
embed = params["embedding"]
final_act = params["final_act"]
layers = params["layers"]
batch_size = params["batch_size"]
dropout_p_embed = params["dropout_p_embed"]
dropout_p_hidden = params["dropout_p_hidden"]
learning_rate = params["learning_rate"]
momentum = params["momentum"]
sample_alpha = params["sample_alpha"]
bpreg = params["bpreg"]
logq = params["logq"]
hidden_act = params["hidden_act"]
# Number of training epochs; the evaluation command resumes from the checkpoint numbered n_epochs - 1.
n_epochs = 5
# Space-separated values forwarded to the evaluation command's --m option
# (presumably the cutoffs of the ranking metrics -- confirm in gru4rec.py).
m = '1 5 10 20'

## Train & test the out-of-the-box model

### Train the out-of-the-box model

In [15]:
# Out-of-the-box configuration: optimizer, hidden size, dropout and learning rate are
# hard-coded here instead of taken from `params`; only loss and batch_size come from the
# setup, and the mask-reset flag is disabled.
train_script_oob, test_script_oob = create_keras_gru4rec_script(model_name='keras_gru4rec_oob', train_path=train_path, test_path=test_path, model_path=model_path, loss=loss, optim='adam', layers=100, batch_size=batch_size, dropout_p_hidden=0.25, learning_rate=0.001, n_epochs=n_epochs, m=m, use_correct_mask_reset=False)

In [16]:
# Show the generated training command before running it.
print(train_script_oob)

python ../KerasGRU4Rec/model/gru4rec.py --train_path ../datasets/diginetica\diginetica_processed_view_train_full.tsv --test_path ../datasets/diginetica\diginetica_processed_view_test.tsv --save_path ../trained_models/keras_gru4rec_oob --lr 0.001 --hidden_size 100 --dropout_p_hidden 0.25 --batch_size 128  --optim adam --epochs 5 


In [8]:
# Hand the out-of-the-box training command to the emissions-tracking helper imported
# from model_tracker, together with the model name and the dataset name.
track_training_C02_emissions(train_script_oob, "keras_gru4rec_oob", dataset)

[codecarbon INFO @ 20:16:03] [setup] RAM Tracking...
[codecarbon INFO @ 20:16:03] [setup] GPU Tracking...
[codecarbon INFO @ 20:16:05] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 20:16:05] [setup] CPU Tracking...
[codecarbon INFO @ 20:16:06] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i9-13900HX
[codecarbon INFO @ 20:16:07] >>> Tracker's metadata:
[codecarbon INFO @ 20:16:07]   Platform system: Windows-11-10.0.22631-SP0
[codecarbon INFO @ 20:16:07]   Python version: 3.12.3
[codecarbon INFO @ 20:16:07]   CodeCarbon version: 2.4.2
[codecarbon INFO @ 20:16:07]   Available RAM : 31.746 GB
[codecarbon INFO @ 20:16:07]   CPU count: 32
[codecarbon INFO @ 20:16:07]   CPU model: 13th Gen Intel(R) Core(TM) i9-13900HX
[codecarbon INFO @ 20:16:07]   GPU count: 1
[codecarbon INFO @ 20:16:07]   GPU model: 1 x NVIDIA GeForce RTX 4090 Laptop GPU


CarbonTracker: The following components were found: GPU with device(s) NVIDIA GeForce RTX 4090 Laptop GPU.


[codecarbon INFO @ 20:16:20] Energy consumed for RAM : 0.000033 kWh. RAM Power : 11.904736518859863 W
[codecarbon INFO @ 20:16:20] Energy consumed for all GPUs : 0.000119 kWh. Total GPU Power : 42.62807491466081 W
[codecarbon INFO @ 20:16:20] Energy consumed for all CPUs : 0.000123 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 20:16:20] 0.000275 kWh of electricity used since the beginning.


CarbonTracker: Average carbon intensity during training was 374.46 gCO2/kWh at detected location: Santiago, Santiago Metropolitan, CL.
CarbonTracker: 
Actual consumption for 1 epoch(s):
	Time:	0:00:10
	Energy:	0.002565742890 kWh
	CO2eq:	0.960776369946 g
	This is equivalent to:
	0.008937454604 km travelled by car
CarbonTracker: Live carbon intensity could not be fetched at detected location: Santiago, Santiago Metropolitan, CL. Defaulted to average carbon intensity for CL in 2021 of 374.46 gCO2/kWh. at detected location: Santiago, Santiago Metropolitan, CL.
CarbonTracker: 
Predicted consumption for 1 epoch(s):
	Time:	0:00:10
	Energy:	0.002565742890 kWh
	CO2eq:	0.960776369946 g
	This is equivalent to:
	0.008937454604 km travelled by car
CarbonTracker: Finished monitoring.
Salida de STDOUT:                       Args                                             Values
0                   resume                                               None
1               train_path  ../datasets/coveo

9.13957657705152e-05

### Test the out-of-the-box model

In [None]:
# Run the evaluation command in a subshell; the value shown for this cell is the exit
# status returned by os.system.
os.system(test_script_oob)

## Train & test the minor fix model


In [None]:
# Minor-fix configuration: all hyperparameters come from the dataset setup (`params`),
# with the mask-reset flag still disabled.
train_script_minor, test_script_minor = create_keras_gru4rec_script(model_name='keras_gru4rec_minorfix', train_path=train_path, test_path=test_path, model_path=model_path, loss=loss, optim=optim, layers=layers, batch_size=batch_size, dropout_p_hidden=dropout_p_hidden, learning_rate=learning_rate, n_epochs=n_epochs, m=m, use_correct_mask_reset=False)

### Train the minor fix model

In [None]:
# Run the minor-fix training command in a subshell.
# NOTE(review): unlike the out-of-the-box and major-fix training cells, this one calls
# os.system directly rather than track_training_C02_emissions -- confirm this is intended.
os.system(train_script_minor)

### Test the minor fix model

In [None]:
# Run the minor-fix evaluation command in a subshell; the cell value is the exit status.
os.system(test_script_minor)

## Train & test the major fix model

In [48]:
# Major-fix configuration: same setup-driven hyperparameters as the minor-fix model, but
# with use_correct_mask_reset=True so the training command gets --use_correct_mask_reset.
# All arguments are passed by keyword, so their order in the call does not matter.
train_script_major, test_script_major = create_keras_gru4rec_script(model_name='keras_gru4rec_majorfix', test_path=test_path, train_path=train_path, model_path=model_path, loss=loss, optim=optim, layers=layers, batch_size=batch_size, dropout_p_hidden=dropout_p_hidden, learning_rate=learning_rate, n_epochs=n_epochs, m=m, use_correct_mask_reset=True)

### Train the major fix model

In [49]:
# Hand the major-fix training command to the emissions-tracking helper imported from
# model_tracker, together with the model name and the dataset name.
track_training_C02_emissions(train_script_major, "keras_gru4rec_majorfix", dataset)

[codecarbon INFO @ 21:16:57] [setup] RAM Tracking...
[codecarbon INFO @ 21:16:57] [setup] GPU Tracking...
[codecarbon INFO @ 21:16:57] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 21:16:59] [setup] CPU Tracking...
[codecarbon INFO @ 21:17:00] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i9-13900HX
Traceback (most recent call last):
  File "c:\Users\EVILAB\AppData\Local\Programs\Python\Python312\Lib\site-packages\codecarbon\core\gpu.py", line 238, in get_gpu_details
    devices_info.append(gpu_device.get_gpu_details())
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\EVILAB\AppData\Local\Programs\Python\Python312\Lib\site-packages\codecarbon\core\gpu.py", line 75, in get_gpu_details
    "power_usage": self._get_power_usage(),
                   ^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\EVILAB\AppData\Local\Programs\Python\Python312\Lib\site-packages\codecarbon\core\gpu.py", line 127, in _get_power_usage
    return pynvml.nvmlDeviceGetPowerU

### Test the major fix model

In [41]:
# Only display the major-fix evaluation command; the call that would execute it is
# commented out below.
print(test_script_major)
# os.system(test_script_major)

python ../KerasGRU4Rec/model/gru4rec.py --train_path ../datasets/retailrocket\retailrocket_processed_view_train_full.tsv --test_path ../datasets/retailrocket\retailrocket_processed_view_test.tsv --resume ../trained_models/KerasGRU4Rec/temp/GRU4REC_4.h5 --batch_size 240 --eval_only True --m 1 5 10 20
