In [28]:
import os
from experiment_setup import setups

In [29]:
# Download the dataset first; the links can be found in the README.
# Directory that holds the dataset files used by this notebook.
dataset_path = "../datasets/diginetica"
# Base directory under which each model's checkpoint directory is created.
model_path = "../trained_models"

## Run the preprocess script, specific to the dataset you chose

- In general, the preprocessing script executes the following steps:
    - Loads the raw data, with correct types
    - Creates the sessions
    - Removes duplicated items. An item is considered as a duplicate if the preceding (based on time) event in the same session contains the exact same item.
    - Performs iterative support filtering
        - Removes sessions with only one event
        - Removes items with less than 5 events
        - Repeats until the size of the dataset no longer changes


In [4]:
# NOTE(review): dataset_path is "../datasets/diginetica", but this cell runs the
# coveo preprocessing script and the recorded output lists coveo_ecommerce files.
# Confirm the script matches the chosen dataset before re-running.
%run ../Preprocess/coveo_preproc.py --path $dataset_path

1566074 274797 11365
1464757 173480 11344
1463706 173480 10869
1463649 173423 10869
1463645 173423 10868
1463645 173423 10868
1463645 173423 10868


  dt.datetime.utcfromtimestamp(data.Time.min() / 1000).strftime(
  dt.datetime.utcfromtimestamp(data.Time.max() / 1000).strftime(
  dt.datetime.utcfromtimestamp(data.Time.min() / 1000).strftime(
  dt.datetime.utcfromtimestamp(data.Time.max() / 1000).strftime(
  dt.datetime.utcfromtimestamp(data.Time.min() / 1000).strftime(
  dt.datetime.utcfromtimestamp(data.Time.max() / 1000).strftime(
  dt.datetime.utcfromtimestamp(data.Time.min() / 1000).strftime(
  dt.datetime.utcfromtimestamp(data.Time.max() / 1000).strftime(


                                             Dataset  NumEvents  NumSessions  \
0      coveo_ecommerce\coveo_processed_view_full.tsv    1463645       173423   
1      coveo_ecommerce\coveo_processed_view_test.tsv      52501         7748   
2  coveo_ecommerce\coveo_processed_view_train_ful...    1411113       165673   
3  coveo_ecommerce\coveo_processed_view_train_tr.tsv    1368003       159766   
4  coveo_ecommerce\coveo_processed_view_train_val...      43032         5905   

   NumItems    NumDays                   StartTime  \
0     10868  17.999833  2018-12-08 00:00:11.994000   
1      8230   0.998696  2018-12-25 00:01:50.223000   
2     10868  16.999566  2018-12-08 00:00:11.994000   
3     10868  15.999713  2018-12-08 00:00:11.994000   
4      8014   0.997503  2018-12-24 00:03:10.240000   

                      EndTime  AvgItemViews  MinSessionLength  \
0  2018-12-25 23:59:57.577000    134.674733                 2   
1  2018-12-25 23:59:57.577000      6.379222                 2   

  dt.datetime.utcfromtimestamp(data.Time.min() / 1000).strftime(
  dt.datetime.utcfromtimestamp(data.Time.max() / 1000).strftime(


## Use a specific setup for your dataset

In [24]:
# Hyperparameter set stored under "params_xe" for the diginetica setup
# (presumably the cross-entropy configuration; the generated commands use --loss cross-entropy).
params = setups["diginetica"]["params_xe"]

In [25]:
# Train/test files passed to main.py via --train_path / --test_path.
# Presumably written by the preprocessing step — confirm the file names match its output.
train_path = os.path.join(dataset_path,"diginetica_processed_view_train_full.tsv")
test_path = os.path.join(dataset_path,"diginetica_processed_view_test.tsv")

In [26]:
def create_gru4rec_tensorflow_script(
    model_name, train_path, test_path, model_path, loss, final_act, layers,
    batch_size, dropout_p_hidden, learning_rate, hidden_act, n_epochs, m,
    decay, initial_accumulator,
):
    """Build the shell commands that train and test a GRU4Rec TensorFlow model.

    Both commands invoke ``../GRU4Rec_TensorFlow/gru4rec_BP/main.py``. The test
    command is the training command with ``--train 0 --test <n_epochs - 1>
    --m <m>`` appended, so both share every other option.

    Args:
        model_name: Sub-directory name; the checkpoint directory is
            ``{model_path}/{model_name}``.
        train_path: Value for ``--train_path``.
        test_path: Value for ``--test_path``.
        model_path: Base directory for checkpoints.
        loss: Value for ``--loss``.
        final_act: Value for ``--final_act``.
        layers: Value for ``--size`` (``--layer`` is always ``1``).
        batch_size: Value for ``--batch_size``.
        dropout_p_hidden: Value for ``--dropout``.
        learning_rate: Value for ``--lr``.
        hidden_act: Value for ``--hidden_act``.
        n_epochs: Value for ``--epoch``; must support ``n_epochs - 1``.
        m: Value for ``--m``, inserted verbatim (e.g. ``'1 5 10 20'``).
        decay: Value for ``--decay``.
        initial_accumulator: Value for ``--initial_accumulator_value``.

    Returns:
        A ``(train_command, test_command)`` tuple of strings.
    """
    # One "flag value" token per entry; joined with single spaces below.
    cli_tokens = [
        "python ../GRU4Rec_TensorFlow/gru4rec_BP/main.py",
        f"--train_path {train_path}",
        f"--test_path {test_path}",
        f"--checkpoint_dir {model_path}/{model_name}",
        "--layer 1",
        f"--size {layers}",
        f"--epoch {n_epochs}",
        f"--lr {learning_rate}",
        f"--hidden_act {hidden_act}",
        f"--final_act {final_act}",
        f"--loss {loss}",
        f"--dropout {dropout_p_hidden}",
        f"--batch_size {batch_size}",
        f"--initial_accumulator_value {initial_accumulator}",
        f"--decay {decay}",
    ]
    train_command = " ".join(cli_tokens)

    # The test run reuses every training option and points at the last epoch index.
    last_epoch = n_epochs - 1
    test_command = f"{train_command} --train 0 --test {last_epoch} --m {m}"
    return train_command, test_command

In [27]:
# Values forwarded to create_gru4rec_tensorflow_script by the cells below.
loss = params["loss"]
final_act = params["final_act"]
hidden_act = params["hidden_act"]
layers = params["layers"]
batch_size = params["batch_size"]
dropout_p_hidden = params["dropout_p_hidden"]
learning_rate = params["learning_rate"]

# Also read from the setup, but not referenced by any other cell visible in this notebook.
optim = params["optim"]
const_emb = params["constrained_embedding"]
embed = params["embedding"]
dropout_p_embed = params["dropout_p_embed"]
momentum = params["momentum"]
sample_alpha = params["sample_alpha"]
bpreg = params["bpreg"]
logq = params["logq"]

# Run settings fixed in this notebook rather than taken from the setup.
n_epochs = 5
# Passed verbatim as the value of --m in the test command.
m = '1 5 10 20'

## Train & test the out-of-the-box model

In [30]:
# Out-of-the-box run: batch size, decay and initial accumulator are hard-coded here
# instead of coming from the setup (presumably the upstream defaults — confirm).
train_script_oob, test_script_oob = create_gru4rec_tensorflow_script(
    model_name="gru4rec_tensorflow_oob",
    train_path=train_path,
    test_path=test_path,
    model_path=model_path,
    loss=loss,
    final_act=final_act,
    layers=layers,
    batch_size=50,
    dropout_p_hidden=dropout_p_hidden,
    learning_rate=learning_rate,
    hidden_act=hidden_act,
    n_epochs=n_epochs,
    m=m,
    decay=0.96,
    initial_accumulator=1e-1,
)

### Train the out-of-the-box model

In [32]:
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell at the top.
from model_tracker import track_training_C02_emissions

# Runs the training command under the emissions tracker. The recorded output shows
# codecarbon and CarbonTracker logs, and a float as the cell result
# (presumably the measured emissions — confirm units in model_tracker).
track_training_C02_emissions(train_script_oob, "gru4rec_tensorflow_oob", "diginetica")

[codecarbon INFO @ 20:51:32] [setup] RAM Tracking...
[codecarbon INFO @ 20:51:32] [setup] GPU Tracking...
[codecarbon INFO @ 20:51:32] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 20:51:34] [setup] CPU Tracking...
[codecarbon INFO @ 20:51:35] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i9-13900HX
[codecarbon INFO @ 20:51:35] >>> Tracker's metadata:
[codecarbon INFO @ 20:51:35]   Platform system: Windows-11-10.0.22631-SP0
[codecarbon INFO @ 20:51:35]   Python version: 3.12.3
[codecarbon INFO @ 20:51:35]   CodeCarbon version: 2.4.2
[codecarbon INFO @ 20:51:35]   Available RAM : 31.746 GB
[codecarbon INFO @ 20:51:35]   CPU count: 32
[codecarbon INFO @ 20:51:35]   CPU model: 13th Gen Intel(R) Core(TM) i9-13900HX
[codecarbon INFO @ 20:51:35]   GPU count: 1
[codecarbon INFO @ 20:51:35]   GPU model: 1 x NVIDIA GeForce RTX 4090 Laptop GPU


CarbonTracker: The following components were found: GPU with device(s) NVIDIA GeForce RTX 4090 Laptop GPU.
CarbonTracker: The following components were found: GPU with device(s) NVIDIA GeForce RTX 4090 Laptop GPU.
CarbonTracker: The following components were found: GPU with device(s) NVIDIA GeForce RTX 4090 Laptop GPU.
CarbonTracker: The following components were found: GPU with device(s) NVIDIA GeForce RTX 4090 Laptop GPU.


[codecarbon INFO @ 20:51:54] Energy consumed for RAM : 0.000050 kWh. RAM Power : 11.904736518859863 W
[codecarbon INFO @ 20:51:54] Energy consumed for all GPUs : 0.000413 kWh. Total GPU Power : 99.09882924052263 W
[codecarbon INFO @ 20:51:54] Energy consumed for all CPUs : 0.000182 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 20:51:54] 0.000645 kWh of electricity used since the beginning.
[codecarbon INFO @ 20:52:09] Energy consumed for RAM : 0.000098 kWh. RAM Power : 11.904736518859863 W
[codecarbon INFO @ 20:52:09] Energy consumed for all GPUs : 0.000669 kWh. Total GPU Power : 62.966113901585366 W
[codecarbon INFO @ 20:52:09] Energy consumed for all CPUs : 0.000359 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 20:52:09] 0.001126 kWh of electricity used since the beginning.
[codecarbon INFO @ 20:52:24] Energy consumed for RAM : 0.000146 kWh. RAM Power : 11.904736518859863 W
[codecarbon INFO @ 20:52:24] Energy consumed for all GPUs : 0.000860 kWh. Total GPU Power : 47.2391967917

CarbonTracker: Average carbon intensity during training was 374.46 gCO2/kWh at detected location: Santiago, Santiago Metropolitan, CL.
CarbonTracker: Average carbon intensity during training was 374.46 gCO2/kWh at detected location: Santiago, Santiago Metropolitan, CL.
CarbonTracker: Average carbon intensity during training was 374.46 gCO2/kWh at detected location: Santiago, Santiago Metropolitan, CL.
CarbonTracker: Average carbon intensity during training was 374.46 gCO2/kWh at detected location: Santiago, Santiago Metropolitan, CL.
CarbonTracker: 
Actual consumption for 1 epoch(s):
	Time:	0:01:00
	Energy:	0.012452121068 kWh
	CO2eq:	4.662861475526 g
	This is equivalent to:
	0.043375455586 km travelled by car
CarbonTracker: 
Actual consumption for 1 epoch(s):
	Time:	0:01:00
	Energy:	0.012452121068 kWh
	CO2eq:	4.662861475526 g
	This is equivalent to:
	0.043375455586 km travelled by car
CarbonTracker: 
Actual consumption for 1 epoch(s):
	Time:	0:01:00
	Energy:	0.012452121068 kWh
	CO2eq:	

0.0006139305004492953

### Test the out-of-the-box model

In [33]:
# Echo the full command so the exact evaluation settings are recorded with the output.
print(test_script_oob)
# Run the command through the system shell; the value returned by os.system is
# shown as the cell result (0 in the recorded run).
os.system(test_script_oob)

python ../GRU4Rec_TensorFlow/gru4rec_BP/main.py --train_path ../datasets/diginetica\diginetica_processed_view_train_full.tsv --test_path ../datasets/diginetica\diginetica_processed_view_test.tsv --checkpoint_dir ../trained_models/gru4rec_tensorflow_oob --layer 1 --size 192 --epoch 5 --lr 0.1 --hidden_act tanh --final_act softmax --loss cross-entropy --dropout 0.15 --batch_size 50 --initial_accumulator_value 0.1 --decay 0.96 --train 0 --test 4 --m 1 5 10 20


0

## Train & test the minor fix model


In [34]:
# Minor-fix run: batch size comes from the setup, while decay and the initial
# accumulator are overridden with the values below.
train_script_minor, test_script_minor = create_gru4rec_tensorflow_script(
    model_name='gru4rec_tensorflow_minorfix',
    train_path=train_path,
    test_path=test_path,
    model_path=model_path,
    loss=loss,
    final_act=final_act,
    layers=layers,
    batch_size=batch_size,
    dropout_p_hidden=dropout_p_hidden,
    learning_rate=learning_rate,
    hidden_act=hidden_act,
    n_epochs=n_epochs,
    m=m,
    decay=1.0,
    initial_accumulator=1e-12,
)

### Train the minor fix model

In [35]:
# Runs the minor-fix training command under the emissions tracker.
# Relies on track_training_C02_emissions having been imported in the
# out-of-the-box training cell earlier in the notebook.
track_training_C02_emissions(train_script_minor, "gru4rec_tensorflow_minorfix", "diginetica")

[codecarbon INFO @ 20:56:01] [setup] RAM Tracking...
[codecarbon INFO @ 20:56:01] [setup] GPU Tracking...
[codecarbon INFO @ 20:56:01] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 20:56:03] [setup] CPU Tracking...
[codecarbon INFO @ 20:56:04] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i9-13900HX
[codecarbon INFO @ 20:56:04] >>> Tracker's metadata:
[codecarbon INFO @ 20:56:04]   Platform system: Windows-11-10.0.22631-SP0
[codecarbon INFO @ 20:56:04]   Python version: 3.12.3
[codecarbon INFO @ 20:56:04]   CodeCarbon version: 2.4.2
[codecarbon INFO @ 20:56:04]   Available RAM : 31.746 GB
[codecarbon INFO @ 20:56:04]   CPU count: 32
[codecarbon INFO @ 20:56:04]   CPU model: 13th Gen Intel(R) Core(TM) i9-13900HX
[codecarbon INFO @ 20:56:04]   GPU count: 1
[codecarbon INFO @ 20:56:04]   GPU model: 1 x NVIDIA GeForce RTX 4090 Laptop GPU


CarbonTracker: The following components were found: GPU with device(s) NVIDIA GeForce RTX 4090 Laptop GPU.
CarbonTracker: The following components were found: GPU with device(s) NVIDIA GeForce RTX 4090 Laptop GPU.
CarbonTracker: The following components were found: GPU with device(s) NVIDIA GeForce RTX 4090 Laptop GPU.
CarbonTracker: The following components were found: GPU with device(s) NVIDIA GeForce RTX 4090 Laptop GPU.
CarbonTracker: The following components were found: GPU with device(s) NVIDIA GeForce RTX 4090 Laptop GPU.


[codecarbon INFO @ 20:56:23] Energy consumed for RAM : 0.000051 kWh. RAM Power : 11.904736518859863 W
[codecarbon INFO @ 20:56:23] Energy consumed for all GPUs : 0.000439 kWh. Total GPU Power : 102.48604686266538 W
[codecarbon INFO @ 20:56:23] Energy consumed for all CPUs : 0.000187 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 20:56:23] 0.000677 kWh of electricity used since the beginning.
[codecarbon INFO @ 20:56:38] Energy consumed for RAM : 0.000099 kWh. RAM Power : 11.904736518859863 W
[codecarbon INFO @ 20:56:38] Energy consumed for all GPUs : 0.000575 kWh. Total GPU Power : 33.466073083270224 W
[codecarbon INFO @ 20:56:38] Energy consumed for all CPUs : 0.000364 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 20:56:38] 0.001038 kWh of electricity used since the beginning.
[codecarbon INFO @ 20:56:53] Energy consumed for RAM : 0.000147 kWh. RAM Power : 11.904736518859863 W
[codecarbon INFO @ 20:56:53] Energy consumed for all GPUs : 0.000719 kWh. Total GPU Power : 35.857213685

CarbonTracker: Average carbon intensity during training was 374.46 gCO2/kWh at detected location: Santiago, Santiago Metropolitan, CL.
CarbonTracker: Average carbon intensity during training was 374.46 gCO2/kWh at detected location: Santiago, Santiago Metropolitan, CL.
CarbonTracker: Average carbon intensity during training was 374.46 gCO2/kWh at detected location: Santiago, Santiago Metropolitan, CL.
CarbonTracker: Average carbon intensity during training was 374.46 gCO2/kWh at detected location: Santiago, Santiago Metropolitan, CL.
CarbonTracker: Average carbon intensity during training was 374.46 gCO2/kWh at detected location: Santiago, Santiago Metropolitan, CL.
CarbonTracker: 
Actual consumption for 1 epoch(s):
	Time:	0:00:45
	Energy:	0.011577595357 kWh
	CO2eq:	4.335383752991 g
	This is equivalent to:
	0.040329151191 km travelled by car
CarbonTracker: 
Actual consumption for 1 epoch(s):
	Time:	0:00:45
	Energy:	0.011577595357 kWh
	CO2eq:	4.335383752991 g
	This is equivalent to:
	0.

0.0004673301362888544

### Test the minor fix model

In [36]:
# Echo the full command so the exact evaluation settings are recorded with the output.
print(test_script_minor)
# Run the command through the system shell; the value returned by os.system is
# shown as the cell result (0 in the recorded run).
os.system(test_script_minor)

python ../GRU4Rec_TensorFlow/gru4rec_BP/main.py --train_path ../datasets/diginetica\diginetica_processed_view_train_full.tsv --test_path ../datasets/diginetica\diginetica_processed_view_test.tsv --checkpoint_dir ../trained_models/gru4rec_tensorflow_minorfix --layer 1 --size 192 --epoch 5 --lr 0.1 --hidden_act tanh --final_act softmax --loss cross-entropy --dropout 0.15 --batch_size 128 --initial_accumulator_value 1e-12 --decay 1.0 --train 0 --test 4 --m 1 5 10 20


0