# Preparation stuff

## Connect to Drive

In [1]:
# Switch for Google Colab runs: when True, the cells below mount Google Drive,
# install the required packages and use a Drive-based project folder.
connect_to_drive = False

In [2]:
#Run command and authorize by popup --> other window
# Only executed when connect_to_drive is True: mounts Google Drive at /content/gdrive,
# forcing a remount if it is already mounted.
if connect_to_drive:
    from google.colab import drive
    drive.mount('/content/gdrive', force_remount=True)

## Install packages

In [3]:
if connect_to_drive:
    #Install FS code
    !pip install  --upgrade --no-deps --force-reinstall git+https://github.com/federicosiciliano/easy_lightning.git@fedsic

    !pip install pytorch_lightning

## IMPORTS

In [4]:
#Put all imports here
import numpy as np
import matplotlib.pyplot as plt
from copy import deepcopy
#import pickle
import os
import sys
#import cv2
import csv
import torch

## Define paths

In [5]:
# Root of the project: every other path below is derived from it.
if connect_to_drive:
    # Drive is mounted: point at the shared drive folder
    # (for a personal drive use "/content/gdrive/MyDrive/<MyDriveName>" instead).
    project_folder = "/content/gdrive/Shareddrives/<SharedDriveName>"
else:
    # Local run: the project root is the parent of the working directory.
    project_folder = "../"

# Hyperparameter configurations
cfg_folder = os.path.join(project_folder, "cfg")

# Raw and preprocessed data live in two sub-folders of the data folder
data_folder = os.path.join(project_folder, "data")
raw_data_folder, processed_data_folder = (
    os.path.join(data_folder, subfolder) for subfolder in ("raw", "processed")
)

# All the (essential) source code
source_folder = os.path.join(project_folder, "src")

# Every output (models, results, plots, etc.); images go in their own sub-folder
out_folder = os.path.join(project_folder, "out")
img_folder = os.path.join(out_folder, "img")

## Import own code

In [6]:
#To import from src:

#attach the project folder (the parent of the src folder) to the start of sys.path,
#so that `from src import ...` resolves against this project first
sys.path.insert(0, project_folder)

#import from src directory
# from src import ??? as additional_module
# additional_module is later used as the provider of custom losses and metrics
import easy_rec as additional_module #REMOVE THIS LINE IF IMPORTING OWN ADDITIONAL MODULE

import easy_exp, easy_rec, easy_torch #easy_data

# MAIN

## Train

### Data

In [7]:
# Load the experiment configuration named "config_rec".
# NOTE(review): presumably looked up inside the cfg folder by easy_exp — confirm
cfg = easy_exp.cfg.load_configuration("config_rec")

In [8]:
#cfg["data_params"]["test_sizes"] = [cfg["data_params.dataset_params.out_seq_len.val"],cfg["data_params.dataset_params.out_seq_len.test"]]

# Work on a copy of the data section so that the added "data_folder" entry
# is not written into cfg itself (assumes .copy() returns a new mapping — TODO confirm).
data_params = cfg["data_params"].copy()
data_params["data_folder"] = raw_data_folder

# Returns the preprocessed data together with the id mappings;
# the cells below read maps['uid'] and maps['sid'].
data, maps = easy_rec.data_generation_utils.preprocess_dataset(**data_params)

Ratings data already exists. Skip pre-processing
Filtering by minimum number of users per item: 5
Filtering by minimum number of items per user: 5
Densifying index
Splitting: leave_n_out


In [9]:
#Save user and item mappings
# TODO: check

# open(..., "w") does not create missing directories: make sure the target folder exists.
os.makedirs(processed_data_folder, exist_ok=True)

# newline="" is required by the csv module: without it, platforms that translate
# "\n" to "\r\n" on write (e.g. Windows) end up with an empty row after every record.
# One row per (key, value) pair of maps['uid'].
with open(os.path.join(processed_data_folder,"user_map.csv"), "w", newline="") as f_user:
    w = csv.writer(f_user)
    w.writerows(maps['uid'].items())

# One row per (key, value) pair of maps['sid'].
with open(os.path.join(processed_data_folder,"item_map.csv"), "w", newline="") as f_item:
    w = csv.writer(f_item)
    w.writerows(maps['sid'].items())

In [10]:
# Build the datasets from the preprocessed data, configured by cfg["data_params"]["dataset_params"].
# The result is indexed by split name below (datasets["train"]).
datasets = easy_rec.rec_torch.prepare_rec_datasets(data,**cfg["data_params"]["dataset_params"])

In [11]:
# Copy of the collator section of the config: the next cells add/replace entries in it
# (assumes .copy() returns a new mapping, leaving cfg untouched — TODO confirm).
collator_params = cfg["data_params"]["collator_params"].copy()

In [12]:
# The number of items is taken as the largest value in the item mapping.
# NOTE(review): this equals the item count only if mapped ids are 1-based and dense — confirm
collator_params["num_items"] = np.max(list(maps["sid"].values()))

In [13]:
# Resolve the negatives distribution requested in the collator parameters:
#   - missing / None, or "uniform": the entry is left as it is
#   - "popularity": the entry is replaced by the result of get_popularity_items
#     computed on the train dataset
#   - anything else: rejected with a ValueError
app = collator_params.get("negatives_distribution", None)
if app == "popularity":
    collator_params["negatives_distribution"] = easy_rec.data_generation_utils.get_popularity_items(
        datasets["train"],
        collator_params["num_items"],
    )
elif app is not None and app not in ["uniform"]:
    raise ValueError("Invalid negatives distribution")

In [14]:
# Build the collators from the preprocessed data and the collator parameters prepared above;
# they are passed as collate_fn when the data loaders are created.
collators = easy_rec.rec_torch.prepare_rec_collators(data, **collator_params)

In [15]:
# Build the data loaders from the datasets, using cfg["model"]["loader_params"]
# and the collators as collate_fn.
loaders = easy_rec.rec_torch.prepare_rec_data_loaders(datasets, **cfg["model"]["loader_params"], collate_fn=collators)

In [16]:
# Start from a copy of the recommender section of the config, then add the
# data-dependent sizes (assumes .copy() returns a new mapping — TODO confirm).
rec_model_params = cfg["model"]["rec_model"].copy()
# Item / user counts are taken as the largest value of the respective mapping.
# NOTE(review): this equals the count only if mapped ids are 1-based and dense — confirm
rec_model_params["num_items"] = np.max(list(maps["sid"].values()))
rec_model_params["num_users"] = np.max(list(maps["uid"].values()))
# Same lookback as the one configured for the collator
rec_model_params["lookback"] = cfg["data_params"]["collator_params"]["lookback"]

In [17]:
# Instantiate the recommender network from the parameters prepared above;
# it is later passed to easy_torch.process.create_model as the main module.
main_module = easy_rec.rec_torch.create_rec_model(**rec_model_params)

Seed set to 42


In [18]:
# Get the id associated with this configuration, together with a flag
# reporting whether the experiment was already found.
exp_found, experiment_id = easy_exp.exp.get_set_experiment_id(cfg)
print(f"Experiment already found: {exp_found!s} ----> The experiment id is: {experiment_id!s}")

Experiment already found: False ----> The experiment id is: TlsAqPomWlMt5sHh


In [19]:
# # Find "original" implementation:
# # ...

# keys_to_change = {"model.rec_model.seed": 42}
# orig_cfg = deepcopy(cfg)
# for k,v in keys_to_change.items():
#     orig_cfg[k] = v

# orig_exp_found, orig_experiment_id = easy_exp.exp.get_experiment_id(orig_cfg)
# print("Experiment already found:", orig_exp_found, "----> The experiment id is:", orig_experiment_id)

# Load the original model (last or best) and make a prediction...
# Feed the prediction into the RLS metric... --> next cells

In [20]:
#if exp_found: exit() #TODO: make the notebook/script stop here if the experiment is already found

In [21]:
# Trainer parameters from the config, prepared with the experiment id
# NOTE(review): presumably makes log/checkpoint locations experiment-specific — confirm
trainer_params = easy_torch.preparation.prepare_experiment_id(cfg["model"]["trainer_params"], experiment_id)

# Prepare callbacks and logger using the prepared trainer_params
# (the configured entries are replaced in place by the prepared values)
trainer_params["callbacks"] = easy_torch.preparation.prepare_callbacks(trainer_params)
trainer_params["logger"] = easy_torch.preparation.prepare_logger(trainer_params)

# Prepare the trainer using the prepared trainer_params
trainer = easy_torch.preparation.prepare_trainer(**trainer_params)

# Copy of the whole model section of the config; loss, optimizer and metrics entries
# are replaced below by the prepared values before being forwarded to create_model
# (assumes .copy() returns a new mapping, leaving cfg untouched — TODO confirm).
model_params = cfg["model"].copy()

# Prepare the loss using configuration from cfg; additional_module.losses is passed as an extra source of losses
model_params["loss"] = easy_torch.preparation.prepare_loss(cfg["model"]["loss"], additional_module.losses)

# Prepare the optimizer using configuration from cfg
model_params["optimizer"] = easy_torch.preparation.prepare_optimizer(**cfg["model"]["optimizer"])

# Prepare the metrics using configuration from cfg; additional_module.metrics is passed as an extra source of metrics
model_params["metrics"] = easy_torch.preparation.prepare_metrics(cfg["model"]["metrics"], additional_module.metrics)

# Create the model using main_module, loss, and optimizer
# (every entry of model_params is forwarded as a keyword argument)
model = easy_torch.process.create_model(main_module, **model_params)

Seed set to 42
Seed set to 42
Seed set to 42
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/opt/homebrew/lib/python3.11/site-packages/pytorch_lightning/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42


In [22]:
# Prepare the emission tracker using configuration from cfg
#tracker = easy_torch.preparation.prepare_emission_tracker(**cfg["model"]["emission_tracker"], experiment_id=experiment_id)

In [23]:
# Prepare the flops profiler using configuration from cfg
#profiler = easy_torch.preparation.prepare_flops_profiler(model=model, **cfg["model"]["flops_profiler"], experiment_id=experiment_id)

### Train

In [24]:
# Evaluate the model on the train, val and test loaders before train_model is called below.
# NOTE(review): presumably meant as an untrained baseline — confirm
easy_torch.process.test_model(trainer, model, loaders, test_key=["train","val","test"]) #, tracker=tracker, profiler=profiler)

Seed set to 42
Missing logger folder: ../out/log/prova/TlsAqPomWlMt5sHh/lightning_logs
/opt/homebrew/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:492: Your `test_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
/opt/homebrew/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:436: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


Testing: |          | 0/? [00:00<?, ?it/s]

x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 10, 2])
now torch.Size([128, 10, 1350]) torch.Size([128, 10, 2])
x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 10, 2])
now torch.Size([128, 10, 1350]) torch.Size([128, 10, 2])
x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 10, 2])
now torch.Size([128, 10, 1350]) torch.Size([128, 10, 2])
x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 10, 2])
now torch.Size([128, 10, 1350]) t

In [25]:
# Train the model using the prepared trainer, model, and data loaders;
# both the "val" and "test" loaders are passed as validation keys.
easy_torch.process.train_model(trainer, model, loaders, val_key=["val","test"]) #tracker=tracker, profiler=profiler, 

Seed set to 42

  | Name        | Type                        | Params
------------------------------------------------------------
0 | main_module | CosRec2                     | 76.3 K
1 | loss        | SequentialBCEWithLogitsLoss | 0     
2 | metrics     | ModuleDict                  | 0     
------------------------------------------------------------
76.3 K    Trainable params
0         Non-trainable params
76.3 K    Total params
0.305     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/opt/homebrew/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:436: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 1, 2])
now torch.Size([128, 1, 1350]) torch.Size([128, 1, 2])
x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 1, 2])
now torch.Size([128, 1, 1350]) torch.Size([128, 1, 2])
x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 1, 101])
now torch.Size([128, 1, 1350]) torch.Size([128, 1, 101])
x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 1, 101])
now torch.Size([128, 1, 1350]) torch.

/opt/homebrew/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:436: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
/opt/homebrew/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (8) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 10, 2])
now torch.Size([128, 10, 1350]) torch.Size([128, 10, 2])
x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 10, 2])
now torch.Size([128, 10, 1350]) torch.Size([128, 10, 2])
x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 10, 2])
now torch.Size([128, 10, 1350]) torch.Size([128, 10, 2])
x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 10, 2])
now torch.Size([128, 10, 1350]) t

Validation: |          | 0/? [00:00<?, ?it/s]

x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 1, 2])
now torch.Size([128, 1, 1350]) torch.Size([128, 1, 2])
x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 1, 2])
now torch.Size([128, 1, 1350]) torch.Size([128, 1, 2])
x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 1, 2])
now torch.Size([128, 1, 1350]) torch.Size([128, 1, 2])
x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 1, 2])
now torch.Size([128, 1, 1350]) torch.Size([

`Trainer.fit` stopped: `max_epochs=1` reached.


x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 1, 101])
now torch.Size([128, 1, 1350]) torch.Size([128, 1, 101])
x torch.Size([47, 32, 10, 10])
pool torch.Size([47, 10, 32])
fc torch.Size([47, 10, 24])
fc2 torch.Size([47, 10, 24])
scores torch.Size([47, 10, 1350])
items_to_predict torch.Size([47, 1, 101])
now torch.Size([47, 1, 1350]) torch.Size([47, 1, 101])


In [26]:
# Evaluate the model after training, using the default test key of test_model
easy_torch.process.test_model(trainer, model, loaders) #, tracker=tracker, profiler=profiler)

Seed set to 42


Testing: |          | 0/? [00:00<?, ?it/s]

x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 1, 101])
now torch.Size([128, 1, 1350]) torch.Size([128, 1, 101])
x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 1, 101])
now torch.Size([128, 1, 1350]) torch.Size([128, 1, 101])
x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 1, 101])
now torch.Size([128, 1, 1350]) torch.Size([128, 1, 101])
x torch.Size([128, 32, 10, 10])
pool torch.Size([128, 10, 32])
fc torch.Size([128, 10, 24])
fc2 torch.Size([128, 10, 24])
scores torch.Size([128, 10, 1350])
items_to_predict torch.Size([128, 1, 101])
now torch.Size([128, 1, 1350]

In [27]:
# Store the experiment for the current configuration through easy_exp
#save_experiment_and_print_config(cfg)
easy_exp.exp.save_experiment(cfg)

# Completion banner: message, separator line, then one empty line
print(
    "Execution completed.",
    "######################################################################",
    "",
    sep="\n",
)

Execution completed.
######################################################################

