# Preparation stuff

## Connect to Drive

In [None]:
# Switch for Google Colab runs: when True, the cells below mount Google Drive,
# pip-install the dependencies and use the project folder on the mounted drive.
connect_to_drive = False

In [None]:
# Mount Google Drive at /content/gdrive (only when connect_to_drive is set).
# Running this cell asks for authorization through a popup in another window.
if connect_to_drive:
    from google.colab import drive
    # force_remount=True asks for a fresh mount even if one already exists
    drive.mount('/content/gdrive', force_remount=True)

## Install packages

In [None]:
# Only when connect_to_drive is set: install the required packages with pip.
if connect_to_drive:
    # Install easy_lightning from the "fedsic" branch of its GitHub repository.
    # --no-deps skips its dependencies; --force-reinstall replaces any installed copy.
    !pip install  --upgrade --no-deps --force-reinstall git+https://github.com/siciliano-diag/easy_lightning.git@fedsic

    # pytorch_lightning is installed explicitly (presumably because --no-deps
    # above does not pull it in -- TODO confirm).
    !pip install pytorch_lightning

## IMPORTS

In [None]:
#Put all imports here
import numpy as np
import matplotlib.pyplot as plt
#from copy import deepcopy
#import pickle
import os
import sys
#import cv2
import torch
import wandb

## Define paths

In [None]:
# All paths are derived from the project folder: the parent directory for
# local runs, the mounted shared drive when connect_to_drive is set.
# (For a personal drive use "/content/gdrive/MyDrive/<MyDriveName>" instead.)
project_folder = (
    "/content/gdrive/Shareddrives/RecSys Reproducibility"
    if connect_to_drive
    else "../"
)

# Top-level project directories:
#   cfg  - hyperparameter configurations
#   data - raw and preprocessed data
#   src  - all the (essential) source code
#   out  - every output: models, results, plots, etc.
cfg_folder, data_folder, source_folder, out_folder = (
    os.path.join(project_folder, name) for name in ("cfg", "data", "src", "out")
)

# Sub-directories of data/ and out/
raw_data_folder = os.path.join(data_folder, "raw")
processed_data_folder = os.path.join(data_folder, "processed")
img_folder = os.path.join(out_folder, "img")

## Import own code

In [None]:
# Put the project folder (not the src folder itself) first on sys.path, so that
# packages located under it, such as `src`, can be imported from this notebook.
sys.path.insert(0, project_folder)

# Utility packages used throughout the notebook, one import per line.
# NOTE(review): presumably provided by the easy_lightning install -- confirm.
import data_utils
import exp_utils
import torch_utils
import rec_utils

#from src.easy_lightning_rec_utils import rec_utils

# MAIN

## Train

### Data

In [None]:
# Load the experiment configuration through exp_utils; the cells below read
# and extend its "data_params" and "model" sections.
cfg = exp_utils.cfg.load_configuration()

In [None]:
# Point the data parameters at the raw-data folder defined with the other paths.
cfg["data_params"]["data_folder"] = raw_data_folder

In [None]:
#cfg["data_params"]["test_sizes"] = [cfg["data_params.dataset_params.out_seq_len.val"],cfg["data_params.dataset_params.out_seq_len.test"]]

# Preprocess the dataset, passing every entry of cfg["data_params"] as a keyword argument.
# Besides the data, this returns `maps`, whose "sid" and "uid" entries are read
# further down to size the loaders and the model
# (presumably raw-id -> index mappings -- TODO confirm).
data, maps = rec_utils.data_generation_utils.preprocess_dataset(**cfg["data_params"])

#TODO: save maps

In [None]:
# Build the datasets from the preprocessed data, configured by
# cfg["data_params"]["dataset_params"].
datasets = rec_utils.rec_torch_utils.prepare_rec_datasets(data,**cfg["data_params"]["dataset_params"])

In [None]:
# The loader parameters need the item count; it is taken as the largest value in maps["sid"].
# NOTE(review): this equals the number of items only if the mapped ids are
# contiguous and start at 1 -- confirm.
# NOTE(review): np.max yields a NumPy scalar rather than a Python int -- confirm
# that whatever consumes cfg later accepts it.
cfg["model"]["loader_params"]["num_items"] = np.max(list(maps["sid"].values()))

In [None]:
# Build the data loaders from the datasets and the preprocessed data, configured
# by cfg["model"]["loader_params"]; the result is indexed by split name below
# (e.g. loaders["train"]).
loaders = rec_utils.rec_torch_utils.prepare_rec_data_loaders(datasets, data, **cfg["model"]["loader_params"])

In [None]:
# Pull the first training batch into `x` and stop; handy for inspecting what the
# loader yields. If the loader is empty, `x` is simply left unassigned.
for x in loaders["train"]: break

In [None]:
# Complete the recommender-model parameters with values derived from the data:
# the largest item id, the largest user id, and the same lookback that was used
# to build the datasets.
cfg["model"]["rec_model"]["num_items"] = np.max(list(maps["sid"].values()))
cfg["model"]["rec_model"]["num_users"] = np.max(list(maps["uid"].values()))
cfg["model"]["rec_model"]["lookback"] = cfg["data_params"]["dataset_params"]["lookback"]

In [None]:
# Instantiate the recommender module from cfg["model"]["rec_model"]; it is
# wrapped into the trainable model further down.
main_module = rec_utils.rec_torch_utils.create_rec_model(**cfg["model"]["rec_model"])

In [None]:
# Get (or set) the experiment id for this configuration. Judging by the message
# printed below, exp_found tells whether the experiment already existed.
exp_found, experiment_id = exp_utils.exp.get_set_experiment_id(cfg)
print("Experiment already found:", exp_found, "----> The experiment id is:", experiment_id)

In [None]:
#if exp_found: exit() #TODO: make the notebook stop here if the experiment is already found

In [None]:
# Build the trainer parameters from cfg["model"]["trainer_params"], the
# experiment id and the output of get_clean_cfg(cfg).
trainer_params = torch_utils.preparation.prepare_experiment_id(cfg["model"]["trainer_params"], experiment_id, cfg=exp_utils.exp.get_clean_cfg(cfg))

# Prepare callbacks and logger using the prepared trainer_params
trainer_params["callbacks"] = torch_utils.preparation.prepare_callbacks(trainer_params)
trainer_params["logger"] = torch_utils.preparation.prepare_logger(trainer_params)

# Prepare the trainer using the prepared trainer_params
trainer = torch_utils.preparation.prepare_trainer(**trainer_params)

# Work on a copy of the model section, so that the entries replaced below
# (loss, optimizer, metrics) do not overwrite the ones stored in cfg.
# NOTE(review): .copy() is presumably shallow, so nested sections are still
# shared with cfg -- confirm.
model_params = cfg["model"].copy()

# Prepare the loss described in cfg, with rec_utils.losses passed as an additional source
model_params["loss"] = torch_utils.preparation.prepare_loss(cfg["model"]["loss"], rec_utils.losses)

# Prepare the optimizer using configuration from cfg
model_params["optimizer"] = torch_utils.preparation.prepare_optimizer(**cfg["model"]["optimizer"])

# Prepare the metrics using configuration from cfg, with rec_utils.metrics passed as an additional source
model_params["metrics"] = torch_utils.preparation.prepare_metrics(cfg["model"]["metrics"], rec_utils.metrics)

# Create the model from main_module and the prepared model_params
# (which carry the loss, optimizer and metrics built above)
model = torch_utils.process.create_model(main_module, **model_params)

In [None]:
# Prepare the emission tracker using cfg["model"]["emission_tracker"], tagged
# with the current experiment id; it is handed to the train and test calls below.
tracker = torch_utils.preparation.prepare_emission_tracker(**cfg["model"]["emission_tracker"], experiment_id=experiment_id)

### Train

In [None]:
# Train the model using the prepared trainer, model, and data loaders.
# val_key names two loaders, "val" and "test" (presumably both are evaluated
# during validation -- TODO confirm against train_model).
torch_utils.process.train_model(trainer, model, loaders, tracker=tracker, val_key=["val","test"])

In [None]:
# Test the trained model with the same trainer, loaders and emission tracker.
torch_utils.process.test_model(trainer, model, loaders, tracker=tracker)

In [None]:
# Save the experiment for the current configuration
#save_experiment_and_print_config(cfg)
exp_utils.exp.save_experiment(cfg)
# Close the Weights & Biases run
wandb.finish()
# Print completion message followed by a separator line and a blank line
print("Execution completed.")
print("######################################################################")
print()