In [None]:
import os,sys
from tqdm import tqdm
import pickle

import torch
from models.DNN import NN
from dataset_utils import H5DatasetDNN, split_dataset
from utils.torch_utils import MARELoss, train

### Define the model parameters:
-  **EPOCHS**: Number of epochs to be used for training the model
-  **TRAIN_BATCH_SIZE**: Batch size
-  **LAYERS**: A list which includes the number of nodes in each layer of the DNN starting with the input layer and ending with the output layer
-  **LRATE**: Learning rate
-  **TRAINING_FOLDER**: Folder to store the model summary and weights after every epoch

In [None]:
# Training configuration. Each constant is described in the markdown cell above.
EPOCHS = 50              # number of training epochs
TRAIN_BATCH_SIZE = 3     # samples per training batch

# Node count of every DNN layer, from the input layer to the output layer.
LAYERS = [
    28,
    40,
    35,
    1,
]

LRATE = 0.001            # learning rate
TRAINING_FOLDER = "../training/test"   # destination for the summary and the weights

### Loading Dataset:
Use the custom `H5DatasetDNN` dataset class and split the dataset into two subsets, one for training and one for testing. The dataset can also be converted to the pickle format, in which case the `PklDatasetDNN` class can be used instead. You can also write a custom dataset class and add it to the `dataset_utils.py` file.

In [None]:
# Open the HDF5 sample with the project's H5DatasetDNN wrapper, then let split_dataset
# hand back the 'train' and 'test' subsets.
# NOTE(review): the path is absolute and machine-specific — consider making it configurable.
file_path = '/home/rusack/shared/hdf5/hgcal_electron//hgcal_electron_data_test.h5'
dataset = H5DatasetDNN(file_path)
train_test_datasets = split_dataset(dataset)

# X / Y hold the training and testing subsets respectively (not features and labels).
X, Y = (train_test_datasets[part] for part in ('train', 'test'))

# Training data is served in mini-batches; the test subset is served as one batch that
# spans the entire subset.
dataloaders = dict(
    train=torch.utils.data.DataLoader(X, batch_size=TRAIN_BATCH_SIZE, shuffle=True),
    test=torch.utils.data.DataLoader(Y, batch_size=len(Y), shuffle=True),
)

Run the training, recording the training loss, validation loss and learning rate in lists that are then saved together as a pickle file.

In [None]:
# Train for EPOCHS epochs. Each epoch performs one optimisation pass over the training
# loader, one validation pass over the test loader, records the losses and the learning
# rate, and checkpoints the model weights. The collected history is pickled at the end.
#
# NOTE(review): `nn`, `optimizer` and `loss_func` are not defined in any cell visible in
# this notebook; they must exist before this cell runs (presumably built from NN, LAYERS
# and LRATE — confirm and add the missing cell).

# Make sure the output folder exists so torch.save / open below cannot fail on a missing path.
os.makedirs(TRAINING_FOLDER, exist_ok=True)

# Start from empty histories so that re-running this cell does not append to stale results.
epochs = []
train_loss_array = []
valid_loss_array = []
lr_array = []

pbar = tqdm(range(EPOCHS))
for epoch in pbar:
    # --- training pass: one optimisation step per mini-batch ---
    batch_losses = []
    for xtrain, ytrain in dataloaders['train']:
        train_loss, output_train = train(nn, xtrain, ytrain, optimizer, loss_func)
        batch_losses.append(train_loss.item())
    if not batch_losses:
        raise ValueError("dataloaders['train'] yielded no batches")
    epoch_train_loss = sum(batch_losses) / len(batch_losses)

    # --- validation pass: once per epoch, without tracking gradients ---
    valid_losses = []
    with torch.no_grad():
        for xtest, ytest in dataloaders['test']:
            # Flatten the model output to 1-D before comparing it with the targets.
            prediction = torch.reshape(nn(xtest), (-1,))
            valid_losses.append(MARELoss(prediction, ytest).item())
    if not valid_losses:
        raise ValueError("dataloaders['test'] yielded no batches")
    epoch_valid_loss = sum(valid_losses) / len(valid_losses)

    # --- bookkeeping: exactly one history entry per epoch ---
    epochs.append(epoch)
    train_loss_array.append(epoch_train_loss)
    valid_loss_array.append(epoch_valid_loss)
    lr_array.append(optimizer.param_groups[0]['lr'])

    pbar.set_postfix({'training loss': epoch_train_loss, 'validation loss': epoch_valid_loss})

    # Checkpoint the weights reached at the end of this epoch.
    torch.save(nn.state_dict(), f'{TRAINING_FOLDER}/epoch{epoch}')

training_summary = {
    'epochs': epochs,
    'train_loss': train_loss_array,
    'valid_loss': valid_loss_array,
    'learning_rate': lr_array
}

with open(f'{TRAINING_FOLDER}/summary.pkl','wb') as f_:
    pickle.dump(training_summary, f_)