**GRU training notebook**

1. Import all the needed libraries

In [None]:
import torch
import numpy as np
from torch import optim
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch import nn
!pip install ipdb
import ipdb
import matplotlib.pyplot as plt
from datetime import datetime
import os, gc
%matplotlib inline

SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)


2. Define neural architecture (gru_net)

In [288]:
class gru_net(nn.Module):
    def __init__(self, model_pars, model_hpars, train_pars):
        super(gru_net, self).__init__()
        dropout_layer =  torch.nn.Dropout(p = model_hpars['p_dropout'])
        self.dropout1 = dropout_layer
        self.rnn      = torch.nn.GRU(input_size   = model_pars['n_inputs'],
                                    hidden_size   = model_hpars['n_units'],
                                    num_layers    = model_hpars['n_layers'],
                                    batch_first   = True,
                                    dropout       = (dropout_layer if model_hpars['n_layers'] > 1 else 0.),
                                    bidirectional = False,
                                    device        = train_pars.get('device', torch.device('CPU')),
                                    dtype         = train_pars.get('dtype', None))
        self.dropout2  = dropout_layer
        self.transform = torch.nn.Linear(model_hpars['n_units'], model_pars['n_outputs'])
    
    def forward(self, X):
        output, hidden = self.rnn(self.dropout1(X))
        output         = self.transform(self.dropout2(output))
        return F.log_softmax(output,dim = 2).swapaxes(1,2)
        # before: n_batches X n_times X n_classes
        # after: n_batches X n_classes X n_times

3. Define class (GRUmodel) and methods to support data preparation, training, evaluation, and plotting

In [289]:
class GRUmodel:
    """Class that handles operations on gru_net"""
    def __init__(self, data, model_init, model_pars, model_hpars, train_pars, log_pars):
        self.model      = model_init(model_pars, model_hpars, train_pars)
        self.model_pars = model_pars
        self.data       = data
        self.train_pars = train_pars
        self.log_pars   = log_pars
        self.create_DataLoaders()
        if self.train_pars['use_gpu'] and torch.cuda.is_available():
            self.device = torch.device('cuda:0')
            gpu_idxs    = np.arange(min(train_pars['n_gpus'], torch.cuda.device_count())).tolist()
            self.model  = torch.nn.DataParallel(self.model.to(self.device), device_ids = gpu_idxs)
        else:
          self.device = 'cpu'
        self.optimizer  = torch.optim.Adam(self.model.parameters(), 
                                          lr           = train_pars.get('lrate', 1e-3), 
                                          weight_decay = train_pars.get('alpha', 0.0))
        self.loss        = nn.NLLLoss() 
        self.trainLoss, self.trainAcc = [], []
        self.validLoss, self.validAcc  = [], []
        if self.log_pars['save_checkpoints']:
          if not os.path.isdir(self.log_pars['save_path']):
            os.mkdir(self.log_pars['save_path'])
        self.last_improv = 0
        self.results = {'acc':{},'loss':{},'best_epoch':{}}

    def train(self):
        """Trains model for given number of iterations with early stopping"""
        best_valid_loss = 1e8
        last_improv     = -1
        print('Training Started\n')
        for i in range(self.train_pars['n_epochs']):
            print(f"Epoch: {i+1}/{self.train_pars['n_epochs']}")

            # Train on entire dataset
            train_loss, train_acc    = self.train_epoch()
            valid_loss, valid_acc, _ = self.eval_epoch('validation')

            # Store train and validation losses
            self.trainLoss.append(train_loss)
            self.trainAcc.append(train_acc)
            self.validLoss.append(valid_loss)
            self.validAcc.append(valid_acc)

            # Print train and validation losses
            if self.log_pars['verbose']:
                if (i % log_pars['print_freq']) == 0:
                    print(f"Training  : loss = {train_loss:.2f}, acc = {train_acc:.2f}")
                    print(f"Validation: loss = {valid_loss:.2f}, acc = {valid_acc:.2f}")

            # Early stopping
            if valid_loss < best_valid_loss:
                best_valid_loss  = valid_loss
                self.last_improv = i
                if self.log_pars['save_checkpoints'] is not None:
                    self.save_checkpoint(best_valid_loss, valid_acc, self.last_improv)
            if (i - self.last_improv) > train_pars['valid_pat']:
                break

        # Store final results        
        self.results['acc']['train']  = self.trainAcc[self.last_improv]
        self.results['loss']['train'] = self.trainLoss[self.last_improv]
        self.results['acc']['valid']  = self.validAcc[self.last_improv]
        self.results['loss']['valid'] = self.validLoss[self.last_improv]
        self.results['best_epoch']    = self.last_improv
        self.plot_training_results()
        return self.results

    def train_epoch(self):
        # Train the model for one epoch
        num_batches = len(self.DataLoaderTrain)
        num_samples = num_batches*self.train_pars['batch_size']*self.model_pars['n_times']
        train_loss, train_acc = 0, 0
        self.model.train()
        for batch, (X, y) in enumerate(self.DataLoaderTrain):
            X, y = X.to(self.device), y.to(self.device)

            # Compute prediction error
            ypred        = self.model(X)
            c_train_loss = self.loss(ypred, y)

            # Zero the gradient
            self.optimizer.zero_grad()

            # Calculate gradient
            c_train_loss.backward()

            # Update weights
            self.optimizer.step()
            
            # Acumulate losses and correct classifications
            train_loss += c_train_loss.item()*self.model_pars['n_times']
            train_acc  += (ypred.argmax(1) == y).type(torch.float).sum().item()
        train_loss /= num_samples
        train_acc  /= num_samples
        return train_loss, train_acc

    def eval_epoch(self, modality):
       # Evaluate the model
        if modality=='validation':
          dataloader = self.DataLoaderVal
        elif modality=='test':
          dataloader = self.DataLoaderTest

        num_batches = len(dataloader)
        num_samples = num_batches*self.train_pars['batch_size']*self.model_pars['n_times']
        eval_loss, eval_acc = 0, 0
        self.model.eval()

        with torch.no_grad():
          for batch, (X, y) in enumerate(dataloader):
            X, y  = X.to(self.device), y.to(self.device)
            ypred = self.model(X)
            eval_loss += self.loss(ypred, y).item()*self.model_pars['n_times']
            eval_acc  += (ypred.argmax(1) == y).type(torch.float).sum().item()
          eval_loss /= num_samples
          eval_acc  /= num_samples
          return eval_loss, eval_acc, ypred

    def test(self): 
      # Evaluate the model on test set
      self.results['loss']['test'], self.results['acc']['test'], ypred = self.eval_epoch('test')
      return self.results, ypred

    def plot_training_results(self):
      # Plot training results
      last_index = self.last_improv+1
      neps = range(0,last_index)
      plt.plot(neps, self.trainLoss[0:last_index], label = 'training loss')
      plt.plot(neps, self.trainAcc[0:last_index], label = 'training acc')
      plt.plot(neps, self.validLoss[0:last_index], label = 'validation loss')
      plt.plot(neps, self.validAcc[0:last_index], label  = 'validation acc')
      plt.xlabel('Training Epochs')
      plt.legend()
      plt.title(f'Training Results[0:{self.last_improv}]')
      plt.show()

    def create_DataLoaders(self):
      # Create data loaders to support batch training
      self.DataLoaderTrain =        DataLoader(TensorDataset(self.data['train']['X'], 
                                    self.data['train']['y']), 
                                    batch_size = self.train_pars['batch_size'], 
                                    shuffle    = True)
      self.DataLoaderVal =          DataLoader(TensorDataset(self.data['valid']['X'], 
                                    self.data['valid']['y']), 
                                    batch_size = self.train_pars['batch_size'])
      self.DataLoaderTest =         DataLoader(TensorDataset(self.data['test']['X'], 
                                    self.data['test']['y']), 
                                    batch_size = self.train_pars['batch_size']) 

    def save_checkpoint(self, best_valid_loss, valid_acc, last_improv):
      # Save model checkpoint
        default_ckpt = {
            "state_dict"  : self.model.state_dict(),
            "optim_state" : self.optimizer.state_dict(),
            "valid_loss"  : best_valid_loss,
            "valid_acc"   : valid_acc,
            "epoch"       : last_improv
        }
        file_name = self.log_pars['save_path'] + f'epoch_{last_improv}_valLoss_{best_valid_loss}_valAcc_{valid_acc}'
        if not os.path.exists(file_name):
          torch.save(default_ckpt, file_name)

4. Load and prepare the data

In [291]:
# Define function to convert data to torch tensors
def tensorize_dict(data_in):
  for phase in data_in:
   data_in[phase]['X'] = torch.from_numpy(data_in[phase]['X']).to(torch.float32)
   data_in[phase]['y'] = torch.from_numpy(data_in[phase]['y']).to(torch.long)
  return data_in
# Load the preprocessed
data_ts = np.load('./preprocessed/ppd.npy',      allow_pickle='TRUE').item()
dims_ts = np.load('./preprocessed/ppd_dims.npy', allow_pickle='TRUE').item()
data_ts = tensorize_dict(data_ts)

5. Initialize the GRUmodel class

In [None]:
# Define path to store intermediate results
SESSION_NAME         = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + '_GRU/'
PATH_TO_FILES_DIR    = PATH_TO_DRIVE + 'MyFolder/' + SESSION_NAME

# Define parameters
model_pars     = {'n_inputs': dims_ts['inp'], 'n_outputs': 2*dims_ts['outp'],\
                  'n_times': dims_ts['times']}
model_hpars    = {'n_units': 400, 'n_layers':1, 'p_dropout': 0.25,
                  'lrate': 1e-3, 'l2_weight':0}
train_pars     = {'n_epochs': 2000, 'batch_size': 1, 'valid_pat': 50,\
                  'use_gpu': True, 'n_gpus':1}
log_pars       = {'print_freq': 1, 'save_checkpoints': True, 'save_path': PATH_TO_FILES_DIR, 
                  'verbose': True}

# Initialize the model
model = GRUmodel(
    data        = data_ts,
    model_init  = gru_net,
    model_pars  = model_pars,
    model_hpars = model_hpars,
    train_pars  = train_pars,
    log_pars    = log_pars
)

6. Train and test the model

In [None]:
# Train the model and inspect results
model.train()

# Test the model
results, predictions = model.test()
results['test_pred'] = predictions


In [294]:
results

{'acc': {'train': 0.82525, 'valid': 0.8245, 'test': 0.82575},
 'loss': {'train': 0.3912906050682068,
  'valid': 0.3853456974029541,
  'test': 0.6048155426979065},
 'best_epoch': 33,
 'test_pred': tensor([[[-1.7276e-03, -1.9715e-04, -1.7248e-04,  ..., -3.7792e-01,
           -6.6707e-03, -1.2265e-03],
          [-6.3619e+00, -8.5318e+00, -8.6651e+00,  ..., -1.1561e+00,
           -5.0134e+00, -6.7042e+00]]], device='cuda:0')}