In [None]:
import sys
# Make the parent directory importable; the `tool` package imported below
# presumably lives there -- TODO confirm against the repository layout.
sys.path.append('..')
import warnings
# Suppress every warning for the rest of the session. Note that this also
# hides deprecation and numerical warnings raised by the libraries used below.
warnings.filterwarnings('ignore')

In [None]:
import os
import numpy as np
import xarray as xr
import random as rd
import platform
%matplotlib inline
 
from tool.train_evaluate import Trainer, Evaluator
from tool.dataset import NetCDFDataset
from tool.loss import RMSELoss
from tool.utils import Util

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch import optim

Loading run-specific settings from `settings.py` (some of them are defined by the "run-notebooks.sh" script)

In [None]:
# Execute settings.py and load the names it defines into this notebook's namespace.
# The cells below read several names that are never assigned in this notebook and
# are therefore presumably provided by settings.py (verify against that file):
# model_name, version, dataset, cuda, dataset_file, small_dataset, test_split,
# validation_split, batch_size, models, model_param, dropout_rate, lr, epochs,
# is_chirps, step, only_training.
%run settings.py

In [None]:
# Helper object used further down for checkpoint naming, loss persistence and plotting.
util = Util(model_name, version=version, prefix=dataset)

# Restrict the visible GPUs before the first CUDA query below.
os.environ["CUDA_VISIBLE_DEVICES"] = cuda
device_type = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_type)
device

Fixing the random seeds (for reproducibility)

In [None]:
# One fixed seed shared by every random number generator this notebook relies on.
seed = 1000

# Python's `random`, NumPy and PyTorch each keep their own RNG state,
# so each one is seeded explicitly.
rd.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

# Ask cuDNN to pick deterministic algorithms.
torch.backends.cudnn.deterministic = True

In [None]:
def init_seed(seed):
    """Seed NumPy's global random number generator.

    This function is handed to the DataLoaders in this notebook as
    ``worker_init_fn``. In that role PyTorch calls it once per worker process
    and passes the worker id as the argument, so ``seed`` is then the worker
    id rather than the notebook-wide seed value.

    Args:
        seed: integer forwarded unchanged to ``np.random.seed``.
    """
    np.random.seed(seed)

Loading the dataset

In [None]:
# Open the file(s) referenced by dataset_file as a single xarray dataset.
ds = xr.open_mfdataset(dataset_file)
if small_dataset:
    # Quick-experimentation mode: keep only the first 500 entries along `sample`.
    ds = ds.isel(sample=slice(0, 500))

In [None]:
# All three splits are built from the same dataset with identical split ratios;
# only the is_validation / is_test flags differ.
split_args = dict(test_split=test_split, validation_split=validation_split)

train_dataset = NetCDFDataset(ds, **split_args)
val_dataset = NetCDFDataset(ds, is_validation=True, **split_args)
test_dataset = NetCDFDataset(ds, is_test=True, **split_args)

In [None]:
# Report the input (X) and target (y) shapes of every split, in train/val/test order.
shape_report = [
    ('[X_train] Shape:', train_dataset.X.shape),
    ('[y_train] Shape:', train_dataset.y.shape),
    ('[X_val] Shape:', val_dataset.X.shape),
    ('[y_val] Shape:', val_dataset.y.shape),
    ('[X_test] Shape:', test_dataset.X.shape),
    ('[y_test] Shape:', test_dataset.y.shape),
]
for label, shape in shape_report:
    print(label, shape)

In [None]:
# Loader settings shared by all three splits; init_seed runs in every worker process.
params = dict(batch_size=batch_size, num_workers=4, worker_init_fn=init_seed)

# Only the training data is shuffled; validation and test keep the dataset order.
train_loader = DataLoader(train_dataset, shuffle=True, **params)
val_loader = DataLoader(val_dataset, shuffle=False, **params)
test_loader = DataLoader(test_dataset, shuffle=False, **params)

Creating the model

In [None]:
# Look up the model constructor registered under the configured model name.
model_bulder = models[model_name]
print(model_bulder)

# Instantiate the network from the training input shape and the configured hyper-parameters.
model = model_bulder(
    train_dataset.X.shape,
    model_param['num_layers'],
    model_param['hidden_dim'],
    model_param['kernel_size'],
    device,
    dropout_rate,
)
model.to(device)

# RMSE objective optimised with RMSprop.
criterion = RMSELoss()
optimizer = optim.RMSprop(model.parameters(), lr=lr, alpha=0.9, eps=1e-6)

Training the model

In [None]:
%%time
checkpoint_filename = util.get_checkpoint_filename()

print('Train on {} samples, validate on {} samples'.format(len(train_dataset), len(val_dataset)))

trainer = Trainer(model, criterion, optimizer, train_loader, val_loader, epochs, 
                  device, util, verbose=True, patience=None, no_stop=True)

train_losses, val_losses = trainer.fit(checkpoint_filename, is_chirps)

In [None]:
# Persist the losses returned by training, then plot them as a learning curve.
util.save_loss(train_losses, val_losses)

loss_curves = [train_losses, val_losses]
curve_labels = ['Training', 'Validation']
util.plot(loss_curves, curve_labels, 'Epochs', 'Loss', 'Learning curve', inline=True)

Loading and Evaluating the model

In [None]:
# The metrics stay None when evaluation is skipped, so later cells can still reference them.
test_rmse = test_mae = None

evaluator = Evaluator(model, criterion, optimizer, test_loader, device, util, step)
# Load the checkpoint written during training before evaluating.
best_epoch, val_loss = evaluator.load_checkpoint(checkpoint_filename)

if not only_training:
    test_rmse, test_mae = evaluator.eval(is_chirps=is_chirps)
    print(f'Test RMSE: {test_rmse:.4f}')
    print(f'Test MAE: {test_mae:.4f}')

In [None]:
# Release the GPU memory cached by PyTorch's allocator; nothing to do without CUDA.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

Notification via email

In [None]:
# Summary of this run: results first, then the hyper-parameters, then where it ran.
model_info = {
    'Notebook': 'yes',
    'best_epoch': best_epoch,
    'val_error': val_loss,
    'test_rmse': test_rmse,
    'test_mae': test_mae,
    'step': step,
    # Architecture settings are copied straight from model_param, in this key order.
    **{key: model_param[key] for key in ('num_layers', 'kernel_size', 'hidden_dim')},
    'dropout_rate': dropout_rate,
    'learning_rate': lr,
    'dataset': dataset,
    'hostname': platform.node(),
}

In [None]:
#util.send_email(model_info)