## Short-term prediction using LSTM and IBTrACS

v02: Input tensors are multivariate sequence of observations, thus have a shape (n_samples, timestamp, n_features) = (n_samples, 4, 5).
Uses Dataset and DataLoader.

TODO: use GPU inside ShortTimeseriesDataset()

In [1]:
# General
# import glob
import os.path
# import warnings
# warnings.filterwarnings('ignore')
from tqdm import tqdm

# Arrays & Displays
import xarray as xr
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
# from mpl_toolkits.axes_grid1 import make_axes_locatable
# # from matplotlib.colors import Normalize
# # from matplotlib.colors import ListedColormap
# # import matplotlib.cm as cm
# import pandas as pd

# PyTorch
import torch
import torch.nn as nn
from torch.autograd import Variable              # Convert arrays to tensors
from torch.utils.data import Dataset, DataLoader # Create a Dataset class to combine with DataLoader (= mini batches selection)
import pytorch_lightning as pl


# Data treatment
# import dask as da
# from dask.diagnostics import ProgressBar
# import zarr
# from scipy.interpolate import griddata
from datetime import datetime

# Custom
import dataUtils    as du
import pytorchUtils as pu

# Statistics
from sklearn import linear_model, neighbors

# Default parameters
mpl.rcParams.update({'font.size': 18})
mpl.rcParams['figure.figsize'] = (15, 10)
mpl.rcParams['axes.facecolor'] = 'white'
mpl.rcParams['figure.facecolor'] = 'white'

In [2]:
### Setup device
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))
print('{} GPU(s) available'.format(torch.cuda.device_count()))

Using cuda device
1 GPU(s) available


In [3]:
### PARAMS
PARAMS = {'seq_len':     4, # length of the input time series used to predict y(t)
          'n_features':  5,     # nb of output features
          
          # Model parameters
          'input_size':  5,     # nb of input features
          'hidden_size': 2,     # nb of features in hidden state
          'num_layers':  1,     # nb of stacked lstm layers
    
          'batch_size':  8,     
          'n_epochs':    10,     # nb of epochs
          'learn_rate':  0.001, # learning rate
          
          'save_figs':       False,
          'feature_scaling': True,
         }

### PATHS
PATHS  = {
    # Data
    'ibtracs_data': '/home/arthur/data/ibtracs/IBTrACS.NA.v04r00.nc', # '/home/arthur/data/ibtracs/IBTrACS.NA.v04r00.nc'
    # Save
    'lstm_path':    '/home/arthur/results/TCsLifeMonitFromObs/lstm_dynamic_pred_ibtracs/lstm.pth', 
}

### VARIABLES TO CONSIDER
'''Initial dataset has 147 variables, so we select only a subset of these'''
# storm speed, time, dist2land, usa_r64, usa_r50
params_of_interest = ['usa_lon', 'usa_lat', 'usa_wind', 'usa_r34', 'usa_rmw'] 
additional_info    = ['numobs', 'sid', 'basin', 'name', 'usa_agency', 'iso_time', 'nature']

In [4]:
### OPEN DATASET
ds_ibt_raw = xr.open_dataset(PATHS['ibtracs_data'])
ds_ibt     = ds_ibt_raw[params_of_interest + additional_info]
# ds_ibt_raw

In [5]:
### FILTERS
# By year
start_date = np.datetime64('2000-01-01')
fsi        = np.nanargmin(np.abs(ds_ibt['time'][:, 0] - start_date)) # First storm index
ds_ibt     = ds_ibt.isel(storm=slice(fsi, -1))
# By latitude
ds_ibt     = ds_ibt.where(np.abs(ds_ibt['lat']) <= 30)
# By removing empty Rmax time series
ds_ibt     = ds_ibt.where(ds_ibt['usa_rmw'].notnull().sum(axis=1) > 5)
# By removing empty R34 time series
ds_ibt     = ds_ibt.where(ds_ibt['usa_r34'].notnull().sum(axis=1) > 5) 
# By agency
if os.path.basename(PATHS['ibtracs_data']) == 'IBTrACS.NA.v04r00.nc': ds_ibt     = ds_ibt.where(ds_ibt['usa_agency'] == b'hurdat_atl')

# ==> Drop NaNs
ds_ibt     = ds_ibt.dropna(dim='storm', how='all') # Much quicker to drop NaNs only at the end

In [6]:
# LR_variables = ['usa_lon', 'usa_lat', 'usa_r34']
# ds_ibt_LR    = ds_ibt[LR_variables]
# ds_ibt_LR

In [7]:
### CREATE DATASET (Train and test)
# FIRST WE AVERAGE OVER EVERY QUADRANT
# Pre-processing
MU    = {}
SIG   = {}
SCALE = {'usa_wind': 3,
         'usa_rmw':  3, 
         'usa_r34':  2,
         'usa_lon':  0.5,
         'usa_lat':  1,
}
if PARAMS['feature_scaling']:
    for param in params_of_interest:
        MU[param]     = float(ds_ibt[param].mean(skipna=True))
        SIG[param]    = float(ds_ibt[param].std(skipna=True))
        ds_ibt[param] = SCALE[param] * ((ds_ibt[param] - MU[param]) / SIG[param])

# Separate train and test set
sep = int(0.8 * len(ds_ibt['storm'])) # 80% train, 20% test
ds_train, ds_test = ds_ibt.isel(storm=slice(None, sep)), ds_ibt.isel(storm=slice(sep, None))
print('Train set: %i storms;  '%len(ds_train['storm']), 'Test set: %i storms'%len(ds_test['storm']))

# Create Dataset
X_train, y_train = du.create_dataset(ds_train, params_of_interest, PARAMS)
X_test, y_test   = du.create_dataset(ds_test,  params_of_interest, PARAMS)
print('Shape of predictors matrix X_train: ', np.asarray(X_train).shape)
print('Shape of targets matrix y_train: ', np.asarray(y_train).shape)

Train set: 188 storms;   Test set: 48 storms


100%|█████████████████████████████████████████| 188/188 [00:03<00:00, 58.63it/s]
100%|███████████████████████████████████████████| 48/48 [00:00<00:00, 52.23it/s]

Shape of predictors matrix X_train:  (2432, 20)
Shape of targets matrix y_train:  (2432, 5)





In [8]:
# Open Datasets
train_dataset = pu.ShortTimeseriesDataset(X_train, 
                                          y_train,
                                          PARAMS['n_features'],
                                          PARAMS['seq_len']
                                         )
test_dataset  = pu.ShortTimeseriesDataset(X_test, 
                                          y_test,
                                          PARAMS['n_features'],
                                          PARAMS['seq_len']
                                         )

# DataLoader
torch.manual_seed(99)
train_loader = DataLoader(train_dataset, batch_size=PARAMS['batch_size'], shuffle=True)
test_loader  = DataLoader(test_dataset,  batch_size=PARAMS['batch_size'], shuffle=False)

X, y = next(iter(train_loader)) # Check
print("Features shape:", X.shape)
print("Target shape:", y.shape)

Features shape: torch.Size([8, 4, 5])
Target shape: torch.Size([8, 5])


In [9]:
# Declare model
# Normally, LSTM1 handles batch_size
model = pu.LSTM1(num_classes=PARAMS['n_features'], 
                 input_size=PARAMS['input_size'],
                 hidden_size=PARAMS['hidden_size'],
                 num_layers=PARAMS['num_layers'],
                 seq_len=PARAMS['seq_len'],             
)
print(model)

# Loss function, optimizer
loss_function = torch.nn.MSELoss()    # mean-squared error for regression
optimizer     = torch.optim.Adam(model.parameters(), lr=PARAMS['learn_rate']) 

LSTM1(
  (lstm): LSTM(5, 2, batch_first=True)
  (fc_1): Linear(in_features=2, out_features=128, bias=True)
  (fc): Linear(in_features=128, out_features=5, bias=True)
  (relu): ReLU()
)


In [10]:
def train_model(data_loader, model, loss_function, optimizer):
    num_batches = len(data_loader)
    total_loss  = 0
    model.train()

    for X, y in data_loader:
        output = model(X)
        loss = loss_function(output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / num_batches
    print(f"Train loss: {avg_loss}")

def test_model(data_loader, model, loss_function):
    num_batches = len(data_loader)
    total_loss  = 0

    model.eval()
    with torch.no_grad():
        for X, y in data_loader:
            output = model(X)
            total_loss += loss_function(output, y).item()

    avg_loss = total_loss / num_batches
    print(f"Test loss: {avg_loss}")

def predict(data_loader, model):

    output = torch.tensor([])
    model.eval()
    with torch.no_grad():
        for X, _ in data_loader:
            y_star = model(X)
            output = torch.cat((output, y_star), 0)

    return output

print("Untrained test\n--------")
test_model(test_loader, model, loss_function)
print()

for ix_epoch in range(PARAMS['n_epochs']):
    print(f"Epoch {ix_epoch}\n---------")
    train_model(train_loader, model, loss_function, optimizer=optimizer)
    test_model(test_loader, model, loss_function)
    print()
    
# # Save model
# torch.save(lstm1.state_dict(), PATHS['lstm_path'])

Untrained test
--------
Test loss: 4.6562081020453885

Epoch 0
---------
Train loss: 3.4222859210873904
Test loss: 2.6662339992906854

Epoch 1
---------
Train loss: 2.4285676579334234
Test loss: 2.1775875132659386

Epoch 2
---------
Train loss: 1.8200904832858789
Test loss: 1.6099481654578243

Epoch 3
---------
Train loss: 1.3791104797273874
Test loss: 1.3881917876758794

Epoch 4
---------
Train loss: 1.229969138867761
Test loss: 1.2994786134396477

Epoch 5
---------
Train loss: 1.1605768213538747
Test loss: 1.2725745112388984

Epoch 6
---------
Train loss: 1.1092506685343229
Test loss: 1.2366023462736744

Epoch 7
---------
Train loss: 1.065027684757584
Test loss: 1.1757734428192008

Epoch 8
---------
Train loss: 1.0262919734967382
Test loss: 1.1189834751274395

Epoch 9
---------
Train loss: 0.9758337972391593
Test loss: 1.0353985752525001



In [16]:
# Inference using a new dataloader (validation_dataloader for instance, creatded using ds.isel(slice(-3, -2)))

In [13]:
# # Move model and Train set to GPU
# lstm1.to(device)
# X_train_tensors_final = X_train_tensors_final.to(device)
# y_train_tensors       = y_train_tensors.to(device)
# # NB LSTM() model also moves tensors to device inside the class (for hidden tensors)

# # Training
# for epoch in range(2000):
#     outputs = lstm1.forward(X_train_tensors_final) # forward pass
#     optimizer.zero_grad() # caluclate the gradient, manually setting to 0

#     # obtain the loss function
#     loss = criterion(outputs, y_train_tensors)

#     loss.backward() # calculates the loss of the loss function
#     optimizer.step() #improve from loss, i.e backprop
    
#     if epoch % 100 == 0:
#         print("Epoch: %d, loss: %1.5f" % (epoch, loss.item())) 



RuntimeError: input.size(-1) must be equal to input_size. Expected 5, got 20

In [None]:
# # Prepare inference data
# X_im, y_im         = du.create_dataset(ds_test.isel(storm=slice(-30, -20)),  params_of_interest, PARAMS)
# y_im               = np.array(y_im)
# X_im_tensors       = Variable(torch.Tensor(X_im))
# X_im_tensors_final = torch.reshape(X_im_tensors, (X_im_tensors.shape[0], 1, X_im_tensors.shape[1]))

# # Inference
# lstm1.eval()
# lstm1.cpu()
# with torch.no_grad():
#     pred = lstm1(X_im_tensors_final)

# # Plot
# for i, param in enumerate(params_of_interest):
#     feature = pred[:, i].cpu().detach().numpy()
#     feature = du.inverse_scale_normalize(feature, MU, SIG, SCALE, param)
#     target  = y_im[:, i]
#     target  = du.inverse_scale_normalize(target,  MU, SIG, SCALE, param)
#     plt.title(param, weight='bold')
#     plt.plot(feature, label='LSTM', color='tab:pink')
#     plt.plot(target,  label='Data', linewidth=3, color='tab:blue')
#     plt.legend(loc='upper left');plt.grid()
#     plt.show()