In [3]:
import os
import numpy as np
import torch
import torch.nn.functional
from torch.autograd import Variable
import sys
# import convlstm
sys.path.append("../src")

import convlstm


In [4]:
# --- model architecture ---
# channels fed into the first layer, one per input field
# (e.g. concentration heatmap, current, wind curl, ...)
input_channels = 10
# hidden channels of each stacked layer; the final entry is the output channel count
hidden_channels = [10, 5, 1]
# kernel size passed to the ConvLSTM constructor
kernel_size = 3

# --- optimisation ---
batch_size = 1
num_epochs = 20
learning_rate = 0.01

In [5]:
# Report which PyTorch build this kernel is running.
print("Pytorch version {}".format(torch.__version__))
# Query CUDA availability once and reuse the answer for the device choice.
use_cuda = torch.cuda.is_available()
# First CUDA device when one is visible, otherwise the CPU.
device = torch.device("cuda:0") if use_cuda else torch.device("cpu")
print("Device to be used for computation: {}".format(device))

Pytorch version 1.8.1
Device to be used for computation: cpu


In [7]:
# initialize our model
model = convlstm.ConvLSTM(input_channels, hidden_channels, kernel_size).cpu()
# choose loss function
loss_fn = torch.nn.MSELoss(size_average=True)
# choose optimizer
optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate)
# check the model / loss function and optimizer
print(model)
print(loss_fn)
print(optimiser)

ConvLSTM(
  (cell0): ConvLSTMCell(
    (Wxi): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Whi): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (Wxf): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Whf): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (Wxc): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Whc): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (Wxo): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Who): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  )
  (cell1): ConvLSTMCell(
    (Wxi): Conv2d(10, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Whi): Conv2d(5, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (Wxf): Conv2d(10, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Whf): Conv2d(5, 5, kernel_si



In [8]:
# Random smoke-test tensors laid out as (batch, channels, height, width).
# Plain tensors are used directly: the `Variable` wrapper is deprecated and no
# longer needed.
# NOTE(review): the 5 x 1 spatial size is kept from the original cell -- confirm it is intended.
input_vector = torch.randn(batch_size, input_channels, 5, 1).to(device)
# The target has `hidden_channels[-1]` channels so it matches the model's output
# channel count, and it keeps the default float32 dtype: the model's parameters are
# float32 by default, so a float64 target would be a dtype mismatch in the loss.
target_vector = torch.randn(batch_size, hidden_channels[-1], 5, 1).to(device)

In [None]:
#training

In [None]:
# Train the ConvLSTM to predict `sic_exp_norm` at timestep+1 from ten predictor
# fields stacked at the current timestep. The loss is summed over all training
# timesteps and one optimiser step is taken per epoch.
#
# NOTE(review): this cell reads `sequence_len`, `cross_valid_year`, `test_year`,
# `height`, `width`, `output_path`, the `*_exp_norm` arrays and `month_exp`, none of
# which are defined in the visible part of the notebook -- confirm an earlier cell
# creates them before running this one.

# Leading timesteps used for training; the trailing cross-validation and test spans
# (12*4 timesteps per held-out year) are excluded.
n_train_steps = sequence_len - cross_valid_year*12*4 - test_year*12*4
if n_train_steps <= 0:
    raise ValueError("no timesteps left for training after holding out validation/test data")

# Predictor fields, stacked along the channel axis in this order.
predictor_fields = (sic_exp_norm,
                    ohc_exp_norm,
                    t2m_exp_norm,
                    slp_exp_norm,
                    z500_exp_norm,
                    z850_exp_norm,
                    u10m_exp_norm,
                    v10m_exp_norm,
                    sflux_exp_norm,
                    month_exp)

# summed loss of each epoch
hist = np.zeros(num_epochs)
# loop of epoch
for t in range(num_epochs):
    # running sum of the per-timestep losses for this epoch
    loss = 0
    # loop of timestep
    for timestep in range(n_train_steps):
        # hidden state re-initialized inside the model when timestep=0
        # input tensor: one channel per predictor field at the current timestep
        x_input = np.stack([field[timestep,:,:] for field in predictor_fields])
        x_var = torch.Tensor(x_input).view(-1,input_channels,height,width).to(device)
        # target tensor: the sic field one timestep ahead
        y_train_stack = sic_exp_norm[timestep+1,:,:]
        y_var = torch.Tensor(y_train_stack).view(-1,hidden_channels[-1],height,width).to(device)
        # Forward pass
        y_pred, _ = model(x_var, timestep)
        # choose training data
        y_train = y_var
        # y_pred and y_train must have the same dimensions
        # accumulate loss
        loss = loss + loss_fn(y_pred, y_train)
    # print loss at certain iteration
    if t % 5 == 0:
        print("Epoch ", t, "MSE: ", loss.item())
    hist[t] = loss.item()

    # Zero out gradient, else they will accumulate between epochs
    optimiser.zero_grad()

    # Backward pass
    loss.backward()

    # Update parameters
    optimiser.step()

# save the whole model object (pickled), not only its state dict
torch.save(model, os.path.join(output_path,'convlstm_monteCarlo.pkl'))

In [9]:
#testing

In [None]:
# Rolling multi-step forecast over the held-out test span. For every test timestep
# the model is first driven with the observed fields up to that timestep, then fed
# its own sic predictions for the following lead times.
#
# NOTE(review): this cell reads `sequence_len`, `test_year`, `height`, `width`, the
# `*_exp_norm` arrays and `month_exp`, none of which are defined in the visible part
# of the notebook -- confirm an earlier cell creates them before running this one.

# number of timesteps held out for testing (12*4 per test year)
n_test_steps = test_year*12*4
# index of the first held-out test timestep
test_start = sequence_len - n_test_steps
# time series before test data
pred_base_sic = sic_exp_norm[:-n_test_steps,:,:]
# predict x steps ahead
step_lead = 16 # unit week
# create a matrix for the prediction
lead_pred_sic = np.zeros((n_test_steps,step_lead,height,width),dtype=float) # dim [predict time, lead time, lat, lon]

# Forcing fields stacked between the sic channel and the month channel, in this order.
forcing_fields = (ohc_exp_norm,
                  t2m_exp_norm,
                  slp_exp_norm,
                  z500_exp_norm,
                  z850_exp_norm,
                  u10m_exp_norm,
                  v10m_exp_norm,
                  sflux_exp_norm)

# Inference only: disable autograd so no graph is recorded while the sequence is
# rolled forward (per the original notes the hidden state is not cleared between
# calls, so a recorded graph would span the whole sequence).
with torch.no_grad():
    # start the prediction loop
    for step in range(n_test_steps):
        # timestep whose prediction is stored as lead 0 for this test step
        forecast_start = test_start + step
        # based on the design of this module, the hidden states and cell states are
        # initialized when the module is called with timestep 0.
        # here i is the time step (index) of prediction, we use var[:i] to predict var[i]
        for i in range(1, forecast_start + step_lead):
            if i <= forecast_start:
                # before time of prediction: drive the model with observed data
                lead = 0
                sic_field = sic_exp_norm[i-1,:,:]
                forcing_idx = i-1
            else:
                # after time of prediction: feed back the previous lead's prediction
                lead = i - forecast_start
                sic_field = lead_pred_sic[step,lead-1,:,:]
                # forcing fields out of range, use the last value
                forcing_idx = i-1 if i <= sequence_len else -1
            # NOTE(review): month_exp is always indexed with i-1, also when
            # i > sequence_len -- presumably it extends past the other fields; verify.
            x_input = np.stack([sic_field]
                               + [field[forcing_idx,:,:] for field in forcing_fields]
                               + [month_exp[i-1,:,:]])
            x_var_pred = torch.Tensor(x_input).view(-1,input_channels,height,width).to(device)
            # make prediction
            last_pred, _ = model(x_var_pred, i-1)
            # record the prediction from the time of prediction onwards
            if i >= forecast_start:
                # GPU data should be transferred to CPU
                lead_pred_sic[step,lead,:,:] = last_pred[0,0,:,:].detach().cpu().numpy()