In [1]:
# Importing the required packages
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.data import Dataset
import torch.nn.utils.prune as prune
from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)
from mpl_toolkits.axes_grid1.inset_locator import (inset_axes, InsetPosition, mark_inset)
import matplotlib
import pandas as pd 


from qbo1d import adsolver, utils
import netCDF4 as nc


import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
# import matplotlib
import netCDF4 as nc
import scipy.stats as st
import scipy

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was too old on your system - pyarrow 10.0.1 is the current minimum supported version as of this release.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Scale inputs or outputs by one global constant.
class GlobalScaler:
    """Rescale data by a single scalar taken from a reference sample.

    The scalar is ``X.std().max()`` of the data passed to the constructor.
    It is stored under the name ``abs_max`` although it is derived from a
    standard deviation, not from an absolute maximum.
    """

    def __init__(self, X):
        # std() is called with no axis argument; max() is then taken of that result.
        spread = X.std()
        self.abs_max = spread.max()

    def transform(self, X):
        """Return ``X`` divided by the stored scale."""
        scaled = X / self.abs_max
        return scaled

    def inverse_transform(self, X):
        """Return ``X`` multiplied by the stored scale (the inverse of ``transform``)."""
        restored = X * self.abs_max
        return restored

In [None]:
# Configure the qbo1d solver. In this notebook FullyConnected reads solver.z
# to size its input and output layers.
# NOTE(review): values look like metres and seconds (dt = 86400 s = 1 day,
# t_max = 100 years of 365 days) — confirm against the qbo1d documentation.
solver = adsolver.ADSolver(
    z_min=17e3,
    z_max=35e3,
    dz=500,
    t_min=0.0,
    t_max=365*100*86400,
    dt=86400.0,
    w=0.0,
    kappa=3e-1,
)

In [3]:
# Load the training data.
data = xr.open_dataset('../QBO_0.5x_100yrs_500m.nc')
# Row i of u is paired with row i+1 of f; the first and last columns of the
# second axis are dropped from both.
# NOTE(review): presumably axis 0 is time and axis 1 is height — confirm.
u = data.u[:-1, 1:-1].values
f = data.f[1:, 1:-1].values

# The first 90*365 samples train the model; everything after validates it.
# NOTE(review): presumably one sample per day, i.e. 90 years — confirm.
n_train = 90*365
trainset = TensorDataset(torch.from_numpy(u[:n_train]).float(), torch.from_numpy(f[:n_train]).float())
valset = TensorDataset(torch.from_numpy(u[n_train:]).float(), torch.from_numpy(f[n_train:]).float())

train_dataloader = DataLoader(trainset, batch_size=1000, shuffle=True, num_workers=4)
# Validation is evaluated as a single full batch, so shuffling it only costs
# time and makes the batch order non-deterministic. Keep it in file order,
# consistent with the test loader.
val_dataloader = DataLoader(valset, batch_size=len(valset), shuffle=False, num_workers=4)

In [7]:
# Load the testing data.
# NOTE(review): absolute, machine-specific path — confirm it is reachable
# from the environment this notebook runs in.
data = xr.open_dataset('/glade/derecho/scratch/pahlavan/qbo1d/Nonlocality-0.5x/QBO_0.5x_1000yrs_500m.nc')
# Same slicing as the training cell: row i of u is paired with row i+1 of f.
u = data["u"][:-1, 1:-1].values
f = data["f"][1:, 1:-1].values

testset = TensorDataset(
    torch.from_numpy(u).float(),
    torch.from_numpy(f).float(),
)
# The whole test set is served as one unshuffled batch.
test_dataloader = DataLoader(
    testset,
    batch_size=len(testset),
    shuffle=False,
    num_workers=4,
)

In [19]:
# MLP architecture

nch = 156         # width of every hidden layer
actv = nn.Tanh()  # single activation module, reused after each hidden layer


class FullyConnected(nn.Module):
    """Fully connected network with three hidden layers of ``nch`` units.

    Input and output width are both ``solver.z.shape[0] - 2``. The optional
    scalers are stored on the instance but are not applied in ``forward``.
    """

    def __init__(self, solver, scaler_X=None, scaler_Y=None):
        super().__init__()

        self.scaler_X = scaler_X
        self.scaler_Y = scaler_Y

        # Two fewer features than the solver has levels.
        n_io = solver.z.shape[0] - 2

        layers = [
            nn.Linear(in_features=n_io, out_features=nch),
            actv,
            nn.Linear(in_features=nch, out_features=nch),
            actv,
            nn.Linear(in_features=nch, out_features=nch),
            actv,
            nn.Linear(in_features=nch, out_features=n_io),
        ]
        self.linear_stack = nn.Sequential(*layers)

    def forward(self, X):
        """Apply the layer stack to ``X`` and squeeze the result.

        A singleton axis is inserted at position 1 before the stack. The final
        ``squeeze()`` removes every size-1 axis, so a batch holding a single
        sample comes back without its batch axis.
        """
        expanded = X.unsqueeze(1)
        out = self.linear_stack(expanded)
        return out.squeeze()

In [24]:
# Build the network, cast its parameters with .float(), and move it to the
# selected device. The last expression displays the module summary.
device = "cpu"
model = FullyConnected(solver).float()
print(device)
model.to(device)

cpu


FullyConnected(
  (linear_stack): Sequential(
    (0): Linear(in_features=35, out_features=156, bias=True)
    (1): Tanh()
    (2): Linear(in_features=156, out_features=156, bias=True)
    (3): Tanh()
    (4): Linear(in_features=156, out_features=156, bias=True)
    (5): Tanh()
    (6): Linear(in_features=156, out_features=35, bias=True)
  )
)

In [25]:
# Number of trainable parameters in the model.
sum(param.numel() for param in model.parameters() if param.requires_grad)

60095

In [13]:
class EarlyStopper:
    """Track validation loss, checkpoint on improvement, and signal when to stop.

    Parameters
    ----------
    patience : int
        Number of non-improving calls to ``early_stop`` (since the last
        improvement) after which it returns True.
    min_delta : float
        A loss only counts as non-improving when it is not within
        ``min_delta`` of the best loss seen so far.
    model : torch.nn.Module, optional
        Module whose ``state_dict`` is saved on every improvement. When left
        as None, the notebook-level global ``model`` is used, which is what
        this class always did before the parameter existed.
    save_path : str, optional
        Destination of the checkpoint file.
    """

    def __init__(self, patience=1, min_delta=0, model=None,
                 save_path='../MLP_4L_156n_0.5x_500m.pth'):
        self.patience = patience
        self.min_delta = min_delta
        self.model = model
        self.save_path = save_path
        self.counter = 0
        self.min_validation_loss = np.inf

    def early_stop(self, validation_loss):
        """Record ``validation_loss`` and return True once patience is exhausted.

        A new best loss resets the counter and saves the model weights.
        """
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0

            # save model (explicit one if given, otherwise the notebook global)
            target = self.model if self.model is not None else model
            torch.save(target.state_dict(), self.save_path)

        # Written as "not <=" rather than ">" so that a NaN loss, for which
        # every comparison is False, still counts as a non-improving epoch
        # instead of silently stalling the counter.
        elif not validation_loss <= (self.min_validation_loss + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

In [14]:
# training loop
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader`` and return the mean batch loss.

    Relies on the notebook-level globals ``device`` and ``scaler_Y``: inputs
    and targets are moved to ``device``, and targets go through
    ``scaler_Y.transform`` before the loss is computed. Inputs are passed to
    the model as they come from the loader.

    Parameters
    ----------
    dataloader : iterable of (X, Y) batches that also supports ``len()``
    model : callable mapping a batch of inputs to predictions
    loss_fn : callable ``(pred, target) -> scalar tensor``
    optimizer : optimizer that updates the parameters of ``model``

    Returns
    -------
    float
        Sum of the per-batch losses divided by the number of batches.
    """
    running_loss = 0
    for X, Y in dataloader:
        # Compute prediction and loss
        pred = model(X.to(device).float())
        loss = loss_fn(pred.float(), scaler_Y.transform(Y.to(device)).float())

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() returns a plain Python number, so no autograd guard is needed.
        running_loss += loss.item()

    return running_loss / len(dataloader)


# validation loop
def val_loop(dataloader, model, loss_fn):
    """Return the mean per-batch loss of ``model`` over ``dataloader``, without gradients.

    Uses the notebook-level globals ``device`` and ``scaler_Y`` in the same
    way as the training loop: targets are passed through
    ``scaler_Y.transform`` before being compared with the predictions.
    """
    with torch.no_grad():
        batch_losses = [
            loss_fn(
                model(X.to(device).float()).float(),
                scaler_Y.transform(Y.to(device)).float(),
            ).item()
            for X, Y in dataloader
        ]

    # Accumulate left to right from 0, one batch at a time.
    total = 0
    for value in batch_losses:
        total += value

    return total / len(dataloader)

In [15]:
# Per-epoch loss histories; the training cell appends to both lists.
train_losses, val_losses = [], []

In [33]:
# Optimisation settings.
epochs = 6000
learning_rate = 1e-2

# The target scale is computed from the training targets only.
scaler_Y = GlobalScaler(trainset[:][1])
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [34]:
# training
# Runs up to `epochs` epochs. When the early stopper runs out of patience the
# learning rate is cut to a quarter and training continues; after 8 such
# reductions the next trigger ends training.
k = 0  # learning-rate reductions applied so far
loss_fn = nn.MSELoss()  # created once and reused for training and validation
early_stopper = EarlyStopper(patience=40, min_delta=0.0)
for t in range(epochs):
    train_loss = train_loop(train_dataloader, model, loss_fn, optimizer)
    train_losses.append(train_loss)
    val_loss = val_loop(val_dataloader, model, loss_fn)
    val_losses.append(val_loss)
    if t % 10 ==0:
        print(f"Epoch {t+1}\n-------------------------------")
        print(val_loss)
    if early_stopper.early_stop(val_loss):
        if k <8:
            # Restart only the patience counter. Building a fresh EarlyStopper
            # here would reset its best loss to inf, so the next epoch would
            # overwrite the saved checkpoint even when it is worse than the
            # best model found so far.
            early_stopper.counter = 0
            learning_rate = learning_rate * 0.25
            # A new Adam instance is created, so its moment estimates restart.
            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
            k += 1
            print("New Learning Rate: " + str(learning_rate))
        else:
            break
print("Done!")

Epoch 1
-------------------------------
0.031848274171352386
Epoch 11
-------------------------------
0.01798270083963871
Epoch 21
-------------------------------
0.01940186135470867
Epoch 31
-------------------------------
0.018415361642837524
Epoch 41
-------------------------------
0.017620276659727097
Epoch 51
-------------------------------
0.01802794635295868
Epoch 61
-------------------------------
0.018582139164209366
Epoch 71
-------------------------------
0.018108122050762177
Epoch 81
-------------------------------
0.019032729789614677
Epoch 91
-------------------------------
0.019124334678053856
Epoch 101
-------------------------------
0.01948104240000248
New Learning Rate: 0.0025
Epoch 111
-------------------------------
0.014225199818611145
Epoch 121
-------------------------------
0.014129542745649815
Epoch 131
-------------------------------
0.014381526038050652
Epoch 141
-------------------------------
0.014604523777961731
New Learning Rate: 0.000625
Epoch 151
------