In [1]:
import numpy as np
import matplotlib as mpl
from matplotlib import pyplot as plt
from matplotlib import colors
from matplotlib.ticker import MultipleLocator
import os.path
from pathlib import Path
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

from qbo1d import utils
from qbo1d import adsolver
from qbo1d import emulate
from qbo1d.stochastic_forcing import WaveSpectrum

from sklearn.linear_model import LinearRegression, Ridge, Lasso
from qbo1d.emulate import relative_MSELoss

## Data Loading

### Use the PyTorch DataLoader form from scratch

In [15]:
# Location of the dataset file, relative to the notebook.
dir_name = "./data/direct/"
file_name = "control.nc"

data = emulate.QBODataset(file_path=dir_name + file_name)

# Hold out one fifth of the samples for testing.
num_split = len(data) // 5

# Here use batch_size = 1
batch_size = 1

# Seeded generator so the train/test partition is the same on every run.
split_lengths = [len(data) - num_split, num_split]
split_generator = torch.Generator().manual_seed(42)
split_data = torch.utils.data.random_split(data, split_lengths, generator=split_generator)

train_subset, test_subset = split_data
train_dataloader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_subset, batch_size=batch_size, shuffle=True)



In [16]:
# Number of samples in each split. len(dataloader) would count *batches*, which only
# equals the number of samples when batch_size == 1, so the size is read from the
# dataset wrapped by the loader instead.
train_length = len(train_dataloader.dataset)
# Peek at a single batch to read the feature width (second axis of X).
X, Y = next(iter(train_dataloader))
train_width = X.shape[1]

test_length = len(test_dataloader.dataset)
X, Y = next(iter(test_dataloader))
test_width = X.shape[1]

print(f"Train Array Size ({train_length}, {train_width})")
print(f"Test Array Size ({test_length}, {test_width})")

Train Array Size (27648, 73)
Test Array Size (6912, 73)


### Convert the data to plain NumPy arrays

In [17]:
def _copy_batches(dataloader, X_out, Y_out):
    """Copy every (X, Y) batch of `dataloader` into consecutive rows of the output arrays.

    Rows are written at a running offset, so batches holding any number of samples
    are handled (the first axis of each batch is taken as the sample axis).

    Parameters
    ----------
    dataloader : iterable yielding (X, Y) pairs
    X_out, Y_out : np.ndarray
        Pre-allocated arrays that are filled in place.

    Returns
    -------
    int
        Number of rows written.
    """
    row = 0
    for X_batch, Y_batch in dataloader:
        n_samples = X_batch.shape[0]
        X_out[row:row + n_samples] = np.asarray(X_batch)
        Y_out[row:row + n_samples] = np.asarray(Y_batch)
        row += n_samples
    return row


# Pre-allocate the dense arrays, then fill them from the dataloaders.
train_X = np.zeros((train_length, train_width))
train_Y = np.zeros((train_length, train_width))

test_X = np.zeros((test_length, test_width))
test_Y = np.zeros((test_length, test_width))

_copy_batches(train_dataloader, train_X, train_Y)
_copy_batches(test_dataloader, test_X, test_Y)
    

### Linear Regression by sklearn

In [19]:
# Fit a multi-output linear map from inputs to targets on the training split.
reg = LinearRegression()
reg.fit(train_X, train_Y)

# Offline evaluation: Euclidean norm of each test sample's residual, averaged over samples.
outcome = reg.predict(test_X)
residual = outcome - test_Y
per_sample_l2 = np.linalg.norm(residual, axis=1)
mean_l2_loss = per_sample_l2.mean()
print(mean_l2_loss)

# Here we use the built-in score function for the lasso and ridge regressions, though I don't like it.
# Linear regression seems to perform very well in offline training.



2.029500965941119e-05


In [22]:
# Make float64 the default dtype for tensors created from here on.
torch.set_default_dtype(torch.float64)
solver = adsolver.ADSolver(t_max=360*96*86400, w=3e-4)

# Set up the linear regression model AX + B.
# The fitted parameters are wrapped as tensors once, up front, instead of on every
# call made by the solver; torch.from_numpy shares memory with the NumPy arrays.
coef = torch.from_numpy(reg.coef_)
intercept = torch.from_numpy(reg.intercept_)


def model(x):
    """Apply the fitted linear regression to x: coef @ x + intercept."""
    return coef @ x + intercept


u = solver.solve(source_func=model)
u = u.detach()
print("The UN - truncated u has length: ", len(u))

# Boolean mask over the first axis of u to detect where u blows up.
# Despite its name, nan_boolean is True for the rows that contain NO NaN (the rows kept).
# Built with one vectorized expression rather than a Python loop over every row.
# NOTE(review): assumes u has at least two dimensions — confirm against ADSolver.solve.
nan_boolean = ~u.isnan().flatten(start_dim=1).any(dim=1)
u = u[nan_boolean]

print("The truncated u has length: ", len(u))
# Apply the same mask to the solver's time array so it stays aligned with the truncated u.
solver.time = solver.time[nan_boolean]


# NOTE(review): diagnostics left disabled; `spinup_time` is not defined in this cell.
# amp25 = utils.estimate_amplitude(solver.time, solver.z, u, height=25e3, spinup=spinup_time)
# amp20 = utils.estimate_amplitude(solver.time, solver.z, u, height=20e3, spinup=spinup_time)
# tau25 = utils.estimate_period(solver.time, solver.z, u, height=25e3, spinup=spinup_time)

The UN - truncated u has length:  34561
The truncated u has length:  2021


When the model is used for online training,
we can see that it blows up after $\tau=3000$.
The online performance is bad.