In [6]:
import pandas as pd
import numpy as np
import glob

import src.DataGrabber
from src.utils import *
import torch

In [7]:
from src.utils import *
from skimage.measure import block_reduce
from datetime import datetime

# plt.style.use('bmh')
# plt.style.use('dark_background')
# plt.style.use('Solarize_Light2')
set_gpytorch_settings(False)

# Reading data file and cleaning missing values
df = pd.read_feather(
    '../Data/feather/46221_9999_wave_height.feather')
parameters_wave = ['time', 'wave_height']
parameters_temp = ['time', 'sea_surface_temperature']
df_as_np = df \
    .loc[:, parameters_wave] \
    .astype(float) \
    .replace(
        to_replace=[999.0, 99.0, 9999.0],
        value=np.nan) \
    .to_numpy()
using_sk = block_reduce(
    df_as_np, block_size=(24, 1),
    func=np.mean).astype(float)

# Convert to torch tensors
X = torch \
    .tensor(using_sk[:-1, 0]) \
    .float() \
    .cuda()
y = torch \
    .tensor(using_sk[:-1, 1]) \
    .float() \
    .cuda()
X = X.reshape(-1, 1)
y = y.reshape(-1, 1)

X = X[~torch.any(y.isnan(), dim=1)]
y = y[~torch.any(y.isnan(), dim=1)]
y = y.flatten()
X_old = X


# Helper functions
def scaler(
        a,
        X_old=X_old,
        center=True):
    if center is True:
        a = a - X_old.min(0).values
    return a / (X_old.max(0).values - X_old.min(0).values)


def add_new_kernel_term(
        original_kernel, new_kernel_term, operation):
    return str(original_kernel) + str(operation) + str(new_kernel_term)


# GP Model Declaration
class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(
            self,
            train_x_, train_y_,
            likelihood, kernel):
        super(ExactGPModel, self).__init__(train_x_, train_y_, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = kernel

    def forward(
            self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions \
            .MultivariateNormal(mean_x, covar_x)


# Scale the time axis and log transform the Y-values
X = scaler(X, X_old)
y = y.log()

# max, min, and scale factor declaration
scaler_max = X_old.max(0).values.item()
scaler_min = X_old.min(0).values.item()
scale_factor = scaler_max - scaler_min
scaler_consts = [scaler_max, scaler_min, scale_factor]

# Plot the block reduced data set
temp_for_plotting = pd.Series(
    using_sk[:-1, 0] * 1e9, dtype='datetime64[ns]')
# plt.plot(temp_for_plotting, using_sk[:-1, 1])
# plt.xlabel("Time (epoch)")
# plt.ylabel("Significant Wave Height (meters)")
# plt.title(f'Significant wave height - after block reducing')
# plt.show()

print(
    f'Scale Max: {scaler_max}\n '
    f'Scale Min: {scaler_min}\n '
    f'Scale Factor: {scale_factor}\n '
    f'Before Block Reduce: {df_as_np.shape}\n'
    f'After Block Reduce: {using_sk.shape}\n'
    f'Number of Nans: {np.count_nonzero(np.isnan(df_as_np))}\n'
    f'Start Time: {datetime.fromtimestamp(df_as_np[0, 0])}\n'
    f'End Time: {datetime.fromtimestamp(df_as_np[-1, 0])}\n'
    f'Number of Days: {df_as_np.shape[0] / 48}\n'
    f'Time Period (Days): {(df_as_np[-1, 0] - df_as_np[0, 0]) / 24 / 60 / 60}\n ')

# Prediction range, training and test set define (14, 3, 365)
predict_days_out = 3
test_n = 2 * predict_days_out

# Split the data into train and test sets
# *contiguous means they are sitting next to each other in memory*
# train_x = X[test_n:].cuda()
# train_y = y[test_n:].cuda()
# test_x = X[-test_n:].cuda()
# test_y = y[-test_n:].cuda()
train_x = X[test_n:].contiguous().cuda()
train_y = y[test_n:].contiguous().cuda()
test_x = X[-test_n:].contiguous().cuda()
test_y = y[-test_n:].contiguous().cuda()
#
# # Forecasting beyond horizon
# test_future_15 = torch.cat((X[-test_n:], (X[1:(test_n*5)]+1)), dim=0).contiguous().cuda()
# test_future_90 = torch.cat((X[-test_n:], (X[1:(test_n*30)]+1)), dim=0).contiguous().cuda()
# train_x = X[test_n:].cuda()
# train_y = y[test_n:].cuda()
# test_x = X[-test_n:].cuda()
# test_y = y[-test_n:].cuda()

# Forecasting beyond horizon
# test_future_15 = torch.cat((X[-test_n:], (X[1:(test_n*5)]+1)), dim=0).cuda()
# test_future_90 = torch.cat((X[-test_n:], (X[1:(test_n*30)]+1)), dim=0).cuda()
# print(test_future_15)
# print(test_future_90)
# print(test_x)

# Create a list of random starting indices for the subtest sets
n_total = train_x.shape[0]
np.random.seed(2023)
idx_list = np.random.randint(
    low=n_total / 2,
    high=n_total - test_n,
    size=10)


def make_idx_list(
        training_set_size,
        size_of_artificial_test_set,
        size_of_partitions=1000, seed=2023):
    np.random.seed(seed)
    return np.random.randint(
        low=training_set_size / 2,
        high=training_set_size - size_of_artificial_test_set,
        size=size_of_partitions)


# Generate the train_loader and train_dataset
train_loader, train_dataset, test_loader, test_dataset = create_train_loader_and_dataset(
    train_x, train_y, test_x, test_y)
data_compact = [
    train_x, train_y, test_x, test_y,
    train_loader, train_dataset,
    test_loader, test_dataset]

# List of possible Kernels operations
kernel_operations = ["+", "*"]

# List of possible Kernels terms
kernel_list = [
    # Periodic Kernels of Varying Period constraints
    "Per_Arb", "Per_Year", "Per_Season", "Per_Month", "Per_Week",
    # Random Fourier Features Kernel
    "RFF",
    # Varying Length Scales of the RBF Kernel
    "RQ",
    # Speciality Kernels
    "AR2", "Min",
    # Smoothing Kernels of the Matern class
    "RBF", "Mat_2.5", "Mat_1.5", "Mat_0.5",
]

# Initial Kernel Trial
kernel_str_running = "AR2*RFF"
# kernel_str_running = "RBF+AR2*Per_Year*RBF*Mat_1.5"

parameter_input = {
    "model_cls": ExactGPModel,
    "kernel": kernel_str_running,
    "train_x": data_compact[0],
    "train_y": data_compact[1],
    "test_x": data_compact[2],
    "test_y": data_compact[3],
    "scaler_min": scaler_consts[1],
    "scaler_max": scaler_consts[0],
    "num_iter": 1000,
    "lr": 0.01,
    "name": kernel_str_running,
    "save_loss_values": "save",
    "use_scheduler": True,
    "forecast_over_this_horizon": [4, 10], #None, #[test_future_15, test_future_90],
    "index_list_for_training_split": idx_list,
    "predict_ahead_this_many_steps": test_n,
}



Scale Max: 1677108352.0
 Scale Min: 1349069952.0
 Scale Factor: 328038400.0
 Before Block Reduce: (174818, 2)
After Block Reduce: (7285, 2)
Number of Nans: 0
Start Time: 2012-09-30 16:55:44
End Time: 2023-02-22 23:25:52
Number of Days: 3642.0416666666665
Time Period (Days): 3797.312592592593
 


In [26]:
list_of_past_models = glob.glob("./../Past_Trials/Model_States/*.pth")
state_dictionary = torch.load(list_of_past_models[0])
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = gpytorch.models.ExactGP(train_x, train_y, likelihood)  #, kernel=state_dictionary.cov)
model.load_state_dict(state_dictionary)

RuntimeError: Error(s) in loading state_dict for ExactGP:
	Unexpected key(s) in state_dict: "mean_module.raw_constant", "covar_module.raw_outputscale", "covar_module.base_kernel.raw_lengthscale", "covar_module.base_kernel.raw_alpha", "covar_module.base_kernel.raw_lengthscale_constraint.lower_bound", "covar_module.base_kernel.raw_lengthscale_constraint.upper_bound", "covar_module.base_kernel.raw_alpha_constraint.lower_bound", "covar_module.base_kernel.raw_alpha_constraint.upper_bound", "covar_module.raw_outputscale_constraint.lower_bound", "covar_module.raw_outputscale_constraint.upper_bound". 

In [25]:
state_dictionary

OrderedDict([('likelihood.noise_covar.raw_noise', tensor([-0.1000])),
             ('likelihood.noise_covar.raw_noise_constraint.lower_bound',
              tensor(1.0000e-04)),
             ('likelihood.noise_covar.raw_noise_constraint.upper_bound',
              tensor(inf)),
             ('mean_module.raw_constant', tensor(0.0960)),
             ('covar_module.kernels.0.raw_outputscale', tensor(-0.0998)),
             ('covar_module.kernels.0.base_kernel.raw_lengthscale',
              tensor([[0.0995]])),
             ('covar_module.kernels.0.base_kernel.raw_alpha',
              tensor([0.0987])),
             ('covar_module.kernels.0.base_kernel.raw_lengthscale_constraint.lower_bound',
              tensor(0.0003)),
             ('covar_module.kernels.0.base_kernel.raw_lengthscale_constraint.upper_bound',
              tensor(inf)),
             ('covar_module.kernels.0.base_kernel.raw_alpha_constraint.lower_bound',
              tensor(0.0003)),
             ('covar_module.kerne