In [1]:
import gpytorch
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
import torch

from utils.data_loaders import get_wake_datasets
from utils.evaluation import evaluate_model

In [2]:
# ---------------------------------------------------------------------------
# Experiment configuration: data location, model naming and compute device.
# ---------------------------------------------------------------------------

# Feature switches.
CONSIDER_WS = False
COORDS_AS_INPUT = True # univariate setting

# Data location.
FACTORS_FOLDER = "discr_factors_x2_30_y-2_2_step0.125_TIstep0.01_CTstep0.01"
DATA_FOLDER = f"data/{FACTORS_FOLDER}/"

# Per-variable reduction factors handed to the dataset loader; they are also
# encoded into the model name below.
INPUT_VAR_TO_TRAIN_REDUCTION_FACTOR = {'ti': 4, 'ct': 4}
train_reduc_factor_string = "training_factors=" + "-".join(
    f"{var}{factor}" for var, factor in INPUT_VAR_TO_TRAIN_REDUCTION_FACTOR.items()
)
MODEL_NAME = f"univariate_skGP_{train_reduc_factor_string}"
#BEST_MODEL_PATH = f"saved_models/{FACTORS_FOLDER}/{MODEL_NAME}.pt"

if CONSIDER_WS:
    # NOTE(review): DATA_FOLDER was derived from the old FACTORS_FOLDER above
    # and is not refreshed here -- confirm whether that is intended.
    MODEL_NAME = MODEL_NAME + "_consider_ws"
    FACTORS_FOLDER = FACTORS_FOLDER.replace("TIstep0.01_CTstep0.01", "reducedTI-CT")
    BEST_MODEL_PATH = f"saved_models/{FACTORS_FOLDER}/{MODEL_NAME}.pt"

# NOTE(review): the description says "multivariate" while MODEL_NAME says
# "univariate" -- left unchanged, see the TODO below.
MODEL_DESCRIPTION = MODEL_NAME + "_" + FACTORS_FOLDER + " standard multivariate GP" #TODO
print("Model description:", MODEL_DESCRIPTION)

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
print(f"{DEVICE=}")

# hyperparameters
#EPOCHS = 2 #TODO
#LR = 0.0001

Model description: univariate_skGP_training_factors=ti4-ct4_discr_factors_x2_30_y-2_2_step0.125_TIstep0.01_CTstep0.01 standard multivariate GP
DEVICE=device(type='cpu')


In [3]:
# Load the train / validation / test splits. The training split is selected
# through the per-variable reduction factors rather than split percentages.
train_dataset, valid_dataset, test_dataset = get_wake_datasets(DATA_FOLDER,
                                                          consider_ws=CONSIDER_WS,
                                                          coords_as_input=COORDS_AS_INPUT,
                                                          #train_perc=0.6,
                                                          #test_perc=0.3,
                                                          #validation_perc=0.1
                                                          input_var_to_train_reduction_factor=INPUT_VAR_TO_TRAIN_REDUCTION_FACTOR)

# Flatten the targets to 1-D so that each input row maps to one scalar output.
train_x, train_y = train_dataset.inputs, train_dataset.outputs.view(-1)
test_x, test_y = test_dataset.inputs, test_dataset.outputs.view(-1)
print("Train shapes: ", train_x.shape, train_y.shape)
print("Test shapes: ", test_x.shape, test_y.shape)

# The validation split is only inspected, never used afterwards. Its shapes
# are printed without binding valid_x / valid_y, so nothing has to be deleted
# (the previous unconditional `del valid_x, valid_y` raised NameError whenever
# the loader returned no validation split).
if valid_dataset is not None:
    print("Valid shapes: ", valid_dataset.inputs.shape, valid_dataset.outputs.view(-1).shape)

# Drop the notebook's reference to the validation split.
del valid_dataset

Ignoring percentages of train-valid-test split (train_perc=0.8, valid_perc=0, test_perc=0.2)
and using the reduction factors for the training set instead:
{'ti': 4, 'ct': 4}


Train shapes:  torch.Size([3942400, 4]) torch.Size([3942400])
Test shapes:  torch.Size([28851200, 4]) torch.Size([28851200])
Valid shapes:  torch.Size([28851200, 4]) torch.Size([28851200])


In [5]:
from utils.data_utils import get_parameters_from
from utils.pywake_utils import get_grid_shape

# Grid extent / resolution as returned by get_parameters_from for DATA_FOLDER;
# the last two returned values are not needed here.
x_start, x_end, y_start, y_end, grid_factor, _, _ = get_parameters_from(DATA_FOLDER)
grid_shape = get_grid_shape(x_start, x_end, y_start, y_end, grid_factor)
grid_size = grid_shape[0] * grid_shape[1]
print(grid_size)

# REDUCE TRAINING DATA
# Keep only the leading `grid_size * num_instances` rows of the training set.
# NOTE(review): presumably each instance occupies `grid_size` consecutive
# rows -- confirm against the data loader.
num_instances = 1000 #TODO
num_features = 4
train_x = train_x[:grid_size * num_instances]  #.reshape(num_instances, grid_size, num_features)
train_y = train_y[:grid_size * num_instances]  #.reshape(num_instances, grid_size)
print(train_x.shape, train_y.shape)

7168


'\nREDUCE TRAINING DATA\nnum_instances =  #TODO\nnum_features = 4\ntrain_x= torch.split(train_x, grid_size * num_instances)[0]#.reshape(num_instances, grid_size, num_features)\ntrain_y = torch.split(train_y, grid_size * num_instances)[0]#.reshape(num_instances, grid_size)\nprint(train_x.shape, train_y.shape)\n'

In [6]:
# Fit a scikit-learn Gaussian process regressor with an RBF kernel
# (initial length scale 1.0) on the reduced training set.
# NOTE(review): the captured output suggests this cell did not complete;
# the size of the training set may be the cause -- confirm.
kernel = RBF(length_scale=1.0)
gp = GaussianProcessRegressor(kernel=kernel).fit(train_x, train_y)  # fit() returns the estimator
gp

: 

: 

In [None]:
"""
REDUCE TEST DATA
"""
# Keep only the leading `grid_size * num_instances` rows of the test set in
# test_x_ / test_y_; the full test_x / test_y tensors are left untouched.
num_instances = 100
test_x_ = test_x[:grid_size * num_instances]  #.reshape(num_instances, grid_size, num_features)
test_y_ = test_y[:grid_size * num_instances]  #.reshape(num_instances, grid_size)
# Full shapes first, reduced shapes second.
print(test_x.shape, test_y.shape)
print(test_x_.shape, test_y_.shape)

In [None]:
# Evaluation on the (reduced) training set.
save_results = False
evaluate_model(gp, data=(train_x, train_y), data_type='train',
               model_description=MODEL_DESCRIPTION,
               save_results=save_results)

print("\n")

# Evaluation on the reduced test subset built in the previous cell
# (test_x_ / test_y_). The full test_x / test_y tensors were passed here
# before, which left the reduced subset unused; the captured output stops
# right after the "Test results" header for that run.
evaluate_model(gp, data=(test_x_, test_y_), data_type='test',
               model_description=MODEL_DESCRIPTION,
               save_results=save_results)

Train results for univariate_skGP_training_factors=ti4-ct4_discr_factors_x2_30_y-2_2_step0.125_TIstep0.01_CTstep0.01 standard multivariate GP
r2_score=1.0
explained_variance_score=1.0
mean_squared_error=5.7518825484673966e-24
mean_absolute_error=1.3250620437038523e-12
median_absolute_error=1.4278227038200253e-13
mean_absolute_percentage_error=1.0000000915781452e-10
Prediction time=0.001621548768265971s


Test results for univariate_skGP_training_factors=ti4-ct4_discr_factors_x2_30_y-2_2_step0.125_TIstep0.01_CTstep0.01 standard multivariate GP


: 

: 

In [None]:
# the simplest form of GP model, exact inference
class ExactGPModel(gpytorch.models.ExactGP):
    """Exact-inference GP with a constant mean and a scaled Matern kernel.

    The mean module, the Matern kernel and its scale wrapper are all created
    with a batch shape of ``grid_size`` (read from the notebook's globals).
    """

    def __init__(self, train_x, train_y, likelihood):
        """Register the training data and likelihood and build mean/kernel modules."""
        super().__init__(train_x, train_y, likelihood)
        batch_shape = torch.Size([grid_size])
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=batch_shape)
        matern = gpytorch.kernels.MaternKernel(batch_shape=batch_shape)
        self.covar_module = gpytorch.kernels.ScaleKernel(matern, batch_shape=batch_shape)

    def forward(self, x):
        """Return the multivariate normal built from the mean and covariance at ``x``."""
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x),
            self.covar_module(x),
        )

# Gaussian likelihood with the same batch shape as the model's modules,
# then the exact GP model built on the training data.
likelihood = gpytorch.likelihoods.GaussianLikelihood(
    batch_shape=torch.Size([grid_size])
)
model = ExactGPModel(train_x, train_y, likelihood)

# Switch both modules to training mode before hyperparameter optimisation.
model.train()
likelihood.train()

# Adam over the model parameters (includes GaussianLikelihood parameters).
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

# Training objective for GPs: the exact marginal log likelihood.
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

In [None]:
# Number of optimisation steps. No live definition of EPOCHS is visible in
# the notebook (the configuration cell only has a commented-out
# `#EPOCHS = 2 #TODO`), so it is defined here with that same value.
EPOCHS = 2  # TODO: tune

for i in range(EPOCHS):
    # Zero gradients from the previous iteration
    optimizer.zero_grad()
    # Output from the model
    output = model(train_x)
    # Negative marginal log likelihood. The model and likelihood are built
    # with a batch shape, so the value is reduced with sum() to obtain the
    # scalar that backward() and item() require (a no-op for a scalar loss).
    loss = -mll(output, train_y).sum()
    loss.backward()
    # Hyperparameters are batch-shaped as well; report their mean so item()
    # is always called on a single-element tensor.
    lengthscale = model.covar_module.base_kernel.lengthscale.mean().item()
    noise = model.likelihood.noise.mean().item()
    # `loss.item()` must be called: formatting the bound method with `:.3f`
    # raises TypeError.
    print(f"Iter {i+1}/{EPOCHS} - Loss: {loss.item():.3f}\tlengthscale: {lengthscale:.3f}\tnoise: {noise:.3f}")
    optimizer.step()

: 

: 