Necessary imports

In [None]:
import os
import sys
import matplotlib.pyplot as plt
%matplotlib inline
import torch
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import r2_score
import gpytorch
from gpytorch.models import ApproximateGP
from torch.distributions import Normal
import optuna
from optuna.trial import TrialState
import random
from sklearn.cluster import KMeans

# Run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Add the machine-specific project root to sys.path
# (presumably the folder that holds the local `utils` package imported below).
# COMPUTERNAME is read with .get() because it is not defined on every platform;
# an unset variable then ends in the explicit ValueError below instead of a bare KeyError.
computer_name = os.environ.get('COMPUTERNAME')
if computer_name == 'FYNN':            # name of surface PC
    sys.path.append(r'C:\Users\Surface\Masterarbeit')
elif computer_name == 'FYNNS-PC':  # desktop name
    sys.path.append(r'C:\Users\test\Masterarbeit')
else:
    raise ValueError("Unbekannter Computername: " + str(computer_name))

import utils.data_prep as data_prep
import utils.metrices as metrices

In [None]:
SEED = 42


def set_seed(seed: int):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU, plus all CUDA devices when CUDA is available), and switches
    cuDNN to deterministic mode with auto-tuning (benchmark) disabled.

    Args:
        seed: Integer seed handed to every generator.
    """
    # cuDNN flags are set unconditionally, independent of CUDA availability
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # CPU-side generators: Python, NumPy, PyTorch
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return

    # GPU-side generators: current device first, then every device
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


set_seed(SEED)

Transformation Pipeline for Approximating GPR

In [None]:
# Load the 'C1_V01_delta_kan' data set through the project helper, requesting a
# (0.6, 0.2, 0.2) split (presumably train / validation / test - see data_prep).
(
    X_train, X_val, X_test,
    y_train, y_val, y_test,
    feature_names,
) = data_prep.load_tranform_and_split_data('C1_V01_delta_kan', split_ratio=(0.6, 0.2, 0.2))

# Convert the NumPy arrays to float32 PyTorch tensors.
# No extra dimension is added: each tensor keeps the shape of its source array.
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.from_numpy(features).float() for features in (X_train, X_val, X_test)
)
y_train_tensor, y_val_tensor, y_test_tensor = (
    torch.from_numpy(targets).float() for targets in (y_train, y_val, y_test)
)
print(X_train_tensor.shape, X_val_tensor.shape, X_test_tensor.shape)
print(feature_names)

# Move all six tensors to the GPU when one is available
if torch.cuda.is_available():
    X_train_tensor, X_val_tensor, X_test_tensor = (
        tensor.cuda() for tensor in (X_train_tensor, X_val_tensor, X_test_tensor)
    )
    y_train_tensor, y_val_tensor, y_test_tensor = (
        tensor.cuda() for tensor in (y_train_tensor, y_val_tensor, y_test_tensor)
    )

Stochastic Variational GP Regression Implementation

Natural Gradient Descent with Variational Models for better and faster convergence

In [None]:
# Pair inputs with targets for the training and the test split
train_dataset, test_dataset = (
    TensorDataset(X_train_tensor, y_train_tensor),
    TensorDataset(X_test_tensor, y_test_tensor),
)

# Mini-batches of 1024 samples; only the training loader shuffles
train_loader = DataLoader(dataset=train_dataset, batch_size=1024, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1024, shuffle=False)

class GPModel(ApproximateGP):
    """Approximate GP with learnable inducing points and a constant mean.

    The variational distribution is a ``NaturalVariationalDistribution`` so
    that its parameters can be trained with natural gradient descent (NGD).

    Note:
        The constant mean is initialised from the module-level
        ``y_train_tensor``; that tensor must exist before instantiation.
    """

    def __init__(self, inducing_points, kernel):
        """Build the variational strategy and register mean and kernel.

        Args:
            inducing_points: Tensor of initial inducing locations; its first
                dimension is the number of inducing points.
            kernel: Covariance module used as ``covar_module``.
        """
        num_inducing = inducing_points.size(0)
        strategy = gpytorch.variational.VariationalStrategy(
            self,
            inducing_points,
            gpytorch.variational.NaturalVariationalDistribution(num_inducing),
            learn_inducing_locations=True,
        )
        super().__init__(strategy)

        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = kernel

        # Start the constant mean at the average of the training targets
        train_target_mean = y_train_tensor.mean().item()
        self.mean_module.initialize(constant=train_target_mean)

    def forward(self, x):
        """Return the GP prior at ``x`` as a multivariate normal."""
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x),
            self.covar_module(x),
        )

# Population variance (unbiased=False) of the training targets, printed for reference
print(torch.var(y_train_tensor, unbiased=False).item())

Create Optuna Study

In [None]:
def objective(trial):
    """Optuna objective: train a variational GP and return its best validation loss.

    Tunes two learning rates: ``lr`` for the NGD optimizer of the variational
    parameters and ``lr_adam`` for the Adam optimizer of the kernel, mean and
    likelihood hyperparameters. The model uses an ARD rational quadratic kernel
    wrapped in a ``ScaleKernel`` and 1000 inducing points placed at the k-means
    cluster centres of the training inputs.

    Relies on module-level state: ``SEED``, ``set_seed``, ``GPModel``,
    ``X_train``, ``y_val``, ``y_train_tensor``, ``X_val_tensor``,
    ``y_val_tensor`` and ``train_loader``.

    Args:
        trial: Optuna trial used to suggest hyperparameters, to report the
            per-epoch validation loss and to query the pruner.

    Returns:
        The lowest validation loss (negative ``VariationalELBO`` evaluated on
        the validation set) observed during training.

    Raises:
        optuna.exceptions.TrialPruned: When the pruner stops the trial early.
    """
    set_seed(SEED)

    # Target variance: used to initialise the kernel outputscale and,
    # scaled by 1e-2, the likelihood noise
    y_var = y_train_tensor.var(unbiased=False).item()
    noise = 1e-2 * y_var

    # Suggest hyperparameters for optimization
    lr_ngd = trial.suggest_float('lr', 1e-3, 1e-1, log=True)
    lr_adam = trial.suggest_float('lr_adam', 1e-3, 1e-1, log=True)

    # Rational quadratic kernel with one lengthscale per input feature (ARD)
    num_features = X_train.shape[1]
    rational_quadratic_kernel = gpytorch.kernels.RQKernel(
        ard_num_dims=num_features,
        alpha_constraint=gpytorch.constraints.Interval(0.1, 10.0),
    )
    rational_quadratic_kernel.lengthscale = torch.ones(num_features)

    kernel = gpytorch.kernels.ScaleKernel(rational_quadratic_kernel)
    # The outputscale parameter belongs to the ScaleKernel wrapper; assigning it
    # on the base RQKernel only creates an unused attribute and changes nothing.
    kernel.outputscale = y_var

    kernel_name = type(kernel.base_kernel).__name__
    print(kernel_name)

    # Inducing points: k-means cluster centres of the training inputs
    num_inducing_points = 1000
    kmeans = KMeans(n_clusters=num_inducing_points, random_state=42).fit(X_train)
    inducing_points = torch.from_numpy(kmeans.cluster_centers_).float()

    model = GPModel(inducing_points=inducing_points, kernel=kernel)
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    likelihood.noise = noise

    print(f'Model: {model}')
    print(f'Mean: {model.mean_module.constant.item()}')
    print(f'Lengthscale: {model.covar_module.base_kernel.lengthscale}')
    print(f'Outputscale: {model.covar_module.outputscale}')
    print(f'Likelihood Noise: {likelihood.noise.item()}')
    if torch.cuda.is_available():
        model = model.cuda()
        likelihood = likelihood.cuda()

    num_epochs = 100

    model.train()
    likelihood.train()

    # NGD updates the variational parameters, Adam everything else
    variational_ngd_optimizer = gpytorch.optim.NGD(
        model.variational_parameters(), num_data=y_train_tensor.size(0), lr=lr_ngd
    )
    hyperparameter_optimizer = torch.optim.Adam([
        {'params': model.hyperparameters()},
        {'params': likelihood.parameters()},
    ], lr=lr_adam)

    # VariationalELBO is used for training
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_loader.dataset))

    # Early stopping: stop after `patience` consecutive epochs in which the
    # validation loss did not improve on the best value by more than `tolerance`
    best_val_loss = np.inf
    patience = 10
    epochs_no_improve = 0
    decimal_places = 3
    tolerance = 10 ** (-decimal_places)

    for epoch in range(num_epochs):
        epoch_loss = 0.0
        model.train()
        likelihood.train()
        for x_batch, y_batch in train_loader:
            # Zero gradients from previous iteration
            variational_ngd_optimizer.zero_grad()
            hyperparameter_optimizer.zero_grad()
            output = model(x_batch)
            loss = -mll(output, y_batch)
            loss.backward()
            variational_ngd_optimizer.step()
            hyperparameter_optimizer.step()

            epoch_loss += loss.item()
        # print the average training loss every ten epochs
        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss / len(train_loader):.4f}')

        model.eval()
        likelihood.eval()

        with torch.no_grad():
            f_val_preds = model(X_val_tensor)
            val_loss = -mll(f_val_preds, y_val_tensor).item()

            # Per-point Gaussian NLL and R² are reported for information only
            val_preds = likelihood(f_val_preds)
            val_mean = val_preds.mean.cpu()
            val_std = val_preds.stddev.cpu()
            val_std = val_std.clamp_min(1e-6)
            nll_per_point = -Normal(val_mean, val_std).log_prob(y_val_tensor.cpu()).numpy()
            val_nll = nll_per_point.mean().item()

            r2_score_val = r2_score(y_val, val_mean)
            if (epoch + 1) % 10 == 0:
                print(f'Validation Loss: {val_loss} Validation NLL: {val_nll} R²: {r2_score_val:.3f}')

            trial.report(val_loss, step=epoch)

            if trial.should_prune():
                raise optuna.exceptions.TrialPruned()

            # Track the same metric that is reported and returned (val_loss);
            # only a real improvement resets the patience counter.
            if val_loss < best_val_loss - tolerance:
                best_val_loss = val_loss
                epochs_no_improve = 0
            else:
                epochs_no_improve += 1

            if epochs_no_improve >= patience:
                print(f'Early stopping at iteration {epoch + 1} with best validation loss: {best_val_loss:.3f}')
                break
    return best_val_loss

Execute Optuna Study

In [None]:
print(f"Using device: {device}")

# Seeded TPE (Tree-structured Parzen Estimator) sampler for the suggestions
tpe_sampler = optuna.samplers.TPESampler(seed=SEED)
# Median pruner: 5 trials run before pruning starts, with 5 warmup steps per trial
median_pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=5)

# create a study object for Optuna that minimizes the objective value
study = optuna.create_study(
    direction="minimize",
    sampler=tpe_sampler,
    pruner=median_pruner,
)

# move the training and validation tensors to the device
X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor = (
    tensor.to(device)
    for tensor in (X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor)
)

# Run 50 trials sequentially; timeout=None means there is no time limit
study.optimize(objective, n_trials=50, timeout=None, n_jobs=1, show_progress_bar=True)

# Split the finished trials by final state for the summary below
pruned_trials, complete_trials = (
    study.get_trials(deepcopy=False, states=[state])
    for state in (TrialState.PRUNED, TrialState.COMPLETE)
)

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Train best Model

In [None]:
#set_seed(SEED)
# Target variance: used to initialise the kernel outputscale and,
# scaled by 1e-2, the likelihood noise
y_var = y_train_tensor.var(unbiased=False).item()
noise = 1e-2 * y_var

# Rational quadratic kernel with one lengthscale per input feature (ARD)
rational_quadratic_kernel = gpytorch.kernels.RQKernel(ard_num_dims=X_train.shape[1],
                                                    alpha_constraint=gpytorch.constraints.Interval(0.1, 10.0))
rational_quadratic_kernel.lengthscale = torch.ones(X_train.shape[1])
rational_quadratic_kernel1 = gpytorch.kernels.ScaleKernel(rational_quadratic_kernel)
# The outputscale parameter belongs to the ScaleKernel wrapper; assigning it on
# the base RQKernel only creates an unused attribute and changes nothing.
rational_quadratic_kernel1.outputscale = y_var

kernel = rational_quadratic_kernel1

kernel_name = type(kernel.base_kernel).__name__
print(kernel_name)

# Inducing points: 2000 k-means cluster centres of the training inputs
num_inducing_points = 2000

kmeans = KMeans(n_clusters=num_inducing_points, random_state=42).fit(X_train)
inducing_points = torch.from_numpy(kmeans.cluster_centers_).float()

model = GPModel(inducing_points = inducing_points, kernel=kernel)
likelihood = gpytorch.likelihoods.GaussianLikelihood()
likelihood.noise = noise

print(f'Model: {model}')
print(f'Mean: {model.mean_module.constant.item()}')
print(f'Lengthscale: {model.covar_module.base_kernel.lengthscale}')
print(f'Outputscale: {model.covar_module.outputscale}')
print(f'Likelihood Noise: {likelihood.noise.item()}')
if torch.cuda.is_available():
    model = model.cuda()
    likelihood = likelihood.cuda()

num_epochs = 100

model.train()
likelihood.train()

# NOTE(review): both learning rates are hard-coded here rather than read from
# study.best_params - confirm they match the result of the Optuna study.
# NGD updates the variational parameters, Adam everything else.
variational_ngd_optimizer = gpytorch.optim.NGD(model.variational_parameters(), num_data= y_train_tensor.size(0), lr=0.03)

hyperparameter_optimizer = torch.optim.Adam([
    {'params': model.hyperparameters()},
    {'params': likelihood.parameters()},
], lr=0.04)

# VariationalELBO is used for training
mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_loader.dataset))

# Early stopping: stop after `patience` consecutive epochs in which the
# validation loss did not improve on the best value by more than `tolerance`
best_val_loss = np.inf
patience = 10
epochs_no_improve = 0
decimal_places = 3
tolerance = 10 ** (-decimal_places)

for epoch in range(num_epochs):
    epoch_loss = 0.0
    model.train()
    likelihood.train()
    for x_batch, y_batch in train_loader:
        # Zero gradients from previous iteration
        variational_ngd_optimizer.zero_grad()
        hyperparameter_optimizer.zero_grad()
        output = model(x_batch)
        loss = -mll(output, y_batch)
        loss.backward()
        variational_ngd_optimizer.step()
        hyperparameter_optimizer.step()

        epoch_loss += loss.item()
    # print the average training loss every ten epochs
    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss / len(train_loader):.4f}')

    model.eval()
    likelihood.eval()

    with torch.no_grad():
        f_val_preds = model(X_val_tensor)
        val_loss = -mll(f_val_preds, y_val_tensor).item()

        # Per-point Gaussian NLL and R² are reported for information only
        val_preds = likelihood(f_val_preds)
        val_mean = val_preds.mean.cpu()
        val_std = val_preds.stddev.cpu()
        val_std = val_std.clamp_min(1e-6)
        nll_per_point = -Normal(val_mean, val_std).log_prob(y_val_tensor.cpu()).numpy()
        val_nll = nll_per_point.mean().item()

        r2_score_val = r2_score(y_val, val_mean)
        if (epoch + 1) % 10 == 0:
            print(f'Validation Loss: {val_loss} Validation NLL: {val_nll} R²: {r2_score_val:.3f}')

        # Track a single metric (val_loss); only a real improvement resets
        # the patience counter.
        if val_loss < best_val_loss - tolerance:
            best_val_loss = val_loss
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        # NOTE: the weights of the best epoch are not restored; the model keeps
        # the parameters of the last epoch that was run.
        if epochs_no_improve >= patience:
            print(f'Early stopping at iteration {epoch + 1} with best validation loss: {best_val_loss:.3f}')
            break

Make Predictions

In [None]:
import uncertainty_toolbox as uct

# Show the trained model together with its learned hyperparameters
print(model)
print(model.mean_module.constant.item())
print(model.covar_module.base_kernel.lengthscale)
print(model.covar_module.outputscale)
print(likelihood.noise.item())

# Switch to evaluation mode before predicting
model.eval()
likelihood.eval()

with torch.no_grad():
    # Predictive distribution (likelihood applied to the model output) for the whole test set
    preds = likelihood(model(X_test_tensor))

    means = preds.mean.cpu()          # predictive mean
    variances = preds.variance.cpu()  # predictive variance
    stddevs = preds.stddev.cpu()      # predictive standard deviation

# NumPy views of the predictions for the metric and plotting helpers
means_np = means.numpy()
stddevs_np = stddevs.numpy()

# Full metric report from uncertainty_toolbox
pnn_metrics = uct.metrics.get_all_metrics(means_np, stddevs_np, y_test)
print(pnn_metrics)

# use own function to calculate coverage and MPIW (called with coverage=0.95)
ev_intervals = metrices.evaluate_intervals(means_np, stddevs_np, y_test, coverage=0.95)
print(f'coverage: {ev_intervals["coverage"]}, MPIW: {ev_intervals["MPIW"]}')

# calibration Curve with UCT
uct.viz.plot_calibration(means_np, stddevs_np, y_test)

# adversarial group calibration
uct.viz.plot_adversarial_group_calibration(means_np, stddevs_np, y_test)

# check type and shape of means, variances, stddevs
print(type(means), type(variances), type(stddevs))
print("Means shape:", means.shape)
print("Variances shape:", variances.shape)
print("Standard Deviations shape:", stddevs.shape)
