In [1]:
import numpy as np
import os
import sys
import matplotlib.pyplot as plt

In [2]:
# Make the local `gp_fusion` checkout (which provides the `modules` package
# imported below) importable. The location can be overridden with the
# GP_FUSION_ROOT environment variable; the original path is kept as the default.
GP_FUSION_ROOT = os.environ.get("GP_FUSION_ROOT", '/Users/fllorente/Dropbox/con_Petar/PYTHON/gp_fusion')
# Guard against duplicate entries when this cell is re-run.
if GP_FUSION_ROOT not in sys.path:
    sys.path.append(GP_FUSION_ROOT)


In [3]:
from modules.data_handling import load_and_normalize_data, split_dataset, create_validation_set
from modules.prediction_storage import store_predictions
from modules.fusion_methods import compute_neg_log_like, product_fusion, train_and_predict_fusion_method
from modules.model_training import train_and_predict_single_gp, train_expert, store_predictions_for_experts
from modules.phs import phs
from modules.bhs import bhs
from modules.model_training import train_joint_experts_shared_kernel


In [28]:
# ------------ Load and normalize data --------- #
# Dataset name and split index handed to the loader.
# Alternative dataset: "concrete"
dataset_name = "pendulum"
split = 3

# Inputs use the "max-min" normalization method; targets are normalized too.
X_train, y_train, X_test, y_test = load_and_normalize_data(
    dataset_name,
    split,
    normalize_x_method="max-min",
    normalize_y=True,
)

# Report how many points ended up in each partition.
for label, targets in (("training size: ", y_train), ("test size: ", y_test)):
    print(label, len(targets))

training size:  567
test size:  63


In [29]:
n_experts = 3
# True: validation points are drawn from the experts' own training splits.
# False: a separate hold-out part of the training set is used for validation.
val_incest = True

if not val_incest:
    print("Training data is split into two: one for training the experts and the other for the weights")
    # Hold out a fixed fraction of the training set so the two training stages
    # (experts, then weights) do not reuse the same points.
    validation_proportion = 0.1
    num_val_samples = int(validation_proportion*len(X_train))

    # Seeded in-place shuffle of the row indices; the leading chunk is validation.
    np.random.seed(11)
    indices = np.arange(len(X_train))
    np.random.shuffle(indices)
    val_indices, train_indices = indices[:num_val_samples], indices[num_val_samples:]

    X_val, y_val = X_train[val_indices].copy(), y_train[val_indices].copy()
    X_train_train, y_train_train = X_train[train_indices].copy(), y_train[train_indices].copy()

    # One split per expert, built from the non-validation part only.
    splits = split_dataset(X_train_train, y_train_train, n_splits=n_experts, with_replacement=False)
else:
    print("Experts are trained on all training data and validation data is taken from training splits")
    n_points_per_split = 5
    # One split per expert, built from the full training set ...
    splits = split_dataset(X_train, y_train, n_splits=n_experts, with_replacement=False)
    # ... and the validation set is taken from those same splits (= data incest).
    X_val, y_val = create_validation_set(splits, n_points_per_split)

    

Experts are trained on all training data and validation data is taken from training splits


In [30]:
# Display the number of validation points in X_val as the cell output.
len(X_val)

15

In [31]:
# Print the number of training points in each expert's split.
# The pair is unpacked into `X_split, y_split` rather than looping with the name
# `split`, which would overwrite the dataset split index `split` set in the
# data-loading cell.
for X_split, y_split in splits:
    print(X_split.shape[0])

189
189
189


In [32]:
# Settings passed to the expert-training routines.
kappa = 2
lambdaa = 2
training_iter = 200
lr = 0.01
optim_settings = dict(lr=lr, training_iter=training_iter)

joint_training = False

if not joint_training:
    print("Experts are trained independently")
    # ====== independent training: one (model, likelihood) pair per split ==== #
    experts = []
    for X_split, y_split in splits:
        model, likelihood = train_expert(X_split, y_split, kappa, lambdaa, **optim_settings)
        experts.append((model, likelihood))
else:
    print("Experts are trained jointly")
    # ====== joint training: every model is paired with the single returned likelihood ====== #
    models, likelihood = train_joint_experts_shared_kernel(splits, kappa, lambdaa, **optim_settings)
    experts = [(model, likelihood) for model in models]


# Expert predictions on the test set (means, stds and, going by the name,
# prior predictive stds).
mu_preds_test, std_preds_test, std_preds_prior_test = store_predictions_for_experts(experts, X_test)

# Per-expert negative log predictive density on the test set, plus the average.
nlpd_experts = compute_neg_log_like(mu_preds_test, std_preds_test, y_test)
print(nlpd_experts, ", average NLPD: ",nlpd_experts.mean())

# Expert predictions on the validation set; the third output is not needed here.
mu_preds_val, std_preds_val, _ = store_predictions_for_experts(experts, X_val)


Experts are trained independently
[0.59293142 0.52098088 0.76588723] , average NLPD:  0.6265998444240394


In [58]:
import torch
import random

# Seeding is currently disabled; uncomment to fix the random / numpy / torch seeds.
# seed = 100
# random.seed(seed)
# np.random.seed(seed)
# torch.manual_seed(seed)

# Settings for the single-GP baseline. These rebind the names that were also
# used when training the experts.
lr = 0.1
training_iter = 100
kappa = 5
lambdaa = 5

# ---------  Single GP using all training data ----- #
test_preds, _ = train_and_predict_single_gp(
    X_train, y_train, X_test, X_val, kappa, lambdaa,
    lr=lr,
    training_iter=training_iter,
)

# Predictive mean and standard deviation as column vectors.
single_gp_mean = test_preds.mean.numpy().reshape(-1, 1)
single_gp_std = np.sqrt(test_preds.variance.numpy().reshape(-1, 1))
nlpd_single_gp = compute_neg_log_like(single_gp_mean, single_gp_std, y_test)
print("full GP: ", nlpd_single_gp.squeeze())

full GP:  -0.9710924335998848


# Testing the different stacking algorithms:
PHS with MCMC or VI (AutoDelta or AutoNormal)

BHS with MCMC or VI (AutoDelta or AutoNormal)

RFF-based PHS with MCMC or VI (coming soon...)

RFF-based BHS with MCMC or VI (coming soon...)

## PHS

MCMC with NUTS

In [None]:
# PHS fusion with method="mcmc".
# Positional arguments: the fusion model, then the validation-set quantities,
# then the test-set quantities.
val_inputs = (X_val, mu_preds_val, std_preds_val, y_val)
test_inputs = (X_test, mu_preds_test, std_preds_test, y_test)
preds, lpd = train_and_predict_fusion_method(
    phs,
    *val_inputs,
    *test_inputs,
    method="mcmc",
    parallel_mcmc=True,
    show_progress=True,
)

In [None]:
# Display the shape of the "w" entry of `preds`.
# NOTE(review): the plotting code averages over axis 0, presumably the sample axis — confirm.
preds["w"].shape

In [None]:
# NLPD reported as the negated mean of `lpd` returned by the fusion call.
nlpd_phs_mcmc = -lpd.mean()

# Average "w" over axis 0 once, instead of once per expert inside the loop.
# NOTE(review): axis 0 is presumably the posterior-sample axis — confirm.
w_mean = preds["w"].mean(0)
for i in range(n_experts):
    # Row i of the averaged array is plotted as the curve for expert i.
    plt.plot(w_mean[i,:], label=f"expert {i}")
plt.legend()
plt.title("PHS with MCMC: weight of each expert")
print("PHS with MCMC: ", nlpd_phs_mcmc)

MAP with SVI AutoDelta

In [None]:
# PHS fusion with method="svi" and guide_svi="map".
# Positional arguments: the fusion model, then the validation-set quantities,
# then the test-set quantities.
val_inputs = (X_val, mu_preds_val, std_preds_val, y_val)
test_inputs = (X_test, mu_preds_test, std_preds_test, y_test)
preds, lpd = train_and_predict_fusion_method(
    phs,
    *val_inputs,
    *test_inputs,
    method="svi",
    guide_svi="map",
    show_progress=True,
)

In [None]:
# Display the shape of the "w" entry of `preds`.
# NOTE(review): the plotting code averages over axis 0, presumably the sample axis — confirm.
preds["w"].shape

In [None]:
# NLPD reported as the negated mean of `lpd` returned by the fusion call.
nlpd_phs_map = -lpd.mean()

# Average "w" over axis 0 once, instead of once per expert inside the loop.
# NOTE(review): axis 0 is presumably the sample axis — confirm.
w_mean = preds["w"].mean(0)
for i in range(n_experts):
    # Row i of the averaged array is plotted as the curve for expert i.
    plt.plot(w_mean[i,:], label=f"expert {i}")
plt.legend()
plt.title("PHS with MAP: weight of each expert")
print("PHS with MAP: ", nlpd_phs_map)

Approximate Gaussian posterior with SVI AutoNormal

In [None]:
# PHS fusion with method="svi" and guide_svi="normal".
# Positional arguments: the fusion model, then the validation-set quantities,
# then the test-set quantities.
val_inputs = (X_val, mu_preds_val, std_preds_val, y_val)
test_inputs = (X_test, mu_preds_test, std_preds_test, y_test)
preds, lpd = train_and_predict_fusion_method(
    phs,
    *val_inputs,
    *test_inputs,
    method="svi",
    guide_svi="normal",
    show_progress=True,
)

In [None]:
# Display the shape of the "w" entry of `preds`.
# NOTE(review): the plotting code averages over axis 0, presumably the sample axis — confirm.
preds["w"].shape

In [None]:
# NLPD reported as the negated mean of `lpd` returned by the fusion call.
nlpd_phs_normal = -lpd.mean()

# Average "w" over axis 0 once, instead of once per expert inside the loop.
# NOTE(review): axis 0 is presumably the sample axis — confirm.
w_mean = preds["w"].mean(0)
for i in range(n_experts):
    # Row i of the averaged array is plotted as the curve for expert i.
    plt.plot(w_mean[i,:], label=f"expert {i}")
plt.legend()
plt.title("PHS with Normal: weight of each expert")
print("PHS with Normal: ", nlpd_phs_normal)

## BHS

MCMC with NUTS

In [None]:
# BHS fusion with method="mcmc".
# Positional arguments: the fusion model, then the validation-set quantities,
# then the test-set quantities.
val_inputs = (X_val, mu_preds_val, std_preds_val, y_val)
test_inputs = (X_test, mu_preds_test, std_preds_test, y_test)
preds, lpd = train_and_predict_fusion_method(
    bhs,
    *val_inputs,
    *test_inputs,
    method="mcmc",
    parallel_mcmc=True,
    show_progress=True,
)

In [None]:
# Display the shape of the "w" entry of `preds`.
# NOTE(review): the plotting code averages over axis 0 and then transposes — confirm the axis layout.
preds["w"].shape

In [None]:
# NLPD reported as the negated mean of `lpd` returned by the fusion call.
nlpd_bhs_mcmc = -lpd.mean()

# Average "w" over axis 0 and transpose once, instead of once per expert.
# NOTE(review): axis 0 is presumably the posterior-sample axis — confirm.
w_mean = preds["w"].mean(0).T
for i in range(n_experts):
    # Row i of the transposed average is plotted as the curve for expert i.
    plt.plot(w_mean[i,:], label=f"expert {i}")
plt.legend()
plt.title("BHS with MCMC: weight of each expert")
print("BHS with MCMC: ", nlpd_bhs_mcmc)

MAP with SVI AutoDelta

In [None]:
# BHS fusion with method="svi" and guide_svi="map".
# Positional arguments: the fusion model, then the validation-set quantities,
# then the test-set quantities.
val_inputs = (X_val, mu_preds_val, std_preds_val, y_val)
test_inputs = (X_test, mu_preds_test, std_preds_test, y_test)
preds, lpd = train_and_predict_fusion_method(
    bhs,
    *val_inputs,
    *test_inputs,
    method="svi",
    guide_svi="map",
    show_progress=True,
)

In [None]:
# Display the shape of the "w" entry of `preds`.
# NOTE(review): the plotting code averages over axis 0 and then transposes — confirm the axis layout.
preds["w"].shape

In [None]:
# NLPD reported as the negated mean of `lpd` returned by the fusion call.
nlpd_bhs_map = -lpd.mean()

# Average "w" over axis 0 and transpose once, instead of once per expert.
# NOTE(review): axis 0 is presumably the sample axis — confirm.
w_mean = preds["w"].mean(0).T
for i in range(n_experts):
    # Row i of the transposed average is plotted as the curve for expert i.
    plt.plot(w_mean[i,:], label=f"expert {i}")
plt.legend()
plt.title("BHS with MAP: weight of each expert")
print("BHS with MAP: ", nlpd_bhs_map)

Approximate Gaussian posterior with SVI AutoNormal

In [None]:
# BHS fusion with method="svi" and guide_svi="normal".
# Positional arguments: the fusion model, then the validation-set quantities,
# then the test-set quantities.
val_inputs = (X_val, mu_preds_val, std_preds_val, y_val)
test_inputs = (X_test, mu_preds_test, std_preds_test, y_test)
preds, lpd = train_and_predict_fusion_method(
    bhs,
    *val_inputs,
    *test_inputs,
    method="svi",
    guide_svi="normal",
    show_progress=True,
)

In [None]:
# Display the shape of the "w" entry of `preds`.
# NOTE(review): the plotting code averages over axis 0 and then transposes — confirm the axis layout.
preds["w"].shape

In [None]:
# NLPD reported as the negated mean of `lpd` returned by the fusion call.
nlpd_bhs_normal = -lpd.mean()

# Average "w" over axis 0 and transpose once, instead of once per expert.
# NOTE(review): axis 0 is presumably the sample axis — confirm.
w_mean = preds["w"].mean(0).T
for i in range(n_experts):
    # Row i of the transposed average is plotted as the curve for expert i.
    plt.plot(w_mean[i,:], label=f"expert {i}")
plt.legend()
plt.title("BHS with Normal: weight of each expert")
print("BHS with Normal: ", nlpd_bhs_normal)