# Set up

In [1]:
import torch
import gpytorch
import pandas as pd
import numpy as np
import tqdm as tqdm
from linear_operator import settings

import pyro
import math
import pickle
import time
from joblib import Parallel, delayed

from sklearn.preprocessing import StandardScaler

from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

import pyro.distributions as dist
from pyro.infer import MCMC, NUTS
import arviz as az
import seaborn as sns

import os

from torch.utils.data import TensorDataset, DataLoader
import itertools

import pyro
from pyro.infer import SVI, Trace_ELBO, Predictive
from pyro.infer.autoguide import AutoDiagonalNormal
from pyro.optim import ClippedAdam

In [2]:
import GP_functions.Loss_function as Loss_function
import GP_functions.bound as bound
import GP_functions.Estimation as Estimation
import GP_functions.Training as Training
import GP_functions.Prediction as Prediction
import GP_functions.NN_models as NN_models
import GP_functions.Tools as Tools
import GP_functions.FeatureE as FeatureE

# Data

In [3]:
X_train = pd.read_csv('Data/X_train.csv', header=None, delimiter=',').values
X_test = pd.read_csv('Data/X_test.csv', header=None, delimiter=',').values

Y_train_21 = pd.read_csv('Data/Y_train_std_21.csv', header=None, delimiter=',').values
Y_test_21 = pd.read_csv('Data/Y_test_std_21.csv', header=None, delimiter=',').values

Y_train = pd.read_csv('Data/Y_train_std.csv', header=None, delimiter=',').values
Y_test = pd.read_csv('Data/Y_test_std.csv', header=None, delimiter=',').values


train_x = torch.tensor(X_train, dtype=torch.float32)
test_x = torch.tensor(X_test, dtype=torch.float32)

train_y_21 = torch.tensor(Y_train_21, dtype=torch.float32)
test_y_21 = torch.tensor(Y_test_21, dtype=torch.float32)

# train_y = torch.tensor(Y_train, dtype=torch.float32)
# test_y = torch.tensor(Y_test, dtype=torch.float32)


# torch.set_default_dtype(torch.float32)

# Model

In [None]:
NN_4 = Training.train_DNN_MSE(NN_models.NN_4,
                              train_x, train_y_21,
                              num_iterations= 50000,
                              device= 'cuda',
                              show_progress = True,
                              weight_decay = 0,
                              val_x=test_x,
                              val_y=test_y_21,
                              early_stopping = True,
                              patience = 1000,
                              val_check_interval = 100)

In [None]:
NN_4_cpu = NN_4.to('cpu')

In [None]:
Device = 'cpu'

row_idx = 10
input_point = test_y_21[row_idx, :]

local_train_x, local_train_y = Tools.find_k_nearest_neighbors_CPU(input_point, train_x, train_y_21, k=100)

preds_tmp = Prediction.preds_for_DNN(NN_4_cpu, test_x[row_idx,:].unsqueeze(0).to(Device)
    ).cpu().detach().numpy()


In [None]:
preds_tmp

In [None]:
bounds = bound.get_bounds(local_train_x)

estimated_params_tmp, Loss = Estimation.multi_start_estimation_DModel(NN_4_cpu, row_idx, test_y_21, bounds,
                                                                   Estimation.estimate_params_for_NN_Adam, num_starts=4, num_iterations=1000, lr=0.01,
                                                                   patience=10, attraction_threshold=0.1, repulsion_strength=0.1, device=Device)

In [None]:
Loss

In [None]:
estimated_params_tmp

In [5]:
BNN_5_model, BNN_5_guide = Training.train_BNN_minibatch(NN_models.BNN_5,
    train_x,
    train_y_21,
    num_iterations=5,
    batch_size=256,
    device='cuda',
    show_progress=True,
    val_x=test_x,
    val_y=test_y_21,
    early_stopping=True,
    patience=1000,
    val_check_interval=100)

SVI training: 100%|██████████| 5/5 [00:01<00:00,  3.37it/s]


In [4]:
BNN_WideDrop_model, BNN_WideDrop_guide = Training.train_BNN_minibatch(NN_models.BNN_WideDrop,
    train_x,
    train_y_21,
    num_iterations=50000,
    batch_size=256,
    device='cuda',
    show_progress=True,
    val_x=test_x,
    val_y=test_y_21,
    early_stopping=True,
    patience=1000,
    val_check_interval=100)

SVI training: 100%|██████████| 50000/50000 [41:55<00:00, 19.87it/s] 


In [7]:
def evaluate_mse(model, x, y):
    model.eval()
    x = x.to(device)
    y = y.to(device)
    with torch.no_grad():
        # model(x) returns a Distribution when y=None
        pred_dist = model(x)
        preds = pred_dist.mean
    # MSE over all outputs
    return torch.mean((preds - y) ** 2).item()

device = 'cuda'

BNN_WideDrop_model.to(device)
BNN_WideDrop_guide.to(device)
mse = evaluate_mse(BNN_WideDrop_model, test_x, test_y_21)
print(f"MSE on test set: {mse:.4f}")

MSE on test set: 47563.5898


In [None]:
BNN_ARD_model, BNN_ARD_guide = Training.train_BNN_minibatch(NN_models.BNN_ARD,
    train_x,
    train_y_21,
    num_iterations=50000,
    batch_size=256,
    device='cuda',
    show_progress=True,
    val_x=test_x,
    val_y=test_y_21,
    early_stopping=True,
    patience=1000,
    val_check_interval=100)

In [8]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model_path = "best_model_BNN_5.pt"
guide_path = "best_guide_BNN_5.pt"

BNN_5_model.load_state_dict(torch.load(model_path, map_location=device))
BNN_5_guide.load_state_dict(torch.load(guide_path, map_location=device))

with open("best_param_store_BNN_5.pkl", "rb") as f:
    raw_state = pickle.load(f)


def to_device(obj):
    if torch.is_tensor(obj):
        return obj.to(device)
    elif isinstance(obj, dict):
        return {k: to_device(v) for k, v in obj.items()}
    elif isinstance(obj, (list, tuple)):
        return type(obj)(to_device(v) for v in obj)
    else:
        return obj

param_state = to_device(raw_state)

pyro.get_param_store().set_state(param_state)

# Paramater inference

In [14]:
row_idx = 0
input_point = test_y_21[row_idx, :]

local_train_x, local_train_y = Tools.find_k_nearest_neighbors_GPU(input_point, train_x, train_y_21, k=100)


In [25]:
predictive = Predictive(BNN_5_model, guide=BNN_5_guide, 
           return_sites=["obs"],
           num_samples=100)

In [27]:
samples = predictive(test_x[row_idx,:].unsqueeze(0).to(device))["obs"] 

In [31]:
samples.mean(0)  

tensor([[-0.1690,  0.0863,  0.1408,  0.0076, -0.1441, -0.0850,  0.1623, -0.1379,
          0.0493, -0.1689, -0.2445,  0.0909, -0.3405,  0.1774,  0.0620, -0.0437,
         -0.3060, -0.1009, -0.0435,  0.0149, -0.0213]], device='cuda:0')

In [29]:
input_point

tensor([-0.9226,  4.2240, -0.2954,  1.5937,  1.0363,  1.3768, -0.2562,  0.0340,
        -0.3592,  0.1005,  0.0101,  0.1750, -0.0758, -0.2362,  0.1707, -0.1471,
        -0.0780, -0.0288,  0.0638,  0.0508,  0.0267])

In [52]:
BNN_5_model.eval()
with torch.no_grad():
    dist_pred = BNN_5_model(test_x[row_idx,:].unsqueeze(0).to(device))

dist_pred.mean

tensor([[ 0.0759,  0.1628,  0.0274, -0.0420, -0.1559, -0.0450,  0.1170,  0.0127,
          0.2242, -0.0694, -0.0237, -0.1116,  0.1112, -0.1465,  0.0098,  0.1643,
         -0.1187,  0.0248,  0.0647,  0.0690,  0.0381]], device='cuda:0')

## Point

In [None]:
preds_tmp = Prediction.preds_for_one_model(
    MVGP_models, MVGP_likelihoods, test_x[row_idx,:].unsqueeze(0).to(Device)
    ).cpu().detach().numpy()


bounds = bound.get_bounds(local_train_x)

estimated_params_tmp, _ = Estimation.multi_start_estimation(
    MVGP_models, MVGP_likelihoods, row_idx, test_y_21, bounds,
    Estimation.estimate_params_for_one_model_Adam, num_starts=4, num_iterations=1000, lr=0.01,
    patience=10, attraction_threshold=0.1, repulsion_strength=0.1, device=Device
)

with open(output_file, 'a') as f:
    # f.write(f"{row_idx + 1},\"{list(preds_tmp)}\",\"{list(estimated_params_tmp.detach().numpy())}\"\n")
    f.write(f"{row_idx + 1},\"{list(preds_tmp)}\",\"{list(estimated_params_tmp)}\"\n")

mcmc_result_Uniform = Estimation.run_mcmc_Uniform(
    Prediction.preds_distribution, MVGP_models, MVGP_likelihoods, 
    row_idx, test_y_21, bounds, 
    num_sampling=1200, warmup_step=300, num_chains=1, device=Device
)
posterior_samples_Uniform = mcmc_result_Uniform.get_samples()

## MCMC