# Set up

In [1]:
import torch
import gpytorch
import pandas as pd
import numpy as np
import tqdm as tqdm
from linear_operator import settings

import pyro
import math
import pickle
import time
from joblib import Parallel, delayed

from sklearn.preprocessing import StandardScaler

from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

import pyro.distributions as dist
from pyro.infer import MCMC, NUTS
import arviz as az
import seaborn as sns

import os
import scipy.stats as stats

In [2]:
import GP_functions.Loss_function as Loss_function
import GP_functions.bound as bound
import GP_functions.Estimation as Estimation
import GP_functions.Training as Training
import GP_functions.Prediction as Prediction
import GP_functions.GP_models as GP_models
import GP_functions.Tools as Tools
import GP_functions.FeatureE as FeatureE

In [3]:
Device = 'cpu'

# Data

In [4]:
X_train = pd.read_csv('Data/X_train.csv', header=None, delimiter=',').values
X_test = pd.read_csv('Data/X_test.csv', header=None, delimiter=',').values

# Y_train_8 = pd.read_csv('Data/Y_train_8.csv', header=None, delimiter=',').values
# Y_test_8 = pd.read_csv('Data/Y_test_8.csv', header=None, delimiter=',').values

Y_train_21 = pd.read_csv('Data/Y_train_std_21.csv', header=None, delimiter=',').values
Y_test_21 = pd.read_csv('Data/Y_test_std_21.csv', header=None, delimiter=',').values

Y_train_std = pd.read_csv('Data/Y_train_std.csv', header=None, delimiter=',').values
Y_test_std = pd.read_csv('Data/Y_test_std.csv', header=None, delimiter=',').values

In [5]:
train_x = torch.tensor(X_train, dtype=torch.float32)
test_x = torch.tensor(X_test, dtype=torch.float32)

# train_y_8 = torch.tensor(Y_train_8, dtype=torch.float32)
# test_y_8 = torch.tensor(Y_test_8, dtype=torch.float32)

train_y_21 = torch.tensor(Y_train_21, dtype=torch.float32)
test_y_21 = torch.tensor(Y_test_21, dtype=torch.float32)

train_y = torch.tensor(Y_train_std, dtype=torch.float32)
test_y = torch.tensor(Y_test_std, dtype=torch.float32)

# Emulator

## NN + MGP

Deep kernel learnig Multioutput GP

In [10]:
row_idx = 0

input_point = test_y_21[row_idx,:]
local_train_x, local_train_y = Tools.find_k_nearest_neighbors_CPU(input_point, train_x, train_y_21, k = 500)

bounds = bound.get_bounds(local_train_x)

In [11]:
MultitaskGP_models, MultitaskGP_likelihoods = Training.train_one_row_NNMultitaskGP(local_train_x, local_train_y, n_tasks = local_train_y.shape[1], 
                                                                                   feature_extractor_class = FeatureE.FeatureExtractor_4, covar_type = 'RQ', 
                                                                                   lr=0.05, num_iterations=5000, patience=10, device = Device)

## MVGP

In [6]:
MVGP_models, MVGP_likelihoods = Training.train_full_MultitaskVGP(train_x, train_y_21, 
                                                                 num_latents=14, num_inducing=100, 
                                                                 lr_hyper=0.05, lr_variational=0.05, num_iterations=5000, patience=50, device=Device)

 46%|████▌     | 2299/5000 [06:00<07:03,  6.38it/s, loss=-19.8] 


# MCMC

In [7]:
row_idx = 0

input_point = test_y_21[row_idx,:]
local_train_x, local_train_y = Tools.find_k_nearest_neighbors_CPU(input_point, train_x, train_y_21, k = 100)

bounds = bound.get_bounds(local_train_x)

## priori is Uniform(min, max)

In [None]:
def run_mcmc_Uniform(Pre_function, Models, Likelihoods, row_idx, test_y, bounds, num_sampling=2000, warmup_step=1000, num_chains=1):
    test_y = test_y.to(dtype=torch.float32)
    bounds = [(torch.tensor(b[0], dtype=torch.float32), torch.tensor(b[1], dtype=torch.float32)) for b in bounds]
    
    def model():

        params = []
        for i, (min_val, max_val) in enumerate(bounds):
            param_i = pyro.sample(f'param_{i}', dist.Uniform(min_val, max_val))
            params.append(param_i)
        
        theta = torch.stack(params)

        gp_pred = Pre_function(Models, Likelihoods, theta.unsqueeze(0))

        y_obs = test_y[row_idx, :]
        pyro.sample('obs', gp_pred, obs=y_obs)

    nuts_kernel = NUTS(model)
    mcmc = MCMC(nuts_kernel, num_samples=num_sampling, warmup_steps=warmup_step, num_chains=num_chains)
    mcmc.run()

    return mcmc

In [None]:
mcmc_result_1 = run_mcmc_Uniform(Prediction.preds_distribution, MVGP_models, MVGP_likelihoods, row_idx, test_y_21, bounds, 
                                            num_sampling = 120, warmup_step = 30, num_chains=1)



## priori is Uniform(0.1, 5)

In [8]:
def run_mcmc_Uniform_2(Pre_function, Models, Likelihoods, row_idx, test_y, bounds, num_sampling=2000, warmup_step=1000, num_chains=1):
    test_y = test_y.to(dtype=torch.float32)
    bounds = [(torch.tensor(b[0], dtype=torch.float32), torch.tensor(b[1], dtype=torch.float32)) for b in bounds]
    
    def model():

        params = []
        for i, (min_val, max_val) in enumerate(bounds):
            param_i = pyro.sample(f'param_{i}', dist.Uniform(0.1, 5))
            params.append(param_i)
        
        theta = torch.stack(params)

        gp_pred = Pre_function(Models, Likelihoods, theta.unsqueeze(0))

        y_obs = test_y[row_idx, :]
        pyro.sample('obs', gp_pred, obs=y_obs)

    nuts_kernel = NUTS(model)
    mcmc = MCMC(nuts_kernel, num_samples=num_sampling, warmup_steps=warmup_step, num_chains=num_chains)
    mcmc.run()

    return mcmc

In [None]:
mcmc_result_2 = run_mcmc_Uniform_2(Prediction.preds_distribution, MVGP_models, MVGP_likelihoods, row_idx, test_y_21, bounds, 
                                            num_sampling = 120, warmup_step = 30, num_chains=1)


In [21]:
import cProfile, pstats
profiler = cProfile.Profile()
profiler.enable()
mcmc_result = run_mcmc_Uniform_2(Prediction.preds_distribution, MVGP_models, MVGP_likelihoods, row_idx, test_y_21, bounds, 
                                            num_sampling = 1200, warmup_step = 300, num_chains=1)
profiler.disable()
stats = pstats.Stats(profiler).sort_stats('cumtime')
stats.print_stats()


Sample: 100%|██████████| 1500/1500 [24:06,  1.04it/s, step size=1.67e-01, acc. prob=0.920]

         905656359 function calls (833732286 primitive calls) in 1446.193 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        2    0.000    0.000 1446.193  723.096 /home/pgrad1/2633042r/miniconda3/envs/GT/lib/python3.10/site-packages/IPython/core/interactiveshell.py:3541(run_code)
        2    0.000    0.000 1446.193  723.096 {built-in method builtins.exec}
        1    0.000    0.000 1446.193 1446.193 /tmp/ipykernel_2097197/3701999863.py:1(run_mcmc_Uniform_2)
 113830/1    0.310    0.000 1446.192 1446.192 /home/pgrad1/2633042r/miniconda3/envs/GT/lib/python3.10/site-packages/pyro/poutine/messenger.py:25(_context_wrap)
        1    0.008    0.008 1446.192 1446.192 /home/pgrad1/2633042r/miniconda3/envs/GT/lib/python3.10/site-packages/pyro/infer/mcmc/api.py:531(run)
     1203    0.002    0.000 1446.183    1.202 /home/pgrad1/2633042r/miniconda3/envs/GT/lib/python3.10/site-packages/pyro/infer/mcmc/api.py:213(run)
     1203 




<pstats.Stats at 0x7f9a69241420>

In [22]:
profiler = cProfile.Profile()
profiler.enable()
mcmc_result = run_mcmc_Uniform_2(Prediction.preds_distribution, MultitaskGP_models, MultitaskGP_likelihoods, row_idx, test_y_21, bounds, 
                                            num_sampling = 1000, warmup_step = 200)
profiler.disable()
stats = pstats.Stats(profiler).sort_stats('cumtime')
stats.print_stats()

Sample: 100%|██████████| 1200/1200 [42:15,  2.11s/it, step size=2.19e-01, acc. prob=0.866]

         694764106 function calls (647299454 primitive calls) in 2535.456 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        2    0.000    0.000 2535.456 1267.728 /home/pgrad1/2633042r/miniconda3/envs/GT/lib/python3.10/site-packages/IPython/core/interactiveshell.py:3541(run_code)
        2    0.000    0.000 2535.456 1267.728 {built-in method builtins.exec}
        1    0.000    0.000 2535.456 2535.456 /tmp/ipykernel_2097197/42870183.py:1(<module>)
        1    0.000    0.000 2535.456 2535.456 /tmp/ipykernel_2097197/3701999863.py:1(run_mcmc_Uniform_2)
  67921/1    0.299    0.000 2535.455 2535.455 /home/pgrad1/2633042r/miniconda3/envs/GT/lib/python3.10/site-packages/pyro/poutine/messenger.py:25(_context_wrap)
        1    0.012    0.012 2535.455 2535.455 /home/pgrad1/2633042r/miniconda3/envs/GT/lib/python3.10/site-packages/pyro/infer/mcmc/api.py:531(run)
     1003    0.002    0.000 2535.442    2.528 /home/pgrad1/263304




<pstats.Stats at 0x7f9a694fb820>

In [20]:
mcmc_result.get_extra_fields()

AttributeError: 'MCMC' object has no attribute 'get_extra_fields'

In [14]:
mcmc_data = {
    "samples": mcmc_result.get_samples(),
    "extra_fields": mcmc_result.get_extra_fields(),
    "num_samples": mcmc_result.num_samples,
    "warmup_steps": mcmc_result.warmup_steps,
}

AttributeError: 'MCMC' object has no attribute 'get_extra_fields'

## priori is Normal(mean, std) from local data

In [None]:
def run_mcmc_Normal(Pre_function, Models, Likelihoods, row_idx, test_y, local_train_x, num_sampling=2000, warmup_step=1000, num_chains=1):
    def model():
        params = []
        
        for i in range(local_train_x.shape[1]):
            # mean = local_train_x[:, i].mean()
            # std = local_train_x[:, i].std()
            mean, std = stats.norm.fit(local_train_x[:, i])
            param_i = pyro.sample(f'param_{i}', dist.Normal(mean, std))
            params.append(param_i)
        
        theta = torch.stack(params)
        
        gp_pred = Pre_function(Models, Likelihoods, theta.unsqueeze(0))

        y_obs = test_y[row_idx, :]
        pyro.sample('obs', gp_pred, obs=y_obs)

    nuts_kernel = NUTS(model)
    mcmc = MCMC(nuts_kernel, num_samples=num_sampling, warmup_steps=warmup_step, num_chains=num_chains)
    mcmc.run()

    # posterior_samples = mcmc.get_samples()

    # idata = az.from_pyro(mcmc)

    # summary = az.summary(idata, hdi_prob=0.95)
    
    return mcmc


# END