In [2]:
import os
import argparse
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import GPy

import sys
sys.path.append('..')
from nmgp_dsvi import *

In [34]:
def GPR(data, do_plot_raw_data=False, do_plot_post_process=False):
    """Fit one independent GPy GP regression per output and score it on the test split.

    Parameters
    ----------
    data : str
        Base name of the dataset. The pickle
        ``../../data/simulation/<data>.pickle`` is read and must unpickle to
        the 4-tuple ``(X_list, Y_list, Xt_list, Yt_list)`` with one entry per
        output. The code below assumes every entry is a 2-D column array
        (the kernel is built with input dimension 1) -- confirm against the
        data generator.
    do_plot_raw_data : bool, optional
        If true, draw the train/test points of every output in one figure.
    do_plot_post_process : bool, optional
        If true, call ``plot`` with the fitted predictions; the output
        directory and the file name ``GPR.png`` are passed along.

    Returns
    -------
    rmse_train : float
        Root mean squared error of the predictive mean on the training inputs,
        pooled over outputs.
    rmse_test : float
        Same quantity on the test inputs.
    length_test : float
        Mean width of the interval between the 2.5 and 97.5 predictive
        quantiles over all test points.
    CR : numpy.ndarray
        Per-output fraction of test targets lying strictly inside that
        interval (``nan`` for an output without test points).
    """
    res_dir = "../../res/sim_VI/{}".format(data)
    # makedirs(exist_ok=True) also creates missing parent folders and does not
    # fail if the directory already exists (the former exists()/mkdir() pair
    # raised when "../../res/sim_VI" was absent).
    os.makedirs(res_dir, exist_ok=True)
    dir_name = res_dir + "/"

    # Upload Data
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open("../../data/simulation/" + data + ".pickle", "rb") as res:
        X_list, Y_list, Xt_list, Yt_list = pickle.load(res)
    dim_outputs = len(X_list)

    if do_plot_raw_data:
        fig = plt.figure()
        for d in range(dim_outputs):
            ax = fig.add_subplot(dim_outputs, 1, d + 1)
            ax.set_title('Output {}'.format(d + 1))
            ax.plot(X_list[d], Y_list[d], 'kx', mew=1.5, label='Train set')
            ax.plot(Xt_list[d], Yt_list[d], 'rx', mew=1.5, label='Test set')
            ax.legend()

    # One model per output; each gets its own copy of the kernel so that the
    # hyper-parameters are optimised independently.
    K = GPy.kern.Exponential(1)
    models = []
    for X_train, Y_train_d in zip(X_list, Y_list):
        model = GPy.models.GPRegression(X_train, Y_train_d, kernel=K.copy())
        model.optimize()
        models.append(model)

    # Predictive means, stacked output after output (same order as np.vstack
    # of the target lists below).
    est_Y_train = np.vstack([m.predict(X)[0] for m, X in zip(models, X_list)])
    est_Y_test = np.vstack([m.predict(Xt)[0] for m, Xt in zip(models, Xt_list)])

    # Rows: (2.5 quantile, 97.5 quantile); columns: all test points.
    quantiles_test_Y = np.hstack([
        np.hstack(m.predict_quantiles(Xt, quantiles=(2.5, 97.5))).T
        for m, Xt in zip(models, Xt_list)
    ])

    # Output id of every stacked test point.
    test_index = np.concatenate(
        [d * np.ones_like(Xt) for d, Xt in enumerate(Xt_list)]
    ).astype(int)

    if do_plot_post_process:
        # The dense grid is only needed for the figure, so build it here.
        grids = np.linspace(0, 1, 200)[:, None]
        quantiles_grid_Y = np.hstack([
            np.hstack(m.predict_quantiles(grids, quantiles=(2.5, 50, 97.5))).T
            for m in models
        ])
        # `plot` is not defined in this cell; presumably it comes from the
        # `from nmgp_dsvi import *` at the top of the notebook.
        plot(dim_outputs, X_list, Y_list, Xt_list, Yt_list, test_index,
             est_Y_test, grids, quantiles_grid_Y, dir_name, "GPR.png")

    # analysis
    Y_train = np.vstack(Y_list)
    Y_test = np.vstack(Yt_list)
    rmse_train = np.sqrt(np.mean((est_Y_train - Y_train) ** 2))
    rmse_test = np.sqrt(np.mean((est_Y_test - Y_test) ** 2))
    lower, upper = quantiles_test_Y
    length_test = np.mean(upper - lower)

    # compute coverage rate: per output, count the test targets strictly
    # inside (lower, upper) and divide by the number of test targets.
    y_flat = Y_test.ravel()
    idx_flat = test_index.ravel()
    covered = (y_flat > lower) & (y_flat < upper)
    CN = np.bincount(idx_flat[covered], minlength=dim_outputs).astype(float)
    TT = np.bincount(idx_flat, minlength=dim_outputs).astype(float)
    CR = CN / TT
    return rmse_train, rmse_test, length_test, CR

In [36]:
data = "sim_illustration_varying_freq"

# Run the independent-GP baseline ten times on this dataset and keep, for each
# run, the test RMSE, the mean interval width and the coverage rate averaged
# over outputs.
rmse_test_list, length_test_list, CR_list = [], [], []
for i in range(10):
    rmse_train, rmse_test, length_test, CR = GPR(data)
    for bucket, value in (
        (rmse_test_list, rmse_test),
        (length_test_list, length_test),
        (CR_list, CR.mean()),
    ):
        bucket.append(value)

rmse_test_array, length_test_array, CR_array = (
    np.array(values) for values in (rmse_test_list, length_test_list, CR_list)
)

# Report mean and standard deviation across the repeated runs.
for label, metric in (
    ("rmse_predtiction:", rmse_test_array),
    ("average length of CI:", length_test_array),
    ("coverage rate:", CR_array),
):
    print(label, metric.mean(), metric.std())

rmse_predtiction: 1.6402989821226122 8.167605275169826e-14
average length of CI: 3.185109202791763 3.0234462916586963e-13
coverage rate: 0.875 0.0


In [37]:
data = "sim_illustration_low_freq"

# Run the independent-GP baseline ten times on this dataset and keep, for each
# run, the test RMSE, the mean interval width and the coverage rate averaged
# over outputs.
rmse_test_list, length_test_list, CR_list = [], [], []
for i in range(10):
    rmse_train, rmse_test, length_test, CR = GPR(data)
    for bucket, value in (
        (rmse_test_list, rmse_test),
        (length_test_list, length_test),
        (CR_list, CR.mean()),
    ):
        bucket.append(value)

rmse_test_array, length_test_array, CR_array = (
    np.array(values) for values in (rmse_test_list, length_test_list, CR_list)
)

# Report mean and standard deviation across the repeated runs.
for label, metric in (
    ("rmse_predtiction:", rmse_test_array),
    ("average length of CI:", length_test_array),
    ("coverage rate:", CR_array),
):
    print(label, metric.mean(), metric.std())

rmse_predtiction: 2.247020180379943 1.3338402574740823e-13
average length of CI: 2.1784392207206147 1.8767732273446515e-13
coverage rate: 0.835 0.0


In [38]:
data = "sim_illustration_high_freq"

# Run the independent-GP baseline ten times on this dataset and keep, for each
# run, the test RMSE, the mean interval width and the coverage rate averaged
# over outputs.
rmse_test_list, length_test_list, CR_list = [], [], []
for i in range(10):
    rmse_train, rmse_test, length_test, CR = GPR(data)
    for bucket, value in (
        (rmse_test_list, rmse_test),
        (length_test_list, length_test),
        (CR_list, CR.mean()),
    ):
        bucket.append(value)

rmse_test_array, length_test_array, CR_array = (
    np.array(values) for values in (rmse_test_list, length_test_list, CR_list)
)

# Report mean and standard deviation across the repeated runs.
for label, metric in (
    ("rmse_predtiction:", rmse_test_array),
    ("average length of CI:", length_test_array),
    ("coverage rate:", CR_array),
):
    print(label, metric.mean(), metric.std())

rmse_predtiction: 1.5068931903383582 6.005536950820616e-14
average length of CI: 3.1701422917603272 1.2966636873879597e-13
coverage rate: 0.9149999999999998 2.220446049250313e-16
