In [2]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import GPy

import sys
sys.path.append('..')
from nmgp_dsvi import *

In [15]:
def ICM(data, do_plot_raw_data=False, do_plot_post_process=False):
    """Fit a GPy ICM coregionalized GP to one simulated dataset and score it.

    The dataset is read from ``../../data/simulation/<data>.pickle``, which
    must unpickle to the 4-tuple ``(X_list, Y_list, Xt_list, Yt_list)`` holding
    one train-input / train-target / test-input / test-target array per
    output. Each array is presumably an ``(n_i, 1)`` column (the code appends
    an output-index column with ``np.hstack``) -- confirm against the data
    generator.

    Parameters
    ----------
    data : str
        Dataset name; used both as the pickle file stem and as the name of the
        results sub-directory ``../../res/sim_VI/<data>``, which is created if
        it does not exist.
    do_plot_raw_data : bool, optional
        If True, draw one subplot per output showing train and test points.
    do_plot_post_process : bool, optional
        If True, call ``plot(...)`` (provided by ``nmgp_dsvi``) with the
        fitted predictions, the results directory and the file name
        ``"ICM.png"``.

    Returns
    -------
    rmse_train : float
        RMSE of the predictive mean on the stacked training targets.
    rmse_test : float
        RMSE of the predictive mean on the stacked test targets.
    length_test : float
        Mean width of the 2.5%-97.5% predictive interval over test points.
    CR : numpy.ndarray
        Per-output fraction of test targets lying strictly inside that
        interval, shape ``(num_outputs,)``. An output without test points
        yields NaN.
    """
    out_dir = "../../res/sim_VI/{}".format(data)
    # makedirs(exist_ok=True) also creates missing parent directories and
    # avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(out_dir, exist_ok=True)

    # NOTE(security): pickle.load can execute arbitrary code; only use this
    # with dataset files you generated yourself.
    with open("../../data/simulation/" + data + ".pickle", "rb") as res:
        X_list, Y_list, Xt_list, Yt_list = pickle.load(res)

    # Derive the number of outputs from the data instead of hard-coding 2.
    num_outputs = len(X_list)

    def stack_with_index(x_list):
        """Stack per-output inputs and append the output index as a column."""
        return np.vstack([
            np.hstack([x_list[i], np.ones_like(x_list[i]) * i])
            for i in range(num_outputs)
        ])

    if do_plot_raw_data:
        fig = plt.figure()
        for i in range(num_outputs):
            ax = fig.add_subplot(num_outputs, 1, i + 1)
            ax.set_title('Output {}'.format(i + 1))
            ax.plot(X_list[i], Y_list[i], 'kx', mew=1.5, label='Train set')
            ax.plot(Xt_list[i], Yt_list[i], 'rx', mew=1.5, label='Test set')
            ax.legend()

    # Model: intrinsic coregionalization model on top of an Exponential kernel.
    K = GPy.kern.Exponential(1)
    icm = GPy.util.multioutput.ICM(input_dim=1, num_outputs=num_outputs, kernel=K)
    m = GPy.models.GPCoregionalizedRegression(X_list, Y_list, kernel=icm)
    m['.*Exponential.var'].constrain_fixed(1.)  # For this kernel, B.kappa encodes the variance now.
    m.optimize()

    interval = (2.5, 50, 97.5)  # lower bound, median, upper bound (percent)

    # Reconstruction of the training targets.
    trainX = stack_with_index(X_list)
    train_dict = {'output_index': trainX[:, 1].astype(int)}
    est_Y_train = m.predict(trainX, Y_metadata=train_dict)[0]

    # Predictive quantiles on a dense grid over [0, 1], repeated per output.
    grids = np.linspace(0, 1, 200)[:, None]
    newX = np.vstack([
        np.hstack([grids, np.ones_like(grids) * i]) for i in range(num_outputs)
    ])
    noise_dict = {'output_index': newX[:, 1].astype(int)}
    # hstack(...).T turns the list of column vectors into rows: one per quantile.
    gridy_Y = np.hstack(
        m.predict_quantiles(newX, quantiles=interval, Y_metadata=noise_dict)
    ).T

    # Predictions and quantiles at the held-out test inputs.
    test_index = np.concatenate(
        [i * np.ones_like(Xt_list[i]) for i in range(num_outputs)]
    ).astype(int)
    newXt = stack_with_index(Xt_list)
    noise_dictt = {'output_index': newXt[:, 1].astype(int)}
    quantiles_Y_test = np.hstack(
        m.predict_quantiles(newXt, quantiles=interval, Y_metadata=noise_dictt)
    ).T
    est_Y_test = m.predict(newXt, Y_metadata=noise_dictt)[0]

    dir_name = "../../res/sim_VI/{}/".format(data)
    name = "ICM.png"
    if do_plot_post_process:
        plot(num_outputs, X_list, Y_list, Xt_list, Yt_list, test_index,
             est_Y_test, grids, gridy_Y, dir_name, name)

    Y_train = np.vstack(Y_list)
    Y_test = np.vstack(Yt_list)

    rmse_train = np.sqrt(np.mean((est_Y_train - Y_train) ** 2))
    rmse_test = np.sqrt(np.mean((est_Y_test - Y_test) ** 2))

    lower, upper = quantiles_Y_test[0], quantiles_Y_test[2]
    length_test = np.mean(upper - lower)

    # Coverage rate per output: strict containment, counted with bincount
    # instead of a per-sample Python loop.
    y_flat = Y_test.ravel()
    output_of = test_index.ravel()
    covered = (y_flat > lower) & (y_flat < upper)
    TT = np.bincount(output_of, minlength=num_outputs).astype(float)
    CN = np.bincount(output_of[covered], minlength=num_outputs).astype(float)
    # Outputs with no test points stay NaN without a divide-by-zero warning.
    CR = np.divide(CN, TT, out=np.full(TT.shape, np.nan), where=TT > 0)

    return rmse_train, rmse_test, length_test, CR

In [16]:
# Repeated ICM evaluation on the varying-frequency simulation.
data = "sim_illustration_varying_freq"

# One entry per repetition; every call to ICM() loads the data and refits.
rmse_test_list, length_test_list, CR_list = [], [], []
for i in range(10):
    rmse_train, rmse_test, length_test, CR = ICM(data)
    rmse_test_list += [rmse_test]
    length_test_list += [length_test]
    CR_list += [np.mean(CR)]  # average the per-output coverage rates

rmse_test_array, length_test_array, CR_array = (
    np.asarray(values)
    for values in (rmse_test_list, length_test_list, CR_list)
)

# Report mean and standard deviation across the repetitions.
print("rmse_predtiction:", np.mean(rmse_test_array), np.std(rmse_test_array))
print("average length of CI:", np.mean(length_test_array), np.std(length_test_array))
print("coverage rate:", np.mean(CR_array), np.std(CR_array))

rmse_predtiction: 1.6603599961452047 0.002365771235683471
average length of CI: 3.1636427112767853 0.0014851749147247481
coverage rate: 0.8795 0.0015000000000000013


In [17]:
# Repeated ICM evaluation on the low-frequency simulation.
data = "sim_illustration_low_freq"

# One entry per repetition; every call to ICM() loads the data and refits.
rmse_test_list, length_test_list, CR_list = [], [], []
for i in range(10):
    rmse_train, rmse_test, length_test, CR = ICM(data)
    rmse_test_list += [rmse_test]
    length_test_list += [length_test]
    CR_list += [np.mean(CR)]  # average the per-output coverage rates

rmse_test_array, length_test_array, CR_array = (
    np.asarray(values)
    for values in (rmse_test_list, length_test_list, CR_list)
)

# Report mean and standard deviation across the repetitions.
print("rmse_predtiction:", np.mean(rmse_test_array), np.std(rmse_test_array))
print("average length of CI:", np.mean(length_test_array), np.std(length_test_array))
print("coverage rate:", np.mean(CR_array), np.std(CR_array))

rmse_predtiction: 2.263080672853686 2.5391981031964657e-05
average length of CI: 2.178777096129261 1.2223994996957734e-05
coverage rate: 0.835 0.0


In [18]:
# Repeated ICM evaluation on the high-frequency simulation.
data = "sim_illustration_high_freq"

# One entry per repetition; every call to ICM() loads the data and refits.
rmse_test_list, length_test_list, CR_list = [], [], []
for i in range(10):
    rmse_train, rmse_test, length_test, CR = ICM(data)
    rmse_test_list += [rmse_test]
    length_test_list += [length_test]
    CR_list += [np.mean(CR)]  # average the per-output coverage rates

rmse_test_array, length_test_array, CR_array = (
    np.asarray(values)
    for values in (rmse_test_list, length_test_list, CR_list)
)

# Report mean and standard deviation across the repetitions.
print("rmse_predtiction:", np.mean(rmse_test_array), np.std(rmse_test_array))
print("average length of CI:", np.mean(length_test_array), np.std(length_test_array))
print("coverage rate:", np.mean(CR_array), np.std(CR_array))

rmse_predtiction: 1.5196469276196278 1.0081086647927932e-05
average length of CI: 3.169413829509468 1.190460480287905e-05
coverage rate: 0.9099999999999999 0.0
