# Experiment 2: SSVGP-LR

### (1) Importing and defining all required functions

In [1]:
"""
Importing libraries
"""
import inspect
import time
from datetime import date
from functools import partial

import matplotlib.pyplot as plt
import numpy as np
from IPython.display import clear_output
from sklearn.metrics import roc_auc_score

# Make plots inline
%matplotlib inline

"""
Importing algorithm functions
"""
import os
os.chdir('C:/Users/hughw/Documents/MSC project/GP algorithms/Master function files')
from GP_funcs_FRSS import kernel_funcs
from GP_funcs_FRSS import model_funcs
from GP_funcs_FRSS import draw_GP
from GP_funcs_FRSS import fit
from GP_funcs_FRSS import diagnostics
from GP_funcs_FRSS import simulations
from functools import partial
os.chdir('C:/Users/hughw/Documents/MSC project/Simulation results')

### (2) Setting simulation parameters and models

In [2]:
"""
Simulation controls
"""
# Simulation settings
n = 100              # rows used for training in each trial (the generator is asked for n + ntest rows)
ntest = 20           # rows held out for testing in each trial
p = 1000             # passed to the data generator; also the trailing axis of Lambda and L
q = 6                # passed to the data generator and to the simulation routine
correlation = False  # passed to the data generator
nruns = 100          # number of trials; first axis of every storage array
l_init = 0.01        # recorded in the results file name
beta2 = 0.99         # recorded in the results file name
nmodels = 1          # number of model configurations fitted per trial

# Per-model settings; each list appears to hold one entry per model configuration.
VS_threshs = [[0.9,0.95,0.99]]
iter_remove = [False]
sampling_strat = ["unif"]
minibatch_size = [50]
VBtol = 0.1/p        # handed to the training routine as "VBEM_tol"
GPtol = [1e-6]
base = [0.05]
post_var = [False]
opt = ["amsgrad"]
kern = kernel_funcs.gaussian
grad_kern = kernel_funcs.grad_gaussian

# Storage objects
t = len(VS_threshs[0])  # number of thresholds in the first threshold set
# Derived from nmodels (previously a separate hard-coded 1) so the storage arrays
# always agree with the number of argument lists built with range(nmodels).
m = nmodels

# One value per (trial, model)
Runtime = np.zeros((nruns, m))
L1norm = np.zeros((nruns, m))
L2norm = np.zeros((nruns, m))
MSE_F = np.zeros((nruns, m))
MSE_Y = np.zeros((nruns, m))
AUC = np.zeros((nruns, m))

# One value per (trial, model, input dimension)
Lambda = np.zeros((nruns, m, p))
L = np.zeros((nruns, m, p))

# One value per (trial, model, threshold)
Acc = np.zeros((nruns, m, t))
Weighted_Acc = np.zeros((nruns, m, t))
TPR = np.zeros((nruns, m, t))
TNR = np.zeros((nruns, m, t))
PPV = np.zeros((nruns, m, t))
NPV = np.zeros((nruns, m, t))
MCC = np.zeros((nruns, m, t))

### (3) Running algorithm iterations, saving and displaying results

In [None]:
np.random.seed(8750)
# One distinct seed per trial, drawn without replacement from 0..999. The count
# follows `nruns` (previously a literal 100) so it always matches the first axis
# of the storage arrays.
runlist = np.random.choice(1000, nruns, False)
for run, trial_seed in enumerate(runlist):

    """
    Generating data and scaling data
    """
    lselect=[]
    np.random.seed(trial_seed) # Fixing trial seed
    t_start = time.time()  # own name, so the config cell's `t` is not overwritten
    Y,F,X,e,sigma,select=draw_GP.draw_parametric_savitsky(n,ntest,p,q, correlation)

    Y = Y.reshape(n+ntest,1)
    F = F.reshape(n+ntest,1)
    # Column-wise standardisation using the mean/variance of all n+ntest rows
    X = (X-X.mean(0))/X.var(0)**0.5

    # Getting training and test set
    ytest=Y[n:]
    Xtest=X[n:]
    ftest=F[n:]
    y=Y[:n]
    X=X[:n]
    f=F[:n]
    print("data generated")
    if lselect:
        print("Length-scales are: ",lselect[select])
    print("Noise variance is: ",sigma**2)
    print("Average data variance is: ", np.mean(np.var(X,0)))
    print(time.time()-t_start)

    """
    Running algorithm
    """
    # The simulation routine takes parallel lists of argument names and values,
    # one pair of lists per model. Building them from a single dict keeps each
    # name next to its value.
    args=[]
    arg_vals =[]
    for i in range(nmodels):
        settings = {
            "k": 10,
            "L0": 1e-2,
            "seed": 1,
            "subsample": minibatch_size[i],
            "svi_subsample": 5,
            "ELBO_sample": min(1000,n),
            "learn_rate": 0.025,
            "ltrue": [],
            "learn_spike": False,
            "min_VBEM_iter": 5,
            "ZT_init_iter": 100,
            "max_VBEM_iter": 5,
            # NOTE(review): the config cell defines GPtol=[1e-6] but 1e-5 is what
            # is passed here - confirm which tolerance is intended.
            "GP_fit_tol": 1e-5,
            "VBEM_tol": VBtol,
            "print_VBEM": False,
            "s0": np.var(y),
            "sig0": np.var(y)**0.5,
            "v0": 1e+4,
            "v1": 1e-4,
            "max_GP_fit_iter": 100,
            "iter_remove": iter_remove[i],
            "sampling_strat": sampling_strat[i],
            "final_prune": True,
        }
        args.append(list(settings))
        arg_vals.append(list(settings.values()))

    # Use the configured kernel so the kernel tag in the results file name
    # describes what was actually run.
    testing_algorithm = partial(diagnostics.get_pred_posterior_GP,reg=0.01,kern = kern)

    # Grids for the hyper-parameters named in hyper_arg: 11 log2-spaced points
    # running from 100 down to 1/100, the first grid additionally scaled by 1e4.
    hyper_vals = [1e+4*2**np.linspace(np.log2(100),-np.log2(100),11),
                  2**np.linspace(np.log2(100),-np.log2(100),11)]

    (Runtime[run], Lambda[run], L[run], V, L1norm[run], L2norm[run], MSE_F[run], MSE_Y[run],
     Acc[run], Weighted_Acc[run], TPR[run], TNR[run], PPV[run], NPV[run], AUC[run], MCC[run]) = simulations.do_simulation_VBEMSSGP(
                               y, X, ftest, ytest, Xtest, q, algorithm_training = fit.VB_EM_GP_SS, algorithm_testing = testing_algorithm, post_var = post_var,
                               nmodels = m, args = args, arg_vals = arg_vals, SS_GP = [True], hyper_opt = [True], train = [True],
                                hyper_arg = ["v0","v1"], hyper_vals = hyper_vals, order_relevant_vars = False, order_irrelevant_vars = False,
                                VS_threshs = VS_threshs, select = select, predict_selected = [False], ltrue=lselect, MC_pred = [True], model_select = [False],
                                model_weighting = ["elpd"])

    # Running means over the trials completed so far (current trial included)
    print("RUN {0}".format(run))
    print("Runtime mean is:", Runtime[:run+1].mean(0))
    # Was Weighted_Acc[:run]: that left out the current trial and averaged an
    # empty slice (NaN) on the first trial.
    print("Weighted accuracy mean is:", Weighted_Acc[:run+1].mean(0))
    print("TPR mean is:", TPR[:run+1].mean(0))
    print("PPV mean is:", PPV[:run+1].mean(0))
    print("MCC mean is:", MCC[:run+1].mean(0))
    print("L1norm mean is:", L1norm[:run+1].mean(0))
    print("L2norm mean is:", L2norm[:run+1].mean(0))
    print("MSE_F mean is:", MSE_F[:run+1].mean(0))
    print("MSE_Y mean is:", MSE_Y[:run+1].mean(0), "\n")

    # Results of the current trial only
    print("Runtime is:", Runtime[run])
    print("Weighted accuracy is:", Weighted_Acc[run])
    print("TPR is:", TPR[run])
    print("PPV is:", PPV[run])
    print("MCC is:", MCC[run])
    print("L1norm is:", L1norm[run])
    print("L2norm is:", L2norm[run])
    print("MSE_F is:", MSE_F[run])
    print("MSE_Y is:", MSE_Y[run], "\n")
    

data generated
Noise variance is:  0.0025000000000000005
Average data variance is:  0.9969892827714567
0.002999544143676758


In [None]:
# Metric names and their storage arrays, kept in matching order.
namelist = ["Runtime", "MSE_F", "MSE_Y", "Acc", "Weighted_Acc", "TPR", "TNR", "PPV", "NPV", "AUC", "MCC"]
objlist = [Runtime, MSE_F, MSE_Y, Acc, Weighted_Acc, TPR, TNR, PPV, NPV, AUC, MCC]
# Indices of the trials to summarise: all of them. Derived from nruns rather than
# a literal 0..99 so the summary stays correct when the number of trials changes.
iters = np.arange(nruns)

# Mean over trials (axis 0) for every metric
for name, values in zip(namelist, objlist):
    print("Mean {0} is:".format(name), np.mean(values[iters],0))

print("\n")
# Median over trials for every metric
for name, values in zip(namelist, objlist):
    print("Median {0} is:".format(name), np.median(values[iters],0))

print("\n")
quant = 0.25
# For these metrics the (1 - quant) quantile is computed while the printed label
# still shows `quant`.
# NOTE(review): presumably intentional, so that the reported value is always the
# unfavourable-side quartile - confirm, since the label reads "0.25 quantile".
upper_tail_metrics = ["Runtime", "MSE_F", "MSE_Y"]
for name, values in zip(namelist, objlist):
    level = 1-quant if name in upper_tail_metrics else quant
    print("{1} quantile {0} is:".format(name, quant), np.quantile(values[iters],level,0))

In [None]:
# Bundle every per-trial result array under its own key.
Output = {
    "Runtime": Runtime,
    "Lambda": Lambda,
    "L": L,
    "L1norm": L1norm,
    "L2norm": L2norm,
    "MSE_F": MSE_F,
    "MSE_Y": MSE_Y,
    "Acc": Acc,
    "Weighted_Acc": Weighted_Acc,
    "TPR": TPR,
    "TNR": TNR,
    "PPV": PPV,
    "NPV": NPV,
    "AUC": AUC,
    "MCC": MCC,
}

# `newsumgrads`, `predict_selected`, `MC_pred` and `hyper_opt` are not assigned in
# any cell of this notebook, so on a fresh kernel the file-name format call raised
# NameError. A value already present in the namespace still wins; otherwise fall
# back to the literals passed to the simulation call (no value for `newsumgrads`
# is visible anywhere, hence "NA").
namespace = globals()
newgrads_tag = namespace.get("newsumgrads", "NA")
predselect_tag = namespace.get("predict_selected", [False])[0]
mcpred_tag = namespace.get("MC_pred", [True])[0]
hyperopt_tag = namespace.get("hyper_opt", [True])[0]

# First five characters of the kernel function's name. Reading __name__ avoids
# slicing the repr at a fixed offset; the old slice is kept only as a fallback for
# callables that have no __name__.
kern_tag = kern.__name__[:5] if hasattr(kern, "__name__") else str(kern)[23:28]

String = "Stage1_Savitsky_LR_{0}_l0={1}_b2={2}_newgrads={3}_predselect={4}_MCpred={5}_hyperopt={11}_minibatch={12}_n={6}_p={7}_q={8}_kern={9}_runs={10}".format(
        date.today(), l_init, beta2, newgrads_tag, predselect_tag, mcpred_tag, n, p, q, kern_tag, nruns, hyperopt_tag, minibatch_size[0])
# np.save appends ".npy"; the dict is stored as a pickled object array, so it must
# be read back with np.load(..., allow_pickle=True).
np.save(String, Output) # saving