# Experiment 1: SSVGP-ZT

### (1) Importing and defining all required functions

In [None]:
"""
Importing in libraries
"""
import numpy as np
from functools import partial
import time
from IPython.display import clear_output
import inspect
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt

# Make plots inline
%matplotlib inline

"""
Importing algorithm functions
"""
import os
# NOTE(review): the working directory is switched to a hard-coded, machine-specific
# folder before importing GP_funcs_ZTMFSS — presumably so the module is found via
# the current directory; confirm, and adjust the path when running elsewhere.
os.chdir('C:/Users/hughw/Documents/MSC project/GP algorithms/Master function files')
from GP_funcs_ZTMFSS import kernel_funcs
from GP_funcs_ZTMFSS import model_funcs
from GP_funcs_ZTMFSS import draw_GP
from GP_funcs_ZTMFSS import fit
from GP_funcs_ZTMFSS import diagnostics
from GP_funcs_ZTMFSS import simulations
from functools import partial
# Switch to the results folder: the final np.save call in this notebook uses a
# relative file name, so the output file is written relative to this directory.
os.chdir('C:/Users/hughw/Documents/MSC project/Simulation results')

### (2) Setting simulation parameters and models

In [None]:
"""
Simulation controls
"""
# Simulation settings (all passed to the data generator and/or the
# simulation driver in the results cell below)
n = 300
ntest = 100
p = 100
q = 5
corr = 0.5
r2 = 0.9
lsmean = 1  # TOGGLE BETWEEN 0.25 AND 1
sigma2 = 1
# Constant vector of length p, every entry equal to lsmean / sqrt(q)
ltrue = np.full(p, lsmean / q ** 0.5)
strue = 1
nruns = 100
nmodels = 1

# Per-model settings: each list is indexed by model number (one entry here,
# matching nmodels = 1)
VS_threshs = [[0.9, 0.95, 0.99]]
minibatch_size = [150]
base = [0.025]
opt = ["amsgrad"]

# Kernel and its gradient, taken from the project's kernel_funcs module
kern = kernel_funcs.gaussian
grad_kern = kernel_funcs.grad_gaussian


# Storage objects: one zero-initialised array per reported quantity, with the
# run index on the first axis and the model index on the second.
t = len(VS_threshs[0])  # number of thresholds in the first model's list
m = nmodels

per_model_shape = (nruns, m)         # one value per run and model
per_input_shape = (nruns, m, p)      # one value per run, model and input dimension
per_thresh_shape = (nruns, m, t)     # one value per run, model and threshold

# One value per (run, model)
Runtime = np.zeros(per_model_shape)
L1norm = np.zeros(per_model_shape)
L2norm = np.zeros(per_model_shape)
MSE_F = np.zeros(per_model_shape)
MSE_Y = np.zeros(per_model_shape)
AUC = np.zeros(per_model_shape)

# One value per (run, model, input dimension)
Lambda = np.zeros(per_input_shape)
L = np.zeros(per_input_shape)

# One value per (run, model, threshold)
Acc = np.zeros(per_thresh_shape)
Weighted_Acc = np.zeros(per_thresh_shape)
TPR = np.zeros(per_thresh_shape)
TNR = np.zeros(per_thresh_shape)
PPV = np.zeros(per_thresh_shape)
NPV = np.zeros(per_thresh_shape)
MCC = np.zeros(per_thresh_shape)

### (3) Running results

In [None]:
"""
Setting seed for 100 random trials
"""
# A fixed master seed is used to draw nruns distinct per-trial seeds (sampling
# without replacement from range(1000)), so the full experiment is reproducible.
np.random.seed(8750)
runlist = np.random.choice(1000,nruns,False)
for run, trial_seed in enumerate(runlist):

    # ---- Generating, scaling, and partitioning data ----
    np.random.seed(trial_seed) # Fixing trial seed

    # Drawing data (n training rows followed by ntest test rows)
    Y,F,X,e,lselect,strue,sigma,select=draw_GP.draw_GP_ARD_lm(n,ntest,p,q,sigma2,corr,strue,ltrue,plot_YX=True,kern=kern,cop=False,r2=r2)

    # Scaling data (optional for Y,F). Centring and scaling use statistics of
    # the first n (training) rows only; the same transform is applied to all rows.
    Y = Y.reshape(n+ntest,1)
    F = F.reshape(n+ntest,1)
    Y = (Y-Y[:n].mean())/Y[:n].var()**0.5
    X = (X-X[:n].mean(0))/X[:n].var(0)**0.5
    F = (F-F[:n].mean())/F[:n].var()**0.5

    # Getting training and test set. The test slices must be taken before X is
    # overwritten with its training rows.
    ytest=Y[n:]
    Xtest=X[n:]
    ftest=F[n:]
    y=Y[:n]
    X=X[:n]
    f=F[:n]

    # ---- Running simulation ----
    # Keyword names and matching values handed to the training algorithm, one
    # pair of lists per model. They are rebuilt on every run so each call
    # receives fresh list objects.
    args = []
    arg_vals = []
    for i in range(nmodels):
        args.append(["seed","subsample", "sampling_strat", "min_VBEM_iter", "max_VBEM_iter", "GP_fit_tol", "VBEM_tol", "max_GP_fit_iter", "init_GP_iter", "iter_remove", "print_VBEM", "learn_rate", "optimisation", "final_prune"])
        arg_vals.append([0,minibatch_size[i], "unif",5, 10, 1e-5, 0.1/p, 100,100, False, False, base[i], opt[i], True])

    test_algorithm = partial(diagnostics.get_pred_posterior_GP,reg = 0.01,kern = kernel_funcs.gaussian)

    # Each returned quantity fills row `run` of the matching storage array.
    Runtime[run], Lambda[run], L[run], L1norm[run], L2norm[run], MSE_F[run], MSE_Y[run], Acc[run], Weighted_Acc[run], TPR[run], TNR[run], PPV[run], NPV[run], AUC[run], MCC[run]= simulations.do_simulation_VBEMSSGP(
                               y, X, ftest, ytest, Xtest, q, algorithm_training = fit.VB_EM_GP_SS, algorithm_testing = test_algorithm, 
                               nmodels = m, args = args, arg_vals = arg_vals, post_fit = [False], SS_GP = [True], 
                               post_var =[True],order_relevant_vars = False, order_irrelevant_vars = False, 
                               VS_threshs = VS_threshs, select = select, predict_selected = False, hyper_opt =  [True], 
                               hyper_arg = ["v0","v1"],hyper_vals = [1e+4*2**np.linspace(np.log2(100),-np.log2(100),11),1e-4*2**np.linspace(np.log2(100),-np.log2(100),11)], 
                               ltrue=lselect, MC_pred = False, model_select = [False], post_fit_subsample=n, train = np.repeat(True,nmodels),
                               model_weighting = np.repeat("elpd",1))

    # Running means over the runs completed so far (rows 0..run inclusive).
    print("RUN {0}".format(run))
    print("Runtime mean is:", Runtime[:run+1].mean(0))
    print("Weighted accuracy mean is:", Weighted_Acc[:run+1].mean(0))
    print("TPR mean is:", TPR[:run+1].mean(0))
    print("PPV mean is:", PPV[:run+1].mean(0))
    print("MCC mean is:", MCC[:run+1].mean(0))
    print("L1norm mean is:", L1norm[:run+1].mean(0))
    print("L2norm mean is:", L2norm[:run+1].mean(0))
    print("MSE_F mean is:", MSE_F[:run+1].mean(0))
    print("MSE_Y mean is:", MSE_Y[:run+1].mean(0), "\n")
    

In [None]:
from datetime import date

# Bundle every result array into one dictionary keyed by the array's name.
Output = {
    "Runtime": Runtime,
    "Lambda": Lambda,
    "L": L,
    "L1norm": L1norm,
    "L2norm": L2norm,
    "MSE_F": MSE_F,
    "MSE_Y": MSE_Y,
    "Acc": Acc,
    "Weighted_Acc": Weighted_Acc,
    "TPR": TPR,
    "TNR": TNR,
    "PPV": PPV,
    "NPV": NPV,
    "AUC": AUC,
    "MCC": MCC,
}

# File name carries today's date; the path is relative to the current
# working directory.
today = date.today()
String = "E1_results_{0}".format(today)
np.save(String, Output) # saving