## COMMUNITIES AND CRIME dataset experiment


### (1) Getting imports and definitions
#### (libraries and functions)

In [None]:
# Imports 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import time

""" 
SS GP algorithm functions
"""

import os
# Switch the working directory to the folder holding the SS-GP function files
# before importing from GP_funcs_ZTMFSS (presumably so that the local module
# resolves from the current directory -- confirm against sys.path).
os.chdir('C:/Users/hughw/Documents/MSC project/GP algorithms/Master function files')
from GP_funcs_ZTMFSS import kernel_funcs
from GP_funcs_ZTMFSS import model_funcs
from GP_funcs_ZTMFSS import draw_GP
from GP_funcs_ZTMFSS import fit
from GP_funcs_ZTMFSS import diagnostics
from GP_funcs_ZTMFSS import simulations
from functools import partial
# Move to the data directory; later relative paths (e.g. "communities.txt")
# are resolved from here.
os.chdir('C:/Users/hughw/Documents/MSC project/Real data')

"""
Importing in rpy2
"""

# Raw string on purpose: in a normal literal the "\r" of "\rpy2" is the
# carriage-return escape, which silently corrupts the path stored in R_USER.
os.environ['R_USER'] = r'D:\Anaconda3\Lib\site-packages\rpy2'
import rpy2
print(rpy2.__version__)
import rpy2.robjects as robjects

from rpy2.robjects.packages import importr
# import R's "base" package
base = importr('base')
base.R_home()

# import rpy2's package module
import rpy2.robjects.packages as rpackages

# import R's "utils" package (only once; rpackages.importr is the same
# function as the importr imported above)
utils = rpackages.importr('utils')

# select a mirror for R packages
utils.chooseCRANmirror(ind=1) # select the first mirror in the list

# Function definitions for the comparator models (SVGP and SGP).
# TODO: make them always return just predictions and classifications.
"""
Importing in requirements for SVGP Pytorch
"""
import tqdm
import math
import torch
import gpytorch

"""
Defining procedure to run SVGP
"""

from gpytorch.models import ApproximateGP
from gpytorch.variational import CholeskyVariationalDistribution
from gpytorch.variational import VariationalStrategy
from torch.utils.data import TensorDataset, DataLoader

def SVGP_train(y, X, lengthscale_init = 10, num_inducing=100, epochs=100, batch_size=100, learn_rate_variational = 0.1, learn_rate_hyper = 0.01, tol = 1e-4,seed=0, min_epochs = 100
              , alpha = 0.05, print_=False):
    """
    Train a stochastic variational GP with an ARD RBF kernel on (X, y).

    On every minibatch the variational parameters take a natural-gradient
    step and the kernel / mean / likelihood parameters take an Adam step.

    Parameters
    ----------
    y : torch.Tensor
        Training targets, one entry per row of X.
    X : torch.Tensor
        Training inputs (rows are observations, columns are features).
    lengthscale_init : scalar or array-like
        Initial lengthscale(s). Skipped when ``np.any(lengthscale_init)`` is
        false (e.g. ``[]`` or ``0``), leaving the gpytorch default.
    num_inducing : int
        Number of inducing points, drawn without replacement from rows of X.
    epochs : int
        Upper bound on epochs once ``min_epochs`` has been reached.
    batch_size : int
        Minibatch size.
    learn_rate_variational : float
        Learning rate of the natural-gradient optimizer.
    learn_rate_hyper : float
        Learning rate of the Adam optimizer.
    tol : float
        Currently unused; kept so existing callers do not break.
    seed : int
        Seeds numpy (inducing-point draw) and torch (training).
    min_epochs : int
        Minimum number of epochs, run regardless of the loss trend.
    alpha : float
        Weight of the newest loss in the exponentially smoothed loss that
        drives the stopping rule.
    print_ : bool
        If true, print the smallest lengthscales, the smoothed loss and the
        mean absolute change in inverse lengthscales after each epoch.

    Returns
    -------
    tuple
        ``(model, loss, likelihood)`` where ``loss`` is the loss tensor of
        the last minibatch processed.
    """
    # A bare `import tqdm` does not load the `tqdm.notebook` submodule.
    from tqdm.notebook import tqdm as notebook_tqdm

    # setting dimensions
    ntrain, p = len(y), len(X.T)

    # loading in data
    train_dataset = TensorDataset(X, y)
    train_loader = DataLoader(train_dataset, batch_size, shuffle=True)

    # Creating model
    class GPModel(ApproximateGP):
        def __init__(self, inducing_points):
            variational_distribution = gpytorch.variational.NaturalVariationalDistribution(inducing_points.size(0))
            variational_strategy = VariationalStrategy(self, inducing_points, variational_distribution, learn_inducing_locations=True)
            super(GPModel, self).__init__(variational_strategy)
            self.mean_module = gpytorch.means.ConstantMean()
            self.covar_module = gpytorch.kernels.ScaleKernel(base_kernel=gpytorch.kernels.RBFKernel(ard_num_dims = p))

            # Initialize lengthscale
            if np.any(lengthscale_init):
                self.covar_module.base_kernel.lengthscale = np.ones(p)*lengthscale_init

        def forward(self, x):
            mean_x = self.mean_module(x)
            covar_x = self.covar_module(x)
            return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

    np.random.seed(seed)
    inducing_points = X[np.random.choice(ntrain,num_inducing,False), :]
    model = GPModel(inducing_points=inducing_points)
    likelihood = gpytorch.likelihoods.GaussianLikelihood()

    if torch.cuda.is_available():
        model = model.cuda()
        likelihood = likelihood.cuda()
    # Batches are moved here so they always live where the model does.
    device = next(model.parameters()).device

    # Setting up model training
    t = time.time()
    torch.manual_seed(seed)
    model.train()
    likelihood.train()

    # num_data comes from the data passed in, not from a notebook global.
    variational_ngd_optimizer = gpytorch.optim.NGD(model.variational_parameters(), num_data=ntrain, lr=learn_rate_variational)

    hyperparameter_optimizer = torch.optim.Adam([
        {'params': model.hyperparameters()},
        {'params': likelihood.parameters()},
    ], lr=learn_rate_hyper)

    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=ntrain)

    # Setting up convergence criteria
    epochs_iter = notebook_tqdm(total=max(epochs, min_epochs), desc="Epoch")
    i = 0
    param_diff=1
    loss_diff=1
    param = 1/model.covar_module.base_kernel.lengthscale
    numprint = min(10,p)

    # Training model: always run min_epochs, then continue (up to `epochs`)
    # only while the smoothed loss is still decreasing.
    while (i < epochs and loss_diff>0) or i<min_epochs:

        # Within each iteration, we will go over each minibatch of data
        minibatch_iter = notebook_tqdm(train_loader, desc="Minibatch", leave=False)
        for x_batch, y_batch in minibatch_iter:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            ### Perform NGD step to optimize variational parameters
            variational_ngd_optimizer.zero_grad()
            output = model(x_batch)
            loss = -mll(output, y_batch)
            minibatch_iter.set_postfix(loss=loss.item())
            loss.backward()
            variational_ngd_optimizer.step()

            ### Perform Adam step to optimize hyperparameters
            hyperparameter_optimizer.zero_grad()
            output = model(x_batch)
            loss = -mll(output, y_batch)
            loss.backward()
            hyperparameter_optimizer.step()

        # Update convergence criteria
        i+=1
        epochs_iter.update(1)
        param_old = param
        param = 1/model.covar_module.base_kernel.lengthscale
        # .cpu() so the conversion to numpy also works for a CUDA model
        param_diff = np.mean(np.abs((param-param_old).detach().cpu().numpy()))
        if i==1:
            loss_new = loss.item()
        else:
            # exponentially smoothed loss; loss_diff > 0 means still improving
            loss_old = loss_new
            loss_new = loss.item()*alpha+(1-alpha)*loss_old
            loss_diff = loss_old - loss_new

        if print_:
            print(np.sort(model.covar_module.base_kernel.lengthscale.detach().cpu().numpy())[0][:numprint])
            print(loss_new, param_diff)
    epochs_iter.close()
    print("Runtime is ", time.time()-t)

    return model, loss, likelihood

def SVGP_test(model,likelihood, ytest, Xtest, batch_size=100):
    """
    Predict on (Xtest, ytest) in batches and return the predictive means.

    Puts `model` and `likelihood` in eval mode, calls `model` on successive
    batches of `Xtest` with gradients disabled, and concatenates the `.mean`
    of each output on the CPU. The test MSE against `ytest` is printed.

    Returns
    -------
    torch.Tensor
        Concatenated predictive means, in the row order of `Xtest`.
    """
    loader = DataLoader(TensorDataset(Xtest, ytest), batch_size, shuffle=False)

    # Getting model evaluations
    model.eval()
    likelihood.eval()

    # A zero-length float tensor seeds the list so the concatenation is
    # defined (and float-promoted) even if the loader yields no batches.
    batch_means = [torch.empty(0)]
    with torch.no_grad():
        for x_batch, _ in loader:
            batch_means.append(model(x_batch).mean.cpu())
        means = torch.cat(batch_means)

    squared_errors = torch.abs(means - ytest.cpu())**2
    print('Test MSE: {}'.format(torch.mean(squared_errors)))

    return means

"""
Defining procedure to run SGP
"""

from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel, RBFKernel, InducingPointKernel
from gpytorch.distributions import MultivariateNormal

def SGP_train(y, X, lengthscale_init = 10, num_inducing=100, iterations=100, learn_rate = 0.1, tol = 1e-4, seed=0, min_iterations = 100, alpha = 0.05, print_=False):
    """
    Train a sparse (inducing-point kernel) exact GP with an ARD RBF kernel.

    All parameters are optimised jointly with Adam on the exact marginal
    log likelihood, using the full training set at every iteration.

    Parameters
    ----------
    y : torch.Tensor
        Training targets, one entry per row of X.
    X : torch.Tensor
        Training inputs (rows are observations, columns are features).
    lengthscale_init : scalar or array-like
        Initial lengthscale(s). Skipped when ``np.any(lengthscale_init)`` is
        false (e.g. ``[]`` or ``0``), leaving the gpytorch default.
    num_inducing : int
        Number of inducing points, drawn without replacement from rows of X.
    iterations : int
        Upper bound on iterations once ``min_iterations`` has been reached.
    learn_rate : float
        Adam learning rate.
    tol : float
        Currently unused; kept so existing callers do not break.
    seed : int
        Seeds numpy (inducing-point draw) and torch (training).
    min_iterations : int
        Minimum number of iterations, run regardless of the loss trend.
    alpha : float
        Weight of the newest loss in the exponentially smoothed loss that
        drives the stopping rule.
    print_ : bool
        If true, print per-iteration diagnostics.

    Returns
    -------
    tuple
        ``(model, loss, likelihood)`` where ``loss`` is the loss tensor of
        the final iteration.
    """
    # setting dimensions
    ntrain, p = len(y), len(X.T)

    # Creating model
    class GPRegressionModel(gpytorch.models.ExactGP):
        def __init__(self, train_x, train_y, likelihood, inducing_points):
            super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
            self.mean_module = ConstantMean()
            self.base_covar_module = ScaleKernel(RBFKernel(ard_num_dims = p))
            self.covar_module = InducingPointKernel(self.base_covar_module, inducing_points=inducing_points, likelihood=likelihood)

            # Initialize lengthscale
            if np.any(lengthscale_init):
                self.base_covar_module.base_kernel.lengthscale = np.ones(p)*lengthscale_init

        def forward(self, x):
            mean_x = self.mean_module(x)
            covar_x = self.covar_module(x)
            return MultivariateNormal(mean_x, covar_x)

    # Seed right before the draw it controls (not inside the class body).
    np.random.seed(seed)
    inducing_points = X[np.random.choice(ntrain,num_inducing,False), :]
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    # Build the model from the arguments, not from notebook globals.
    model = GPRegressionModel(X, y, likelihood, inducing_points)

    if torch.cuda.is_available():
        model = model.cuda()
        likelihood = likelihood.cuda()

    # Train on the tensors the model itself holds, so the inputs match the
    # stored training data on whichever device the model ended up on.
    train_inputs = model.train_inputs[0]
    train_targets = model.train_targets

    # Setting up model training
    t = time.time()
    torch.manual_seed(seed)
    model.train()
    likelihood.train()

    # Use the adam optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    # Setting up convergence criteria
    i = 0
    param_diff=1
    loss_diff=1
    param = 1/model.base_covar_module.base_kernel.lengthscale
    numprint = min(10,p)

    # Training model: always run min_iterations, then continue (up to
    # `iterations`) only while the smoothed loss is still decreasing.
    while (i < iterations and loss_diff>0) or i<min_iterations:

        # Zero backprop gradients
        optimizer.zero_grad()
        # Get output from model
        output = model(train_inputs)
        # Calc loss and backprop derivatives
        loss = -mll(output, train_targets)
        loss.backward()
        optimizer.step()
        torch.cuda.empty_cache()

        # Update convergence criteria
        i+=1
        param_old = param
        param = 1/model.base_covar_module.base_kernel.lengthscale
        # .cpu() so the conversion to numpy also works for a CUDA model
        param_diff = np.mean(np.abs((param-param_old).detach().cpu().numpy()))
        if i==1:
            loss_new = loss.item()
        else:
            # exponentially smoothed loss; loss_diff > 0 means still improving
            loss_old = loss_new
            loss_new = loss.item()*alpha+(1-alpha)*loss_old
            loss_diff = loss_old - loss_new

        if print_:
            print(np.sort(model.base_covar_module.base_kernel.lengthscale.detach().cpu().numpy())[0][:numprint])
            print(loss_new, param_diff)
            # i was already incremented above, so it is the 1-based iteration
            print('Iter %d - Loss: %.3f' % (i, loss.item()))
    print("Runtime is ", time.time()-t)

    return model, loss, likelihood

def SGP_test(model,likelihood, ytest, Xtest):
    """
    Predict on Xtest with a trained SGP model and return the predictive mean.

    Puts `model` and `likelihood` in eval mode, evaluates the model on the
    whole of `Xtest` in one call with gradients disabled, prints the test MSE
    against `ytest`, and returns the predictive mean as a CPU tensor (as
    `SVGP_test` does).
    """
    # Getting model evaluations
    model.eval()
    likelihood.eval()
    with gpytorch.settings.max_preconditioner_size(10), torch.no_grad():
        # Bring the mean to the CPU so it can be compared with ytest.cpu()
        # even when the model lives on the GPU.
        means = model(Xtest).mean.cpu()

    print('Test MSE: {}'.format(torch.mean(torch.abs(means - ytest.cpu())**2)))

    return means

### (2) Getting data 
#### (takes in a file and outputs cleaned data y, X)

In [None]:
# Inputting raw data
# Reset matplotlib to the rc-file defaults, then set black axes/text and font size 15.
from matplotlib import rcParams, rc_file_defaults
rc_file_defaults()
plt.rc('axes',edgecolor='black')
font = {'family' : 'normal',
        'size'   : 15}
plt.rc('font', **font)
plt.rcParams.update({'text.color' : "black",
                      'xtick.color' : "black",
                      'ytick.color' : "black",
                     'axes.labelcolor' : "black"})

# NOTE(review): read_csv treats the first line as a header by default --
# confirm communities.txt really has a header row, otherwise one observation is lost.
df = pd. read_csv ("communities.txt", sep=",") 
data = np.array(df)

# Box cox transformations to dependent
# The dependent variable is the last column of the raw table.
y = data[:,len(data.T)-1].astype(float)
plt.hist(y)

# Probability plots against a normal distribution, before and after Box-Cox
# (rows 1 and 2 of a 3-row grid; the third row is left unused).
fig = plt.figure()
ax1 = fig.add_subplot(311)
prob = stats.probplot(y, dist=stats.norm, plot=ax1)
ax1.set_xlabel('')
ax1.set_title('Probplot against normal distribution')

ax2 = fig.add_subplot(312)
# Small offset added before the transform (Box-Cox requires strictly positive data).
yt, _ = stats.boxcox(y+1e-10)
prob = stats.probplot(yt, dist=stats.norm, plot=ax2)
ax2.set_title('Probplot after Box-Cox transformation')
plt.show()

# Counting NAs and removing NA columns 
# Only columns from index 5 onwards are considered; any column containing '?' is dropped.
NA_ind = data[:,5:]=='?'
NA_rowcount = np.sum(NA_ind, 0)
D = data[:,5:][:,NA_rowcount==0].astype(float)
print("dimensions are : ", np.shape(D))

# Getting correlations
# D still contains the dependent variable as its last column, so the number
# of predictors is d = p - 1.
n,p = np.shape(D)
d = p-1
# corrxy has p entries but only the first p-1 are filled; the last stays 0.
corrxy = np.zeros(p)
for i in range(len(D.T)-1):
     corrxy[i]=np.corrcoef(D[:,i],yt)[0,1]
sns.set(font_scale = 3)
fig,axs = plt.subplots(figsize = (25,20))
fig.set_facecolor('white')
axs.set_facecolor('white')
plt.bar(range(p),height = corrxy, color = "blue")
plt.title("Correlation between y, X")
plt.show()
fig.savefig("CorryX_communities")
# Heatmap of absolute pairwise correlations between the predictor columns.
fig,axs = plt.subplots(figsize = (15,10))
sns.heatmap(np.abs(np.corrcoef(D[:,:len(D.T)-1].T)), label = "Correlation matrix", yticklabels = False, xticklabels = False, color = "black")
plt.title(r"Communities and crime $\rho_{XX}$", fontsize = 50, color = "black")
fig.savefig("CorrX_communities")

# get y, X
# y is the Box-Cox transformed target; X is every column of D except the last.
y = yt
X = D[:,:len(D.T)-1]

### (3) Setting up experiment dimensions 
#### (determine random train:test splits and set up storage objects)

In [None]:
# Train:test splitting settings (80% of the n rows go to training)
n_replications = 10
ntrain = int(0.8*n)
ntest = n - ntrain

# Number of models compared (second axis of the storage arrays below)
nmodel = 3

# Minibatch / nearest-neighbour sizes
minibatch = 256
nns = 256

# Kernel and its gradient, taken from the SS-GP function files
kern = kernel_funcs.gaussian
grad_kern = kernel_funcs.grad_gaussian

# Switches
post_var = False
post_cov = False
train_largest = True

# Numeric settings
MC = 1000
reg = 0.01
v_l0 = 1e+4
v_l1 = 1e-4
a = 1e-3
b = 1e-3
ELBO_sample = 1000
temp = 1

# 11 multipliers, geometrically spaced from 100 down to 1/100
scale_vals = 2**np.linspace(np.log2(100),-np.log2(100),11)

# Storage objects: one row per replication, one column per model;
# L and Lambda additionally hold one value per predictor.
MSE = np.zeros((n_replications, nmodel))
Runtime = np.zeros_like(MSE)
L = np.zeros((n_replications, nmodel, d))
Lambda = np.zeros_like(L)

### (4) Iterating over train:test splits

In [None]:
"""
BMA over different minibatch sizes
"""
# One (learn_rate, subsample) pair per model column m of MSE / Runtime / L / Lambda.
minibatch_configs = [(0.0025, 256), (0.005, 128), (0.01, 64)]

i=0
j=0
# i counts accepted replications; j counts the seeds tried so far (a split is
# skipped, and the next seed tried, when a training column has zero variance).
while i < n_replications:
    np.random.seed(j)
    j+=1

    shuffled_indexes = np.random.choice(n,n,False)
    y_shuffle = y[shuffled_indexes]
    X_shuffle = X[shuffled_indexes]

    if np.min(X_shuffle[:ntrain].var(0))>0:

        # Get current train:test split (standardised with training-set moments only)
        ytrain = ((y_shuffle[:ntrain]-y_shuffle[:ntrain].mean())/y_shuffle[:ntrain].var()**0.5).reshape(ntrain,1)
        ytest = ((y_shuffle[ntrain:]-y_shuffle[:ntrain].mean())/y_shuffle[:ntrain].var()**0.5).reshape(ntest,1)

        Xtrain = (X_shuffle[:ntrain] - X_shuffle[:ntrain].mean(0))/X_shuffle[:ntrain].var(0)**.5
        Xtest = (X_shuffle[ntrain:] - X_shuffle[:ntrain].mean(0))/X_shuffle[:ntrain].var(0)**.5

        # Prediction routine shared by all minibatch configurations
        testing_algorithm = partial(diagnostics.get_pred_posterior_GP_NN,reg = 0.01 ,kern = kern, grad_kern = grad_kern, latents = False, pred_selected = True, post_var = False, NN=nns, print_=True)

        # Run all SS-GP algorithms, get predictions and store performance
        for m, (ssgp_learn_rate, ssgp_subsample) in enumerate(minibatch_configs):
            t = time.time()

            # Running algorithm
            hyper_vals = [1e+4*scale_vals,1e-4*scale_vals, np.linspace(0,10,11).astype(int)]
            hyper_arg = ["v0","v1", "seed"]
            # NOTE(review): training_arg_vals has 13 entries for 12 names in
            # training_args (trailing 1), and "seed" is also listed in
            # hyper_arg -- confirm how fit.hyper_opt_SSGP resolves both.
            best_pair, selections, losses, Results = fit.hyper_opt_SSGP(
                                                ytrain, Xtrain, fit.VB_EM_GP_SS, testing_algorithm, hyper_arg, hyper_vals, method =  "ML", metric = "elbo",
                                                training_args=["final_ELBO_sample", "seed", "iter_remove", "print_VBEM", "learn_rate", "subsample", "sampling_strat", "min_VBEM_iter","init_GP_iter", "max_VBEM_iter", "GP_fit_tol", "VBEM_tol"],
                                                training_arg_vals=[0, i, True, False, ssgp_learn_rate, ssgp_subsample, "nn" ,5, 100,  5 ,           1e-5,         0.1/d, 1])

            t1 = time.time()

            # Score every model with a non-zero selection entry. Each score
            # uses that model's own result, Results[k] (not the seed counter j).
            log_predictives = np.zeros(len(Results))
            for k in np.where(selections!=0)[0]:
                log_predictives[k] =  diagnostics.get_pred_posterior_GP_NN_CV(ytrain,Xtrain,Results[k],0.01,kern,NN=64, fraction=n**-0.5,post_var=True, print_=False, use_tree=False, leaf_size=100, seed=0)
                print(k)
            # Models with a zero selection entry are pushed far below the rest
            # so they receive (numerically) zero weight.
            log_predictives[np.where(selections==0)[0]]=np.min(log_predictives)-1000

            train_time = time.time()-t

            # Normalised weights relative to the best score; anything more
            # than 500 below the best is zeroed explicitly.
            min_loss = np.max(log_predictives)
            weights = np.exp(log_predictives - min_loss)*(np.abs(log_predictives - min_loss)<=500)
            weights = weights/weights.sum()

            # Weighted averages over models of Results[k][3] and |Results[k][0][0]|
            Lmbda = np.zeros((len(Results), d))
            Ls = np.zeros((len(Results), d))
            for k in range(len(Results)):
                Lmbda[k] = Results[k][3]
                Ls[k] = np.abs(Results[k][0][0])
            PIP = Lmbda.T @ weights
            l = Ls.T @ weights

            BMA_preds = diagnostics.get_BMA_predictions(ytrain,Xtrain,Xtest,testing_algorithm, Results,weights, MC_samples=MC)
            MSE[i,m] = simulations.MSE_pc(BMA_preds[1],ytest)
            Runtime[i,m] = time.time()-t
            L[i,m,:] = l
            Lambda[i,m,:] = PIP
            print("MSE is : ", MSE[i,m])
            print("Runtime is : ", Runtime[i,m])

        print("MSE mean is : ", MSE[:i+1].mean(0))
        print("Runtime mean is : ", Runtime[:i+1].mean(0))

        i+= 1

In [None]:
"""
Runs for SVGP/SGP only
"""
# NOTE: results are written to model columns 0 (SGP) and 1 (SVGP) of
# MSE / Runtime / L. Re-initialise the storage objects first if those columns
# already hold results that must be kept.
i=0
j=0
# i counts accepted replications; j counts the seeds tried so far (a split is
# skipped, and the next seed tried, when a training column has zero variance).
while i < n_replications:
    np.random.seed(j)
    j+=1

    shuffled_indexes = np.random.choice(n,n,False)
    y_shuffle = y[shuffled_indexes]
    X_shuffle = X[shuffled_indexes]

    if np.min(X_shuffle[:ntrain].var(0))>0:

        # Get current train:test split (standardised with training-set moments only)
        ytrain = ((y_shuffle[:ntrain]-y_shuffle[:ntrain].mean())/y_shuffle[:ntrain].var()**0.5).reshape(ntrain,1)
        ytest = ((y_shuffle[ntrain:]-y_shuffle[:ntrain].mean())/y_shuffle[:ntrain].var()**0.5).reshape(ntest,1)

        Xtrain = (X_shuffle[:ntrain] - X_shuffle[:ntrain].mean(0))/X_shuffle[:ntrain].var(0)**.5
        Xtest = (X_shuffle[ntrain:] - X_shuffle[:ntrain].mean(0))/X_shuffle[:ntrain].var(0)**.5

        # Torch versions of the same standardised split
        Ys = ((y_shuffle-y_shuffle[:ntrain].mean())/y_shuffle[:ntrain].var()**0.5).reshape(n,1)
        Xs = ((X_shuffle-X_shuffle[:ntrain].mean(0))/X_shuffle[:ntrain].var(0)**0.5).reshape(n,d)
        Xtorch = torch.from_numpy(Xs).float()
        Ytorch = torch.from_numpy(Ys.reshape(ntrain+ntest,)).float()

        train_x = Xtorch[:ntrain, :]
        train_y = Ytorch[:ntrain]
        test_x = Xtorch[ntrain:, :]
        test_y = Ytorch[ntrain:]

        # Restart the model column for every replication; without this m is
        # undefined (or keeps growing past the nmodel axis).
        m = -1

        """
        SGP
        """
        m += 1
        t = time.time()
        model,loss,likelihood = SGP_train(train_y, train_x, lengthscale_init = [], num_inducing=512, iterations=100, learn_rate = 0.1,
                                    seed = 0, min_iterations=100, alpha = 0.01)
        preds = SVGP_test(model,likelihood,test_y, test_x, batch_size=512)
        Runtime[i,m] = time.time()-t
        MSE[i,m] = simulations.MSE_pc(np.array(preds),np.array(test_y))
        print("MSE is : ", MSE[i,m])
        print("Runtime is : ", Runtime[i,m])
        # The SGP model keeps its ARD RBF kernel under base_covar_module
        # (covar_module is the inducing-point wrapper, which has no lengthscale).
        L[i,m,:] = 1/model.base_covar_module.base_kernel.lengthscale.detach().numpy()[0]

        """
        SVGP
        """
        m += 1
        # Restart the clock so the SVGP runtime does not include the SGP run.
        t = time.time()
        model,loss,likelihood = SVGP_train(train_y, train_x, lengthscale_init = [], num_inducing=1024, epochs=100, batch_size=1024, learn_rate_variational = 0.01,
                                           learn_rate_hyper = 0.01, tol = 1e-3, seed = 0, min_epochs = 100, alpha = 0.01)

        preds = SVGP_test(model,likelihood,test_y, test_x, batch_size=1024)
        Runtime[i,m] = time.time()-t
        MSE[i,m] = simulations.MSE_pc(np.array(preds),np.array(test_y))
        L[i,m,:] = 1/model.covar_module.base_kernel.lengthscale.detach().numpy()[0]
        print("MSE is : ", MSE[i,m])
        print("Runtime is : ", Runtime[i,m])

        print("MSE mean is : ", MSE[:i+1].mean(0))
        print("Runtime mean is : ", Runtime[:i+1].mean(0))

        i+= 1

### Saving and exporting data

In [None]:
from datetime import date

# Results are written into the experiment's own folder.
os.chdir('C:/Users/hughw/Documents/MSC project/Real data/Communities and crime')

# Bundle the first ten replications of every storage array.
Output = {
    "L" : L[:10],
    "Lambda" : Lambda[:10],
    "MSE" : MSE[:10],
    "Runtime" : Runtime[:10],
}

# File name carries today's date.
String = "EXPERIMENT_{0}_Communities and crime 10 trials".format(date.today())

# np.save stores the dict as a pickled object array, so reading it back
# needs np.load(..., allow_pickle=True).
np.save(String, Output) # saving