In [1]:
### import Necessary packages
import sys
# sys.path.append('../../')
import numpy as np
import tensorflow as tf
import time
import warnings
import pandas as pd
import argparse
from sklearn.model_selection import StratifiedKFold ## for c-v
warnings.filterwarnings('ignore')  # ignore DeprecationWarnings from tensorflow

import gpflow
from gpflow.ci_utils import ci_niter ## for the number of training
from MOGP_convolutionalKernel.utils import run_adam, Performance_measure, Performance_list ## Import optimization
from MOGP_convolutionalKernel.Building_Models import Build_MOGP_AR_RBF

# Reproducibility: seed each random number generator this notebook relies on
# (Python's `random`, NumPy's global RNG, and TensorFlow's global seed).
import random
for seed_fn, seed_value in ((random.seed, 3), (np.random.seed, 1), (tf.random.set_seed, 24)):
    seed_fn(seed_value)

# Performance metric
from sklearn.metrics import recall_score
from sklearn.datasets import make_classification as mc
import math
## Cross-validation
from sklearn.model_selection import train_test_split

#### Data set  

In [2]:
# Read the comma-separated balance-scale file; the file has no header row.
Importdata = pd.read_table('Data_set/balance-scale.data', sep=',', header=None)
npdata = np.array(Importdata)
# Replace each class letter with an integer code, one letter at a time.
for class_letter, class_code in (('L', 0), ('R', 1), ('B', 2)):
    npdata = np.where(npdata == class_letter, class_code, npdata)
Data = npdata.astype('float64')
# Column 0 carries the class code (kept as an (n, 1) integer column);
# every remaining column is used as an input feature.
Y = Data[:, :1].astype('int64')
X = Data[:, 1:]

In [3]:
# 5-fold stratified splitter for the outer evaluation loop; shuffling is
# disabled, so no random state is supplied.
sfolder = StratifiedKFold(shuffle=False, random_state=None, n_splits=5)

#### Set up parameters

In [4]:
# --- Optimisation settings ---
Size_Minibatch = 200       # minibatch size handed to Build_MOGP_AR_RBF and run_adam
Number_train = 4000        # iteration budget; wrapped by ci_niter() before training

# --- Model-selection settings ---
Num_U_CV = [1, 2, 3]       # candidate Num_U values compared on the validation split
cv_measure = 'Recall'      # validation score used to choose among Num_U_CV

# --- Arguments forwarded to Build_MOGP_AR_RBF ---
ns = [1]                   # passed as Num_sub
whole_latent_f = 3         # passed as Whole_f
num_sample = [2]           # passed as num_subsample
num_latent_f_list = [3]    # passed as num_latent_f
num_output = 1             # passed as num_output

#### MOGP_AR Performance Metric

In [5]:
# Per-fold result containers for MOGP_AR. Performance_list() supplies five
# collectors (presumably empty lists -- confirm in MOGP_convolutionalKernel.utils);
# the fold loop appends training time, precision, recall and F1 to them.
(time_training_MOGP_AR,
 Prediction_error_MOGP_AR,
 Precision_weighted_MOGP_AR,
 Recall_weighted_MOGP_AR,
 F1_weighted_MOGP_AR) = Performance_list()
# Chosen Num_U per fold, and wall-clock time per fold including the Num_U search.
MOGP_AR_U, MOGP_AR_whole_time = [], []

#### Set up and run MOGPs-AR

In [6]:
# Outer 5-fold evaluation of MOGP_AR. For every fold:
#   1. hold out 20% of the training fold and pick the Num_U candidate with the
#      best validation score,
#   2. refit on the whole training fold with that Num_U (this fit is timed),
#   3. predict the test fold and record weighted precision / recall / F1.

# Training settings are identical for every fold, so compute them once.
### minibatch_size
minibatch_size = Size_Minibatch
### maximum number of training iterations
maxiter = ci_niter(Number_train)

# Validate the model-selection measure before any training starts. Previously
# an unsupported measure was only noticed after a full training run; the code
# then printed a message and broke out of the search with an empty score list,
# so the later max() call failed with an unrelated "empty sequence" error.
if cv_measure != 'Recall':
    raise ValueError(
        "Unsupported cv_measure %r: only 'Recall' is implemented" % (cv_measure,)
    )

# Keyword arguments shared by every Build_MOGP_AR_RBF call below.
MOGP_AR_build_kwargs = dict(Num_sub=ns,
                            Whole_f=whole_latent_f,
                            num_subsample=num_sample,
                            num_latent_f=num_latent_f_list,
                            num_output=num_output,
                            Size_minibatch=minibatch_size)

for train_index, test_index in sfolder.split(X, Y):
    Xtrain, Xtest = X[train_index], X[test_index]
    Ytrain, Ytest = Y[train_index], Y[test_index]

    ### Find the optimal number of u
    # Inner hold-out split used only for choosing Num_U.
    X_train_cv, X_val, Y_train_cv, Y_val = train_test_split(Xtrain, Ytrain, test_size=0.2, random_state=0)

    ### MOGP_AR
    # Start of the "whole" timer: covers the Num_U search plus the final fit.
    a_MOGP_AR_whole_time = time.time()
    CV_error_AR = []
    # Prediction inputs are the features with one all-zero column appended
    # (presumably the output index expected by the model -- TODO confirm).
    # They do not depend on N_U, so build them once per fold.
    X_pre_MOGP_AR_cv = np.hstack((X_val, np.zeros_like(X_val[:, :1])))
    for N_U in Num_U_CV:
        MOGP_AR_cv, data_MOGP_AR_cv, N_MOGP_AR_cv = Build_MOGP_AR_RBF(X_train_cv, Y_train_cv, Num_U=N_U,
                                                                      **MOGP_AR_build_kwargs)
        logf_mogp_ar_cv = run_adam(MOGP_AR_cv, maxiter, data_MOGP_AR_cv, N_MOGP_AR_cv, minibatch_size)
        mu_y_MOGP_AR_cv, _ = MOGP_AR_cv.predict_y_categorical(X_pre_MOGP_AR_cv)
        # Predicted class = arg-max over the last axis of the first returned array.
        Test_error_AR_cv = recall_score(Y_val, np.argmax(mu_y_MOGP_AR_cv[0], axis=1)[:, None], average='weighted')
        CV_error_AR.append(Test_error_AR_cv)

    # Despite the "error" name these are recall scores, so larger is better;
    # ties resolve to the earliest candidate in Num_U_CV.
    Optimal_Num_u_MOGP_AR = Num_U_CV[CV_error_AR.index(max(CV_error_AR))]

    # Refit on the full training fold with the selected Num_U.
    MOGP_AR, data_MOGP_AR, N_MOGP_AR = Build_MOGP_AR_RBF(Xtrain, Ytrain, Num_U=Optimal_Num_u_MOGP_AR,
                                                         **MOGP_AR_build_kwargs)
    a_MOGP_AR = time.time()
    logf_mogp_AR = run_adam(MOGP_AR, maxiter, data_MOGP_AR, N_MOGP_AR, minibatch_size)
    b_MOGP_AR = time.time()
    print('runing time:', b_MOGP_AR - a_MOGP_AR)
    # Final-fit time only, then total time including the Num_U search.
    time_training_MOGP_AR.append(b_MOGP_AR - a_MOGP_AR)
    MOGP_AR_whole_time.append(b_MOGP_AR - a_MOGP_AR_whole_time)

    ## Prediction
    X_pre_MOGP_AR = np.hstack((Xtest, np.zeros_like(Xtest[:, :1])))
    mu_y_MOGP_AR, _ = MOGP_AR.predict_y_categorical(X_pre_MOGP_AR)
    MOGP_AR_U.append(Optimal_Num_u_MOGP_AR)

    ####### Performance Measure
    ## MOGP_AR ##
    # The first value returned by Performance_measure is discarded here.
    _, P_w_MOGP_AR, R_w_MOGP_AR, F1_w_MOGP_AR = Performance_measure(mu=np.argmax(mu_y_MOGP_AR[0], axis=1), Yest=Ytest)

    ### Precision
    Precision_weighted_MOGP_AR.append(P_w_MOGP_AR)
    ### Recall
    Recall_weighted_MOGP_AR.append(R_w_MOGP_AR)
    ### F1
    F1_weighted_MOGP_AR.append(F1_w_MOGP_AR)

-501.0612522983451
-333.5179018472001
-121.67869809490199
-92.01269397240227
-103.5287012701784
-93.75055900953245
-84.68620309424014
-81.87593377130418
-92.73417838358807
-89.01179919313029
-96.72207597559579
-79.19634709339135
-91.3873684932435
-76.94027354511314
-98.05416282853203
-86.2536093555467
-81.73257062250404
-82.71412142387818
-103.19954494371785
-107.53110380125399
-80.2924453983074
-94.90563084330299
-82.5817544103634
-67.80708892873783
-78.40342041812609
-79.92881321713938
-74.48524975288218
-89.22206478919976
-71.04886287434734
-76.63279310677493
-79.67517350098889
-75.96803062771517
-82.5909484995549
-81.49207584078748
-75.21348634078493
-78.35875681296744
-68.23665998006891
-65.53539277491932
-79.3081043023798
-70.57780202877532
-762.1575965417371
-168.97333310792618
-135.08295369902845
-127.06541375835144
-101.99174091419175
-103.74905449716238
-109.70305656820953
-103.85627066068695
-92.14555732319846
-80.8160934521529
-89.9249021619041
-75.59547238476864
-88.422889

-86.37447561585847
-65.5914455303533
-77.81696223702218
-70.57016832337267
-68.03791392974779
-76.3714685614674
-70.17557810069209
-65.78612703060337
-87.98792564037961
-81.24187478713657
-73.50665147710882
-67.8509406568409
-67.14317447914178
-73.97401718312365
-1087.3616844244953
-255.90943717536334
-187.70369190837528
-166.70811472467366
-188.50098008423384
-160.20758786311893
-153.9825680594373
-153.1901054020933
-138.58432280053592
-101.79227789461629
-125.00779240451784
-121.89313420056587
-108.23614079811824
-100.9246132603598
-93.08310038145594
-107.51922052865947
-102.94811906365597
-97.73376373397369
-107.26574977025253
-86.8364671819055
-87.0611455783407
-79.50632643210243
-80.7211942229084
-77.11630674037882
-80.83424098664554
-108.73591975940212
-79.07548137865408
-95.27709862409583
-92.43189350052306
-84.60367696125431
-91.79304254253789
-90.03982244500793
-74.2742786799462
-84.76452216147987
-88.2716949922854
-80.14847124896102
-104.86417131335352
-83.47884326448323
-93.

In [7]:
# Mean and standard deviation of the per-fold final-fit training times.
for time_label, time_reducer in (('Training Time mean:', np.mean),
                                 ('Training Time std:', np.std)):
    print(time_label, [time_reducer(time_training_MOGP_AR)])

Training Time mean: [54.335476112365725]
Training Time std: [5.503073564179976]


In [8]:
# Mean and standard deviation across folds for each weighted metric,
# printed in the order precision, recall, F1.
metric_summary_rows = (
    ('Mean Precision-Weighted:', np.mean, Precision_weighted_MOGP_AR),
    ('Std Precision-Weighted:', np.std, Precision_weighted_MOGP_AR),
    ('Mean Recall-Weighted:', np.mean, Recall_weighted_MOGP_AR),
    ('Std Recall-Weighted:', np.std, Recall_weighted_MOGP_AR),
    ('Mean F1-Weighted:', np.mean, F1_weighted_MOGP_AR),
    ('Std F1-Weighted:', np.std, F1_weighted_MOGP_AR),
)
for metric_label, metric_reducer, fold_scores in metric_summary_rows:
    print(metric_label, [metric_reducer(fold_scores)])

Mean Precision-Weighted: [0.9462088134088134]
Std Precision-Weighted: [0.03973164100298759]
Mean Recall-Weighted: [0.9391999999999999]
Std Recall-Weighted: [0.037727443592165094]
Mean F1-Weighted: [0.939969637923246]
Std F1-Weighted: [0.03949814059718927]
