In [1]:
import tensorflow.compat.v1 as tf
import scipy.io
import numpy as np
import os
import random
import math
import pandas as pd
import seaborn as sns
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import Lasso
from sklearn.feature_selection import RFECV, RFE
import multiprocessing
import datetime
import hdf5storage
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC

In [2]:
def fnn(x, input_size, output_size, keep_prob, stddev=0.01, constant=0.0001, dropout=True, end=False):
    fc_w = tf.Variable(tf.truncated_normal([input_size,output_size], stddev=stddev,seed=np.random.seed(2018)))
    fc_b = tf.Variable(tf.constant(constant,shape=[output_size]), dtype=tf.float32)
    fc_h = tf.nn.relu(tf.matmul(x,fc_w)+fc_b) if not end else tf.matmul(x,fc_w)+fc_b
    return tf.nn.dropout(fc_h, keep_prob,seed=np.random.seed(2018)) if dropout else fc_h

In [3]:
def fcn(x, input_size, output_size, nlayers, nparameters, keep_prob):
    if nlayers == 1:
        h1 = fnn(x, input_size, output_size, keep_prob, end=True)
    elif nlayers == 2:
        h1 = fnn(fnn(x, input_size, nparameters, keep_prob, end=False), nparameters, output_size, keep_prob, end=True)
    elif nlayers >= 3:
        h0 = fnn(x, input_size, nparameters, keep_prob, end=False)
        for j in range(0,nlayers-2):
            if j == 0:
                h1 = fnn(h0, nparameters, nparameters, keep_prob, end=False)
            else:
                h1 = fnn(h1, nparameters, nparameters, keep_prob, end=False)
        h1 = fnn(h1, nparameters, output_size, keep_prob, end=True)
    else:
        print("# of layers can't be smaller than 0")
    return h1

In [4]:
def rfc(train_data, train_label, test_data, test_label):
    rf = RandomForestClassifier(n_estimators=150,
                                    criterion='gini',
                                    max_depth=None,
                                    min_samples_split=2,
                                    min_samples_leaf=1,
                                    min_weight_fraction_leaf=0.0,
                                    max_features=None,
                                    max_leaf_nodes=None,
                                    bootstrap=True,
                                    oob_score=False,
                                    n_jobs=-1,
                                    random_state=123,
                                    verbose=0,
                                    warm_start=False,
                                    class_weight=None)
    rf.fit(train_data, train_label.ravel())
    result = rf.predict_proba(test_data)
    acc = 0.0
    for i in range(np.shape(test_data)[0]):
        r = np.argmax(result[i])
        if r == test_label[i]:
            acc += 1
    acc /= np.shape(test_data)[0]
    acc *= 100
    return acc, result

In [5]:
def KNN(train_data, train_label, test_data, test_label):
    neigh = KNeighborsClassifier(n_neighbors=3)
    neigh.fit(train_data, train_label.ravel())
    result = neigh.predict_proba(test_data)
    acc = 0.0
    for i in range(np.shape(test_data)[0]):
        r = np.argmax(result[i])
        if r == test_label[i]:
            acc += 1
    acc /= np.shape(test_data)[0]
    acc *= 100
    return acc, result

In [7]:
def ovr(train_data, train_label, test_data, test_label):
    clf = OneVsRestClassifier(SVC(probability=True))
    clf.fit(train_data, train_label.ravel())
    result = clf.predict_proba(test_data)
    
    acc = 0.0
    for i in range(np.shape(test_data)[0]):
        r = np.argmax(result[i])
        if r == test_label[i]:
            acc += 1
    acc /= np.shape(test_data)[0]
    acc *= 100
    return acc, result

In [8]:
def dnn(train_data, train_label, test_data, test_label):
    g = tf.Graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth=True
    batch_size = 10
    input_size = np.shape(train_data)[1]
    output_size = 31

    with g.as_default():
        p_x = tf.placeholder(tf.float32, [batch_size, 1, input_size, 1])
        p_y = tf.placeholder(tf.float32, [batch_size, output_size])
        keep_prob = tf.placeholder(tf.float32)
        h10_flat = tf.reshape(p_x, [batch_size,-1])
        h1 = fnn(h10_flat, input_size, 2048, keep_prob, end=False)
        h2 = fnn(h1, 2048, 2048, keep_prob, end=False)
        h3 = fnn(h2, 2048, 31, keep_prob, end=True)
        h4 = tf.reshape(h3, [batch_size, 31])
        h_c = tf.nn.softmax(h4)
        loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(labels=p_y, logits=h4))
        optim = tf.train.AdamOptimizer(1e-5)
        trainer = optim.minimize(loss)
    
    accuracy = 0.0
    result = np.zeros([np.shape(test_data)[0], 31])
    with tf.Session(graph=g, config=config) as sess:
        sess.run(tf.global_variables_initializer())
        for e in range(0,120):
            loss_tot = 0.0
            for i in range(0,int(np.ceil(np.shape(train_data)[0]/batch_size))):
                a = np.random.randint(0,np.shape(train_data)[0],size=batch_size)
                x = train_data[a].reshape([batch_size, 1, input_size, 1])#[4,1,18181,1]
                y = np.zeros([batch_size, output_size])
                index = train_label[a]
                for u in range(0,batch_size):
                    y[u,index[u]] = 1
                _ , loss_val = sess.run([trainer, loss], feed_dict={p_x:x, p_y:y, keep_prob:0.6})
                loss_tot += loss_val
            print("%d epoch Loss: %f" % (e,(loss_tot)/np.shape(train_data)[0]))
        temp = 0
        for i in range(0,int(np.floor(np.shape(test_data)[0]/batch_size))):
            x = test_data[i*batch_size:(i+1)*batch_size].reshape([batch_size, 1, input_size, 1])
            out = sess.run(h_c, feed_dict={p_x:x, keep_prob:1})
            for j in range(0, batch_size):
                t = np.squeeze(out[j])
                result[temp] = t
                temp+=1
        remain = int(np.shape(test_data)[0]-np.floor(np.shape(test_data)[0]/batch_size)*batch_size)
        if remain > 0:
            x = test_data[-batch_size-1:-1].reshape([batch_size, 1, input_size, 1])
            out = sess.run(h_c, feed_dict={p_x:x, keep_prob:1})
            for j in range(0,int(remain)):
                t = np.squeeze(out[j+(batch_size-remain)])
                result[temp] = t
                temp+=1
        for i in range(0,np.shape(test_data)[0]):
            ind = np.argmax(np.squeeze(result[i]))
            if ind == test_label[i]:
                accuracy += 1
        accuracy /= np.shape(test_data)[0]*0.01
        sess.close()
    return accuracy, result

In [9]:
dataID = hdf5storage.loadmat('data.mat')
data = np.array(dataID['data'], dtype=np.float32)
gt1 = scipy.io.loadmat('label.mat')
label = np.array(gt1['label'], dtype=np.int32)

In [10]:
Outer_loop = 2
Inner_loop = 2

In [None]:
#################################################################################################

In [None]:
#DNN RandonForest

In [None]:
if __name__ == "__main__":
#Load data

    
    #Initialize
    label -= 1
    np.random.seed(2018)


    
    t_index = np.random.permutation(int(np.shape(data)[0]/Outer_loop)*Outer_loop)
    t_index = np.reshape(t_index, [Outer_loop, -1])
    os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"]="0" 
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
    config = tf.ConfigProto()
    config.gpu_options.allow_growth=True
    
    box = np.array([4000], dtype=np.int32)
    flag = 0
    for test_index in t_index:
        if flag == Outer_loop-1:
            test_index = np.array(np.concatenate((test_index, np.array(range(int(np.shape(data)[0]/Outer_loop)*Outer_loop,np.shape(data)[0]))), axis=0), dtype=np.int32)
        train_index = np.setdiff1d(np.array(range(0,np.shape(data)[0])), test_index)
        train_data = data[train_index]
        train_label = label[train_index]
        test_data = data[test_index]
        test_label = label[test_index]
        
        kf = np.random.permutation(int(np.shape(train_data)[0]/Inner_loop)*Inner_loop)
        kf = kf.reshape([Inner_loop]+[-1])
        val_result = np.zeros([np.shape(train_data)[0],48], dtype=np.float32)
        
        tot_acc = np.zeros([Inner_loop,5], dtype=np.float32)
        lsvc = LinearSVC(C=1, penalty="l1", dual=False).fit(data, label)
        coef = np.squeeze(np.sum(np.square(np.array(lsvc.coef_)), axis=0))
        coefidx = np.argsort(coef)

        
        u = np.sum(tot_acc,0)
       
        best_n = box[np.argmax(u)]
        idx = coefidx[-best_n:]
        
        tr_data = train_data[:,idx]
        te_data = test_data[:,idx]
        nn_acc, result_nn = dnn(tr_data, train_label, te_data, test_label)
        rf_acc, result_rf = rfc(tr_data, train_label, te_data, test_label)
        en_acc = 0.0
        for i in range(0,np.shape(te_data)[0]):
            r = np.argmax(result_nn[i]+result_rf[i])
            if r == test_label[i]:
                en_acc += 1
        en_acc /= np.shape(te_data)[0]*0.01
        print("Outer_fold # of features:  %d, Neural network accuracy: %f, Random forests accuracy: %f, Ensemble accuracy: %f" % (best_n, nn_acc, rf_acc, en_acc))
        flag += 1

  return f(**kwargs)


0 epoch Loss: 3.372628
1 epoch Loss: 3.203314
2 epoch Loss: 3.062867
3 epoch Loss: 2.923363
4 epoch Loss: 2.826230
5 epoch Loss: 2.701366
6 epoch Loss: 2.626163
7 epoch Loss: 2.558419
8 epoch Loss: 2.450387
9 epoch Loss: 2.407938
10 epoch Loss: 2.347855
11 epoch Loss: 2.318777
12 epoch Loss: 2.237248
13 epoch Loss: 2.192678
14 epoch Loss: 2.132828
15 epoch Loss: 2.142756
16 epoch Loss: 2.048900
17 epoch Loss: 1.992677
18 epoch Loss: 2.026711
19 epoch Loss: 1.947907
20 epoch Loss: 1.937680
21 epoch Loss: 1.883471
22 epoch Loss: 1.886932
23 epoch Loss: 1.818919
24 epoch Loss: 1.834480
25 epoch Loss: 1.761302
26 epoch Loss: 1.749528
27 epoch Loss: 1.706164
28 epoch Loss: 1.681559
29 epoch Loss: 1.677451
30 epoch Loss: 1.660680
31 epoch Loss: 1.592473
32 epoch Loss: 1.580547
33 epoch Loss: 1.634768
34 epoch Loss: 1.558004
35 epoch Loss: 1.532536
36 epoch Loss: 1.497547
37 epoch Loss: 1.485069
38 epoch Loss: 1.447327
39 epoch Loss: 1.478999
40 epoch Loss: 1.452560
41 epoch Loss: 1.486562
42

  return f(**kwargs)


In [None]:
#Neural network accuracy: 80.576978, Random forests accuracy: 70.951157, Ensemble accuracy: 82.405027

In [None]:
################################################################################################

In [None]:
#DNN OvrSVM

In [33]:
if __name__ == "__main__":
#Load data

    
    #Initialize
    label -= 1
    np.random.seed(2018)


    
    t_index = np.random.permutation(int(np.shape(data)[0]/Outer_loop)*Outer_loop)
    t_index = np.reshape(t_index, [Outer_loop, -1])
    os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"]="0" 
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
    config = tf.ConfigProto()
    config.gpu_options.allow_growth=True
    
    box = np.array([4000], dtype=np.int32)
    flag = 0
    for test_index in t_index:
        if flag == Outer_loop-1:
            test_index = np.array(np.concatenate((test_index, np.array(range(int(np.shape(data)[0]/Outer_loop)*Outer_loop,np.shape(data)[0]))), axis=0), dtype=np.int32)
        train_index = np.setdiff1d(np.array(range(0,np.shape(data)[0])), test_index)
        train_data = data[train_index]
        train_label = label[train_index]
        test_data = data[test_index]
        test_label = label[test_index]
        
        kf = np.random.permutation(int(np.shape(train_data)[0]/Inner_loop)*Inner_loop)
        kf = kf.reshape([Inner_loop]+[-1])
        val_result = np.zeros([np.shape(train_data)[0],48], dtype=np.float32)
        
        tot_acc = np.zeros([Inner_loop,5], dtype=np.float32)
        lsvc = LinearSVC(C=1, penalty="l1", dual=False).fit(data, label)
        coef = np.squeeze(np.sum(np.square(np.array(lsvc.coef_)), axis=0))
        coefidx = np.argsort(coef)

        
        u = np.sum(tot_acc,0)
       
        best_n = box[np.argmax(u)]
        idx = coefidx[-best_n:]
        
        tr_data = train_data[:,idx]
        te_data = test_data[:,idx]
        nn_acc, result_nn = dnn(tr_data, train_label, te_data, test_label)
        rf_acc, result_rf = ovr(tr_data, train_label, te_data, test_label)
        en_acc = 0.0
        for i in range(0,np.shape(te_data)[0]):
            r = np.argmax(result_nn[i]+result_rf[i])
            if r == test_label[i]:
                en_acc += 1
        en_acc /= np.shape(te_data)[0]*0.01
        print("Outer_fold # of features:  %d, Neural network accuracy: %f, Random forests accuracy: %f, Ensemble accuracy: %f" % (best_n, nn_acc, rf_acc, en_acc))
        flag += 1

  return f(**kwargs)


0 epoch Loss: 3.378391
1 epoch Loss: 3.207388
2 epoch Loss: 3.077537
3 epoch Loss: 2.933593
4 epoch Loss: 2.847540
5 epoch Loss: 2.722637
6 epoch Loss: 2.605994
7 epoch Loss: 2.529216
8 epoch Loss: 2.471806
9 epoch Loss: 2.412961
10 epoch Loss: 2.357366
11 epoch Loss: 2.273908
12 epoch Loss: 2.188370
13 epoch Loss: 2.219376
14 epoch Loss: 2.153436
15 epoch Loss: 2.127754
16 epoch Loss: 2.099359
17 epoch Loss: 1.978238
18 epoch Loss: 1.973884
19 epoch Loss: 1.957685
20 epoch Loss: 1.926740
21 epoch Loss: 1.887076
22 epoch Loss: 1.833307
23 epoch Loss: 1.821437
24 epoch Loss: 1.799424
25 epoch Loss: 1.779022
26 epoch Loss: 1.719849
27 epoch Loss: 1.696606
28 epoch Loss: 1.645789
29 epoch Loss: 1.702948
30 epoch Loss: 1.662552
31 epoch Loss: 1.600308
32 epoch Loss: 1.613919
33 epoch Loss: 1.565426
34 epoch Loss: 1.552764
35 epoch Loss: 1.499697
36 epoch Loss: 1.478473
37 epoch Loss: 1.506360
38 epoch Loss: 1.451754
39 epoch Loss: 1.461858
40 epoch Loss: 1.432213
41 epoch Loss: 1.404731
42

  return f(**kwargs)


KeyboardInterrupt: 

In [None]:
#Neural network accuracy: 80.205656, Ovrsvm accuracy: 76.806627, Ensemble accuracy: 81.205370

In [None]:
########################################################################

In [None]:
#DNN KNN

In [9]:
if __name__ == "__main__":
#Load data

    
    #Initialize
    label -= 1
    np.random.seed(2018)


    
    t_index = np.random.permutation(int(np.shape(data)[0]/Outer_loop)*Outer_loop)
    t_index = np.reshape(t_index, [Outer_loop, -1])
    os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"]="0" 
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
    config = tf.ConfigProto()
    config.gpu_options.allow_growth=True
    
    box = np.array([4000], dtype=np.int32)
    flag = 0
    for test_index in t_index:
        if flag == Outer_loop-1:
            test_index = np.array(np.concatenate((test_index, np.array(range(int(np.shape(data)[0]/Outer_loop)*Outer_loop,np.shape(data)[0]))), axis=0), dtype=np.int32)
        train_index = np.setdiff1d(np.array(range(0,np.shape(data)[0])), test_index)
        train_data = data[train_index]
        train_label = label[train_index]
        test_data = data[test_index]
        test_label = label[test_index]
        
        kf = np.random.permutation(int(np.shape(train_data)[0]/Inner_loop)*Inner_loop)
        kf = kf.reshape([Inner_loop]+[-1])
        val_result = np.zeros([np.shape(train_data)[0],48], dtype=np.float32)
        
        tot_acc = np.zeros([Inner_loop,5], dtype=np.float32)
        lsvc = LinearSVC(C=1, penalty="l1", dual=False).fit(data, label)
        coef = np.squeeze(np.sum(np.square(np.array(lsvc.coef_)), axis=0))
        coefidx = np.argsort(coef)
        
        u = np.sum(tot_acc,0)
        
       
        best_n = box[np.argmax(u)]
        idx = coefidx[-best_n:]
        
        tr_data = train_data[:,idx]
        te_data = test_data[:,idx]
        nn_acc, result_nn = dnn(tr_data, train_label, te_data, test_label)
        rf_acc, result_rf = KNN(tr_data, train_label, te_data, test_label)
        en_acc = 0.0
        for i in range(0,np.shape(te_data)[0]):
            r = np.argmax(result_nn[i]+result_rf[i])
            if r == test_label[i]:
                en_acc += 1
        en_acc /= np.shape(te_data)[0]*0.01
        print("Outer_fold # of features:  %d, Neural network accuracy: %f, KNN accuracy: %f, Ensemble accuracy: %f" % (best_n, nn_acc, rf_acc, en_acc))
        flag += 1

  return f(**kwargs)


Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

0 epoch Loss: 3.379283
1 epoch Loss: 3.198632
2 epoch Loss: 3.062926
3 epoch Loss: 2.935328
4 epoch Loss: 2.849031
5 epoch Loss: 2.742313
6 epoch Loss: 2.626125
7 epoch Loss: 2.562875
8 epoch Loss: 2.484674
9 epoch Loss: 2.441144
10 epoch Loss: 2.366846
11 epoch Loss: 2.308050
12 epoch Loss: 2.226232
13 epoch Loss: 2.238018
14 epoch Loss: 2.149177
15 epoch Loss: 2.161984
16 epoch Loss: 2.095538
17 epoch Loss: 2.037252
18 epoch Loss: 2.017752
19 epoch Loss: 1.999666
20 epoch Loss: 1.918216
21 epoch Loss: 1.883197
22 epoch Loss: 1.872578
23 epoch Loss: 1.836485
24 epoch Loss: 1.764731
25 epoch Loss: 1.785466

  return f(**kwargs)


KeyboardInterrupt: 

In [None]:
#Neural network accuracy: 79.748643, KNN accuracy: 41.902314, Ensemble accuracy: 71.293916

In [None]:
########################################################################

In [None]:
#RandonForest OvrSVM

In [44]:
if __name__ == "__main__":
#Load data

    
    #Initialize
    label -= 1
    np.random.seed(2018)


    
    t_index = np.random.permutation(int(np.shape(data)[0]/Outer_loop)*Outer_loop)
    t_index = np.reshape(t_index, [Outer_loop, -1])
    os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"]="0" 
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
    config = tf.ConfigProto()
    config.gpu_options.allow_growth=True
    
    box = np.array([4000], dtype=np.int32)
    flag = 0
    for test_index in t_index:
        if flag == Outer_loop-1:
            test_index = np.array(np.concatenate((test_index, np.array(range(int(np.shape(data)[0]/Outer_loop)*Outer_loop,np.shape(data)[0]))), axis=0), dtype=np.int32)
        train_index = np.setdiff1d(np.array(range(0,np.shape(data)[0])), test_index)
        train_data = data[train_index]
        train_label = label[train_index]
        test_data = data[test_index]
        test_label = label[test_index]
        
        kf = np.random.permutation(int(np.shape(train_data)[0]/Inner_loop)*Inner_loop)
        kf = kf.reshape([Inner_loop]+[-1])
        val_result = np.zeros([np.shape(train_data)[0],48], dtype=np.float32)
        
        tot_acc = np.zeros([Inner_loop,5], dtype=np.float32)
        lsvc = LinearSVC(C=1, penalty="l1", dual=False).fit(data, label)
        coef = np.squeeze(np.sum(np.square(np.array(lsvc.coef_)), axis=0))
        coefidx = np.argsort(coef)

        
        u = np.sum(tot_acc,0)
        
       
        best_n = box[np.argmax(u)]
        idx = coefidx[-best_n:]
        
        tr_data = train_data[:,idx]
        te_data = test_data[:,idx]
        nn_acc, result_nn = ovr(tr_data, train_label, te_data, test_label)
        rf_acc, result_rf = rfc(tr_data, train_label, te_data, test_label)
        en_acc = 0.0
        for i in range(0,np.shape(te_data)[0]):
            r = np.argmax(result_nn[i]+result_rf[i])
            if r == test_label[i]:
                en_acc += 1
        en_acc /= np.shape(te_data)[0]*0.01
        print("Outer_fold # of features:  %d, Ovrsvm accuracy: %f, KNN accuracy: %f, Ensemble accuracy: %f" % (best_n, nn_acc, rf_acc, en_acc))
        flag += 1

  return f(**kwargs)


Outer_fold # of features:  4000, Ovrsvm accuracy: 76.949443, KNN accuracy: 70.951157, Ensemble accuracy: 80.177092


  return f(**kwargs)


Outer_fold # of features:  4000, Ovrsvm accuracy: 75.921165, KNN accuracy: 71.093973, Ensemble accuracy: 78.834619


In [None]:
########################################################################

In [None]:
#RandonForest KNN

In [11]:
if __name__ == "__main__":
#Load data

    
    #Initialize
    label -= 1
    np.random.seed(2018)


    
    t_index = np.random.permutation(int(np.shape(data)[0]/Outer_loop)*Outer_loop)
    t_index = np.reshape(t_index, [Outer_loop, -1])
    os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"]="0" 
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
    config = tf.ConfigProto()
    config.gpu_options.allow_growth=True
    
    box = np.array([4000], dtype=np.int32)
    flag = 0
    for test_index in t_index:
        if flag == Outer_loop-1:
            test_index = np.array(np.concatenate((test_index, np.array(range(int(np.shape(data)[0]/Outer_loop)*Outer_loop,np.shape(data)[0]))), axis=0), dtype=np.int32)
        train_index = np.setdiff1d(np.array(range(0,np.shape(data)[0])), test_index)
        train_data = data[train_index]
        train_label = label[train_index]
        test_data = data[test_index]
        test_label = label[test_index]
        
        kf = np.random.permutation(int(np.shape(train_data)[0]/Inner_loop)*Inner_loop)
        kf = kf.reshape([Inner_loop]+[-1])
        val_result = np.zeros([np.shape(train_data)[0],48], dtype=np.float32)
        
        tot_acc = np.zeros([Inner_loop,5], dtype=np.float32)
        lsvc = LinearSVC(C=1, penalty="l1", dual=False).fit(data, label)
        coef = np.squeeze(np.sum(np.square(np.array(lsvc.coef_)), axis=0))
        coefidx = np.argsort(coef)

        u = np.sum(tot_acc,0)
        
       
        best_n = box[np.argmax(u)]
        idx = coefidx[-best_n:]
        
        tr_data = train_data[:,idx]
        te_data = test_data[:,idx]
        nn_acc, result_nn = KNN(tr_data, train_label, te_data, test_label)
        rf_acc, result_rf = rfc(tr_data, train_label, te_data, test_label)
        en_acc = 0.0
        for i in range(0,np.shape(te_data)[0]):
            r = np.argmax(result_nn[i]+result_rf[i])
            if r == test_label[i]:
                en_acc += 1
        en_acc /= np.shape(te_data)[0]*0.01
        print("Outer_fold # of features:  %d, KNN accuracy: %f, Random Forest accuracy: %f, Ensemble accuracy: %f" % (best_n, nn_acc, rf_acc, en_acc))
        flag += 1

  return f(**kwargs)


Outer_fold # of features:  4000, KNN accuracy: 41.902314, Random Forest accuracy: 70.951157, Ensemble accuracy: 58.954584


  return f(**kwargs)


KeyboardInterrupt: 

In [None]:
#KNN accuracy: 41.902314, Random Forest accuracy: 70.951157, Ensemble accuracy: 58.954584

In [None]:
########################################################################

In [None]:
#OvrSVM KNN

In [11]:
if __name__ == "__main__":
#Load data

    
    #Initialize
    label -= 1
    np.random.seed(2018)


    
    t_index = np.random.permutation(int(np.shape(data)[0]/Outer_loop)*Outer_loop)
    t_index = np.reshape(t_index, [Outer_loop, -1])
    os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"]="0" 
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
    config = tf.ConfigProto()
    config.gpu_options.allow_growth=True
    
    box = np.array([4000], dtype=np.int32)
    flag = 0
    for test_index in t_index:
        if flag == Outer_loop-1:
            test_index = np.array(np.concatenate((test_index, np.array(range(int(np.shape(data)[0]/Outer_loop)*Outer_loop,np.shape(data)[0]))), axis=0), dtype=np.int32)
        train_index = np.setdiff1d(np.array(range(0,np.shape(data)[0])), test_index)
        train_data = data[train_index]
        train_label = label[train_index]
        test_data = data[test_index]
        test_label = label[test_index]
        
        kf = np.random.permutation(int(np.shape(train_data)[0]/Inner_loop)*Inner_loop)
        kf = kf.reshape([Inner_loop]+[-1])
        val_result = np.zeros([np.shape(train_data)[0],48], dtype=np.float32)
        
        tot_acc = np.zeros([Inner_loop,5], dtype=np.float32)
        lsvc = LinearSVC(C=1, penalty="l1", dual=False).fit(data, label)
        coef = np.squeeze(np.sum(np.square(np.array(lsvc.coef_)), axis=0))
        coefidx = np.argsort(coef)

        
        u = np.sum(tot_acc,0)
        
       
        best_n = box[np.argmax(u)]
        idx = coefidx[-best_n:]
        
        tr_data = train_data[:,idx]
        te_data = test_data[:,idx]
        nn_acc, result_nn = ovr(tr_data, train_label, te_data, test_label)
        rf_acc, result_rf = KNN(tr_data, train_label, te_data, test_label)
        en_acc = 0.0
        for i in range(0,np.shape(te_data)[0]):
            r = np.argmax(result_nn[i]+result_rf[i])
            if r == test_label[i]:
                en_acc += 1
        en_acc /= np.shape(te_data)[0]*0.01
        print("Outer_fold # of features:  %d, OvrSVM accuracy: %f, KNN accuracy: %f, Ensemble accuracy: %f" % (best_n, nn_acc, rf_acc, en_acc))
        flag += 1

  return f(**kwargs)


Outer_fold # of features:  4000, OvrSVM accuracy: 76.949443, KNN accuracy: 41.902314, Ensemble accuracy: 64.952871


  return f(**kwargs)


Outer_fold # of features:  4000, OvrSVM accuracy: 75.921165, KNN accuracy: 42.330763, Ensemble accuracy: 63.267638


In [7]:
len(MD_sel)

120

In [9]:
data = {'Accuracy':MD_acc,'Feature Select':MD_sel, 'Model':MD_md}

In [10]:
df = pd.DataFrame(data)
df

Unnamed: 0,Accuracy,Feature Select,Model
0,80.571429,no select,ensemble
1,79.571429,no select,ensemble
2,80.571429,no select,ensemble
3,79.285714,no select,ensemble
4,79.571429,no select,ensemble
...,...,...,...
115,72.857143,lsvc,random forest
116,72.714286,lsvc,random forest
117,75.714286,lsvc,random forest
118,74.000000,lsvc,random forest
