In [23]:
import tensorflow.compat.v1 as tf
import scipy.io
import numpy as np
import os
import random
import math
#from skimage.measure import structural_similarity as ssim
#from sporco import util
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import linear_model
from sklearn.feature_selection import RFECV, RFE
import multiprocessing
import datetime
import hdf5storage

In [24]:
def fnn(x, input_size, output_size, keep_prob, stddev=0.01, constant=0.0001, dropout=True, end=False):
    fc_w = tf.Variable(tf.truncated_normal([input_size,output_size], stddev=stddev,seed=np.random.seed(2018)))
    fc_b = tf.Variable(tf.constant(constant,shape=[output_size]), dtype=tf.float32)
    fc_h = tf.nn.relu(tf.matmul(x,fc_w)+fc_b) if not end else tf.matmul(x,fc_w)+fc_b
    return tf.nn.dropout(fc_h, keep_prob,seed=np.random.seed(2018)) if dropout else fc_h

In [25]:
def fcn(x, input_size, output_size, nlayers, nparameters, keep_prob):
    if nlayers == 1:
        h1 = fnn(x, input_size, output_size, keep_prob, end=True)
    elif nlayers == 2:
        h1 = fnn(fnn(x, input_size, nparameters, keep_prob, end=False), nparameters, output_size, keep_prob, end=True)
    elif nlayers >= 3:
        h0 = fnn(x, input_size, nparameters, keep_prob, end=False)
        for j in range(0,nlayers-2):
            if j == 0:
                h1 = fnn(h0, nparameters, nparameters, keep_prob, end=False)
            else:
                h1 = fnn(h1, nparameters, nparameters, keep_prob, end=False)
        h1 = fnn(h1, nparameters, output_size, keep_prob, end=True)
    else:
        print("# of layers can't be smaller than 0")
    return h1

In [26]:
def rfc(train_data, train_label, test_data, test_label):
    rf = RandomForestClassifier(n_estimators=150,
                                    criterion='gini',
                                    max_depth=None,
                                    min_samples_split=2,
                                    min_samples_leaf=1,
                                    min_weight_fraction_leaf=0.0,
                                    max_features=None,
                                    max_leaf_nodes=None,
                                    bootstrap=True,
                                    oob_score=False,
                                    n_jobs=10,
                                    random_state=123,
                                    verbose=0,
                                    warm_start=False,
                                    class_weight=None)
    rf.fit(train_data, train_label.ravel())
    result = rf.predict_proba(test_data)
    acc = 0.0
    for i in range(np.shape(test_data)[0]):
        r = np.argmax(result[i])
        if r == test_label[i]:
            acc += 1
    acc /= np.shape(test_data)[0]
    acc *= 100
    return acc, result

In [27]:
def dnn(train_data, train_label, test_data, test_label):
    g = tf.Graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth=True
    batch_size = 10
    input_size = np.shape(train_data)[1]
    output_size = 31

    with g.as_default():
        p_x = tf.placeholder(tf.float32, [batch_size, 1, input_size, 1])
        p_y = tf.placeholder(tf.float32, [batch_size, output_size])
        keep_prob = tf.placeholder(tf.float32)
        h10_flat = tf.reshape(p_x, [batch_size,-1])
        h1 = fnn(h10_flat, input_size, 2048, keep_prob, end=False)
        h2 = fnn(h1, 2048, 2048, keep_prob, end=False)
        h3 = fnn(h2, 2048, 31, keep_prob, end=True)
        h4 = tf.reshape(h3, [batch_size, 31])
        h_c = tf.nn.softmax(h4)
        loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(labels=p_y, logits=h4))
        optim = tf.train.AdamOptimizer(1e-5)
        trainer = optim.minimize(loss)
    
    accuracy = 0.0
    result = np.zeros([np.shape(test_data)[0], 31])
    with tf.Session(graph=g, config=config) as sess:
        sess.run(tf.global_variables_initializer())
        for e in range(0,120):
            loss_tot = 0.0
            for i in range(0,int(np.ceil(np.shape(train_data)[0]/batch_size))):
                a = np.random.randint(0,np.shape(train_data)[0],size=batch_size)
                x = train_data[a].reshape([batch_size, 1, input_size, 1])#[4,1,18181,1]
                y = np.zeros([batch_size, output_size])
                index = train_label[a]
                for u in range(0,batch_size):
                    y[u,index[u]] = 1
                _ , loss_val = sess.run([trainer, loss], feed_dict={p_x:x, p_y:y, keep_prob:0.6})
                loss_tot += loss_val
            print("%d epoch Loss: %f" % (e,(loss_tot)/np.shape(train_data)[0]))
        temp = 0
        for i in range(0,int(np.floor(np.shape(test_data)[0]/batch_size))):
            x = test_data[i*batch_size:(i+1)*batch_size].reshape([batch_size, 1, input_size, 1])
            out = sess.run(h_c, feed_dict={p_x:x, keep_prob:1})
            for j in range(0, batch_size):
                t = np.squeeze(out[j])
                result[temp] = t
                temp+=1
        remain = int(np.shape(test_data)[0]-np.floor(np.shape(test_data)[0]/batch_size)*batch_size)
        if remain > 0:
            x = test_data[-batch_size-1:-1].reshape([batch_size, 1, input_size, 1])
            out = sess.run(h_c, feed_dict={p_x:x, keep_prob:1})
            for j in range(0,int(remain)):
                t = np.squeeze(out[j+(batch_size-remain)])
                result[temp] = t
                temp+=1
        for i in range(0,np.shape(test_data)[0]):
            ind = np.argmax(np.squeeze(result[i]))
            if ind == test_label[i]:
                accuracy += 1
        accuracy /= np.shape(test_data)[0]*0.01
        sess.close()
    return accuracy, result

In [28]:
dataID = hdf5storage.loadmat('data.mat')
data = np.array(dataID['data'], dtype=np.float32)
gt1 = scipy.io.loadmat('label.mat')
label = np.array(gt1['label'], dtype=np.int32)

In [29]:
Outer_loop = 10
Inner_loop = 10

In [30]:
lsvc = LinearSVC(C=1, penalty="l1", dual=False).fit(data, label)
coef = np.squeeze(np.sum(np.square(np.array(lsvc.coef_)), axis=0))
print(lsvc.coef_)

  return f(**kwargs)


[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]




In [50]:
model = SelectFromModel(lsvc, prefit=True) 

In [51]:
model

SelectFromModel(estimator=LinearSVC(C=1, dual=False, penalty='l1'), prefit=True)

In [56]:
X_new = model.transform(data) 
print(model.get_support()) 

[False False False ... False  True False]


In [60]:
lsvc.coef_

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [69]:


# linear
# linear regression feature importance
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from matplotlib import pyplot
# define dataset
X, y = make_regression(n_samples=1000, n_features=10, n_informative=5, random_state=1)
# define the model
model = LinearRegression()
# fit the model
model.fit(X, y)
# get importance
importance = model.coef_
# summarize feature importance
for i,v in enumerate(importance):
	print('Feature: %0d, Score: %.5f' % (i,v))

Feature: 0, Score: -0.00000
Feature: 1, Score: 12.44483
Feature: 2, Score: -0.00000
Feature: 3, Score: -0.00000
Feature: 4, Score: 93.32225
Feature: 5, Score: 86.50811
Feature: 6, Score: 26.74607
Feature: 7, Score: 3.28535
Feature: 8, Score: -0.00000
Feature: 9, Score: 0.00000


In [63]:
X

array([[-2.02220122,  0.31563495,  0.82797464, ..., -0.50446586,
         0.23009474,  0.76201118],
       [ 0.71256194,  1.74103872, -1.21466535, ...,  1.04131149,
        -0.75850596,  0.74791592],
       [ 0.08338884,  0.92829021,  1.45167891, ...,  1.22393601,
         0.3416886 , -0.12517266],
       ...,
       [ 0.12182436,  0.74220833, -0.64488697, ..., -2.37934499,
         1.82039313, -1.55531804],
       [ 0.00820639, -0.89191578,  0.14747174, ..., -1.78611048,
         0.71238157, -1.07498942],
       [-0.43805451,  0.29078795,  0.17794556, ..., -0.18581086,
        -0.26120192,  0.8632634 ]])

In [64]:
data

array([[0.        , 0.        , 0.        , ..., 0.        , 0.01639344,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.01204819],
       ...,
       [0.        , 0.        , 0.        , ..., 0.0041841 , 0.0125523 ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.09090909, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.00714286]], dtype=float32)

In [59]:
model.coef_

array([-2.78228246e-14,  1.24448279e+01, -2.30926389e-14, -2.33146835e-14,
        9.33222545e+01,  8.65081100e+01,  2.67460667e+01,  3.28534640e+00,
       -2.48689958e-14,  3.10862447e-14])

In [62]:
# get importance
importance0 = lsvc.coef_
# summarize feature importance
for i,v in enumerate(importance0):
	print('Feature: %0d, Score: %.5f' % (i,v))

TypeError: only size-1 arrays can be converted to Python scalars

In [46]:
#r 31 c 35565

In [37]:
len(lsvc.coef_)

31

In [38]:
coefidx = np.argsort(coef)

In [72]:
coefidx1 = np.sort(coef)

In [81]:
coef

array([0.        , 0.        , 0.        , ..., 0.        , 0.07610835,
       0.        ])

In [80]:
coefidx1[35564]

103.13176260155834

In [40]:
coefidx[0:10]

array([    0, 22105, 22104, 22103, 22102, 22101, 22100, 22098, 22097,
       22096])

In [None]:
[    0, 22107, 22106, 22105, 22104, 22103, 22101, 22098, 22097,
       22096]

In [None]:
0, 22105, 22104, 22103, 22102, 22101, 22100, 22098, 22097,
       22096]

In [None]:
data = {'Accuracy':MD_acc,'Feature Number':MD_num, 'Model':MD_md}

In [None]:
df = pd.DataFrame(data)
df

In [None]:
sns.set(style="whitegrid",rc={'figure.figsize':(11.7,8.27)})
sns.set(style="whitegrid")
ax = sns.boxplot(x = "Feature Number", y = "Accuracy", hue="Model", data = df, palette = "Set3")

In [10]:
if __name__ == "__main__":
#Load data

    
    #Initialize
    label -= 1
    np.random.seed(2018)


    
    t_index = np.random.permutation(int(np.shape(data)[0]/Outer_loop)*Outer_loop)
    t_index = np.reshape(t_index, [Outer_loop, -1])
    os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"]="0" 
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
    config = tf.ConfigProto()
    config.gpu_options.allow_growth=True
    
    box = np.array([4000], dtype=np.int32)
    flag = 0
    for test_index in t_index:
        if flag == Outer_loop-1:
            test_index = np.array(np.concatenate((test_index, np.array(range(int(np.shape(data)[0]/Outer_loop)*Outer_loop,np.shape(data)[0]))), axis=0), dtype=np.int32)
        train_index = np.setdiff1d(np.array(range(0,np.shape(data)[0])), test_index)
        train_data = data[train_index]
        train_label = label[train_index]
        test_data = data[test_index]
        test_label = label[test_index]
        
        kf = np.random.permutation(int(np.shape(train_data)[0]/Inner_loop)*Inner_loop)
        kf = kf.reshape([Inner_loop]+[-1])
        val_result = np.zeros([np.shape(train_data)[0],48], dtype=np.float32)
        
        tot_acc = np.zeros([Inner_loop,5], dtype=np.float32)
        #lasso = Lasso()
        lsvc = LinearSVC(C=1, penalty="l1", dual=False).fit(data, label)
        coef = np.squeeze(np.sum(np.square(np.array(lsvc.coef_)), axis=0))
        print(lsvc.coef_)
        #coef = np.squeeze(np.sum(np.square(np.array(lasso.coef_)), axis=0))
        coefidx = np.argsort(coef)
#         for inner_fold in range(0,Inner_loop):
#             val_test_ind = kf[inner_fold]
#             if inner_fold == Inner_loop-1:
#                 val_test_ind = np.array(np.concatenate((val_test_ind,np.array(range(int(np.shape(train_data)[0]/Outer_loop)*Outer_loop,np.shape(train_data)[0]),dtype=np.int32)), axis=0),dtype=np.int32)
            
#             val_train_ind = np.setdiff1d(np.array(range(0,np.shape(train_data)[0]),dtype=np.int32), val_test_ind)
#             val_train = train_data[val_train_ind]
#             val_test = train_data[val_test_ind]
#             val_train_label = train_label[val_train_ind]
#             val_test_label = train_label[val_test_ind]
#             temp = 0
#             for item in box:
#                 idx = coefidx[-item:]
#                 vtrain = val_train[:,idx]
#                 vtest = val_test[:,idx]
#                 nn_acc, result_nn = dnn(vtrain, val_train_label, vtest, val_test_label)
#                 rf_acc, result_rf = rfc(vtrain, val_train_label, vtest, val_test_label)
#                 en_acc = 0.0
#                 for i in range(0,np.shape(vtest)[0]):
#                     r = np.argmax(result_nn[i]+result_rf[i])
#                     if r == val_test_label[i]:
#                         en_acc += 1
#                 en_acc /= np.shape(vtest)[0]*0.01
#                 tot_acc[inner_fold,temp] = en_acc
#                 print("Inner_fold # of features: %d, Neural network accuracy: %f, Random forests accuracy: %f, Ensemble accuracy: %f" % (item, nn_acc, rf_acc, en_acc))
#                 temp += 1
        
#         u = np.sum(tot_acc,0)
       
#         best_n = box[np.argmax(u)]
#         idx = coefidx[-best_n:]
        
#         tr_data = train_data[:,idx]
#         te_data = test_data[:,idx]
#         nn_acc, result_nn = dnn(tr_data, train_label, te_data, test_label)
#         rf_acc, result_rf = rfc(tr_data, train_label, te_data, test_label)
#         en_acc = 0.0
#         for i in range(0,np.shape(te_data)[0]):
#             r = np.argmax(result_nn[i]+result_rf[i])
#             if r == test_label[i]:
#                 en_acc += 1
#         en_acc /= np.shape(te_data)[0]*0.01
#         print("Outer_fold # of features:  %d, Neural network accuracy: %f, Random forests accuracy: %f, Ensemble accuracy: %f" % (best_n, nn_acc, rf_acc, en_acc))
        flag += 1

  return f(**kwargs)


[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


  return f(**kwargs)


[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


  return f(**kwargs)


KeyboardInterrupt: 