In [2]:
import numpy as np
import warnings
import sys
import pandas as pd
import scipy
from scipy.io import arff
if not sys.warnoptions:
    warnings.simplefilter("ignore")


In [1]:
def get_data(path, noise=False):
    data = np.load(path)
    if noise == True :
        data = data + np.random.normal(0, 0.001, data.shape)
    return data

In [3]:
X_train = get_data('enron-train-features.pkl').astype(np.uint8)
y_train = get_data('enron-train-labels.pkl').astype(np.int8)
X_test = get_data('enron-test-features.pkl').astype(np.uint8)
y_test = get_data('enron-test-labels.pkl').astype(np.int8)

In [4]:
print("Train_X: ",X_train.shape)
print("Train_Y: ",y_train.shape)
print("Test_X: ",X_test.shape)
print("Test_Y: ",y_test.shape)

Train_X:  (1123, 1001)
Train_Y:  (1123, 53)
Test_X:  (579, 1001)
Test_Y:  (579, 53)


In [7]:
def hamming_accuracy_score(y_true, y_pred, normalize=True, sample_weight=None):
    '''
    Compute the Hamming score (a.k.a. label-based accuracy) for the multi-label case
    http://stackoverflow.com/q/32239577/395857
    '''
    acc_list = []
    for i in range(y_true.shape[0]):
        set_true = set( np.where(y_true[i])[0] )
        set_pred = set( np.where(y_pred[i])[0] )
        #print('\nset_true: {0}'.format(set_true))
        #print('set_pred: {0}'.format(set_pred))
        tmp_a = None
        if len(set_true) == 0 and len(set_pred) == 0:
            tmp_a = 1
        else:
            tmp_a = len(set_true.intersection(set_pred))/float( len(set_true.union(set_pred)) )
        #print('tmp_a: {0}'.format(tmp_a))
        acc_list.append(tmp_a)
    return np.mean(acc_list)

## Bidirectional Model


In [8]:
import keras
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.backend.tensorflow_backend import clear_session
from sklearn.model_selection import train_test_split

In [9]:
ENS_COUNT = 2
clear_session()

In [10]:
def create_c1node(X_feed,y_now):
    '''
    C1 node Architecture:
    attribute:512:256:1 [saperate for each class]
    loss: Binary crossentropy
    '''
    model = Sequential()
    model.add(Dense(512,activation='relu',input_shape=(X_feed.shape[1],),kernel_initializer='glorot_uniform'))
    model.add(Dropout(0.4))
    model.add(Dense(256,activation='relu',kernel_initializer='glorot_uniform'))
    model.add(Dropout(0.4))
    model.add(Dense(y_now.shape[1],activation='sigmoid',kernel_initializer='glorot_uniform'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
    
    return model

In [11]:
ensembles_fwd = []
for j in range(ENS_COUNT//2):
    print("TRAINING ENSEMBLE {} :\n\n\n".format(j))
    #Model chain
    chain_forward = []

    #Training
    x_t,_,y_t,_ = train_test_split(X_train,y_train,test_size=0.4)
    X_feed = x_t.copy()
    for i in range(y_t.shape[1]):
        print("Training chain node ",i)
        y_now = y_t[:,[i,]].copy()
        print("Shapes:\n X = {} \n Y = {}".format(X_feed.shape,y_now.shape))

        node = create_c1node(X_feed,y_now)
        node.fit(X_feed,y_now,epochs=5,batch_size=50)
        print("Training of node {} complete\n\n".format(i))  
        #Checking accuracy of the node
        #---
        #Adding node to chain 
        chain_forward.append(node)
        #Updating X_feed
        X_feed = np.append(X_feed,y_now,axis=1)
    ensembles_fwd.append(chain_forward)

TRAINING ENSEMBLE 0 :



Training chain node  0
Shapes:
 X = (673, 1001) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 0 complete


Training chain node  1
Shapes:
 X = (673, 1002) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 1 complete


Training chain node  2
Shapes:
 X = (673, 1003) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 2 complete


Training chain node  3
Shapes:
 X = (673, 1004) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 3 complete


Training chain node  4
Shapes:
 X = (673, 1005) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 4 complete


Training chain node  5
Shapes:
 X = (673, 1006) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 5 complete


Training chain node  6
Shapes:
 X = (673, 1007) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Trai

Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 14 complete


Training chain node  15
Shapes:
 X = (673, 1016) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 15 complete


Training chain node  16
Shapes:
 X = (673, 1017) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 16 complete


Training chain node  17
Shapes:
 X = (673, 1018) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 17 complete


Training chain node  18
Shapes:
 X = (673, 1019) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 18 complete


Training chain node  19
Shapes:
 X = (673, 1020) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 19 complete


Training chain node  20
Shapes:
 X = (673, 1021) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 20 complete


Training chain node  21
Shapes:
 X = (673, 1022) 
 Y = (673, 1)
Epoch

Epoch 4/5
Epoch 5/5
Training of node 28 complete


Training chain node  29
Shapes:
 X = (673, 1030) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 29 complete


Training chain node  30
Shapes:
 X = (673, 1031) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 30 complete


Training chain node  31
Shapes:
 X = (673, 1032) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 31 complete


Training chain node  32
Shapes:
 X = (673, 1033) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 32 complete


Training chain node  33
Shapes:
 X = (673, 1034) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 33 complete


Training chain node  34
Shapes:
 X = (673, 1035) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 34 complete


Training chain node  35
Shapes:
 X = (673, 1036) 
 Y = (673, 1)
Epoch 1/5
Epoch

Epoch 5/5
Training of node 42 complete


Training chain node  43
Shapes:
 X = (673, 1044) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 43 complete


Training chain node  44
Shapes:
 X = (673, 1045) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 44 complete


Training chain node  45
Shapes:
 X = (673, 1046) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 45 complete


Training chain node  46
Shapes:
 X = (673, 1047) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 46 complete


Training chain node  47
Shapes:
 X = (673, 1048) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 47 complete


Training chain node  48
Shapes:
 X = (673, 1049) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 48 complete


Training chain node  49
Shapes:
 X = (673, 1050) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch

In [12]:
ensembles_rev = []
for j in range(ENS_COUNT//2):
    print("TRAINING ENSEMBLE {} :\n\n\n".format(j))
    #Model chain
    chain_reverse = []
    #Training
    x_t,_,y_t,_ = train_test_split(X_train,y_train,test_size=0.4)
    X_feed = x_t.copy()
    for i in range(1,y_t.shape[1]+1):
        print("Training chain node ",i)
        y_now = y_t[:,[-i,]].copy()
        print("Shapes:\n X = {} \n Y = {}".format(X_feed.shape,y_now.shape))

        node = create_c1node(X_feed,y_now)
        node.fit(X_feed,y_now,epochs=5,batch_size=50)
        print("Training of node {} complete\n\n".format(i))  
        #Checking accuracy of the node
        #---
        #Adding node to chain 
        chain_reverse.append(node)
        #Updating X_feed
        X_feed = np.append(X_feed,y_now,axis=1)
    ensembles_rev.append(chain_reverse)

TRAINING ENSEMBLE 0 :



Training chain node  1
Shapes:
 X = (673, 1001) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 1 complete


Training chain node  2
Shapes:
 X = (673, 1002) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 2 complete


Training chain node  3
Shapes:
 X = (673, 1003) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 3 complete


Training chain node  4
Shapes:
 X = (673, 1004) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 4 complete


Training chain node  5
Shapes:
 X = (673, 1005) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 5 complete


Training chain node  6
Shapes:
 X = (673, 1006) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 6 complete


Training chain node  7
Shapes:
 X = (673, 1007) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Trai

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 15 complete


Training chain node  16
Shapes:
 X = (673, 1016) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 16 complete


Training chain node  17
Shapes:
 X = (673, 1017) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 17 complete


Training chain node  18
Shapes:
 X = (673, 1018) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 18 complete


Training chain node  19
Shapes:
 X = (673, 1019) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 19 complete


Training chain node  20
Shapes:
 X = (673, 1020) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 20 complete


Training chain node  21
Shapes:
 X = (673, 1021) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 21 complete


Training chain node  22
Shapes:
 X = (673, 1022) 
 Y = (673

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 29 complete


Training chain node  30
Shapes:
 X = (673, 1030) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 30 complete


Training chain node  31
Shapes:
 X = (673, 1031) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 31 complete


Training chain node  32
Shapes:
 X = (673, 1032) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 32 complete


Training chain node  33
Shapes:
 X = (673, 1033) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 33 complete


Training chain node  34
Shapes:
 X = (673, 1034) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 34 complete


Training chain node  35
Shapes:
 X = (673, 1035) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 35 complete


Training chain node  36
Shapes:
 X = (673, 1036) 
 Y = (673

Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 43 complete


Training chain node  44
Shapes:
 X = (673, 1044) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 44 complete


Training chain node  45
Shapes:
 X = (673, 1045) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 45 complete


Training chain node  46
Shapes:
 X = (673, 1046) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 46 complete


Training chain node  47
Shapes:
 X = (673, 1047) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 47 complete


Training chain node  48
Shapes:
 X = (673, 1048) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 48 complete


Training chain node  49
Shapes:
 X = (673, 1049) 
 Y = (673, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 49 complete


Training chain node  50
Shapes:
 X = (673, 1050) 
 Y = (673, 1)
Epoch

In [13]:
y_full_fw = []
y_full_re = []

for k in range(ENS_COUNT//2):
    print("\n\nENSEMBLE {}:\n\n".format(k+1))
    X_feed_fw = X_test.copy()
    X_feed_re = X_test.copy()
    j = 0
    #Model chain
    y_pred_fw = []
    y_pred_re = []
    for i in range(len(chain_forward)):
        node1 = ensembles_fwd[k][i]
        node2 = ensembles_rev[k][i]
        print("Getting op from chain node ",j)
        node_name = "Node" + str(i)
        print("Shapes:\n X = {}".format(X_feed_fw.shape))
        output_fw = node1.predict(X_feed_fw)
        output_fw_bin = output_fw.round().astype(int)
        output_rev = node2.predict(X_feed_re)
        output_rev_bin = output_rev.round().astype(int)
        y_pred_fw.append(output_fw)
        y_pred_re.append(output_rev)
        X_feed_fw = np.append(X_feed_fw,output_fw_bin,axis=1)
        X_feed_re = np.append(X_feed_re,output_rev_bin,axis=1)
        j+=1
    y_full_fw.append(y_pred_fw)
    y_full_re.append(y_pred_re)
        #Updating X_feed
            



ENSEMBLE 1:


Getting op from chain node  0
Shapes:
 X = (579, 1001)
Getting op from chain node  1
Shapes:
 X = (579, 1002)
Getting op from chain node  2
Shapes:
 X = (579, 1003)
Getting op from chain node  3
Shapes:
 X = (579, 1004)
Getting op from chain node  4
Shapes:
 X = (579, 1005)
Getting op from chain node  5
Shapes:
 X = (579, 1006)
Getting op from chain node  6
Shapes:
 X = (579, 1007)
Getting op from chain node  7
Shapes:
 X = (579, 1008)
Getting op from chain node  8
Shapes:
 X = (579, 1009)
Getting op from chain node  9
Shapes:
 X = (579, 1010)
Getting op from chain node  10
Shapes:
 X = (579, 1011)
Getting op from chain node  11
Shapes:
 X = (579, 1012)
Getting op from chain node  12
Shapes:
 X = (579, 1013)
Getting op from chain node  13
Shapes:
 X = (579, 1014)
Getting op from chain node  14
Shapes:
 X = (579, 1015)
Getting op from chain node  15
Shapes:
 X = (579, 1016)
Getting op from chain node  16
Shapes:
 X = (579, 1017)
Getting op from chain node  17
Shapes:
 X 

In [14]:
cross_chain_group = []
for j in range(ENS_COUNT//2):
    prediction = []
    y_pred_fw = y_full_fw[j].copy()
    y_pred_re = y_full_re[j].copy()
    fw = np.array(y_pred_fw)[:,:,0].transpose().copy()
    re = np.array(y_pred_re)[:,:,0].transpose().copy()
    for i in range(np.array(y_pred_fw)[:,:,0].transpose().shape[0]):
        fw_i = fw[i,:].reshape(1,-1)[0]
        re_i = re[i,:].reshape(1,-1)[0][::-1] 
        final = (fw_i+re_i)/2
        prediction.append(final)
    cross_chain_group.append(np.array(prediction))

In [15]:
#Adding all 5 results
total = None
for x in cross_chain_group:
    if total is None:
        total = x
    else:
        total = total + x
#Calculating the average
predictions = (total/(ENS_COUNT//2)).round().astype(int)

In [16]:
from sklearn.metrics import accuracy_score,hamming_loss, log_loss, f1_score

In [17]:
# accuracy
print("Accuracy = ",accuracy_score(y_test,predictions))
print("Hamming accuracy = ",hamming_accuracy_score(y_test,predictions))
print("Hamming loss = ",hamming_loss(y_test,predictions))
print("Log loss = ",log_loss(y_test,predictions))
print("F1 score = ",f1_score(y_test,predictions,average='macro'))

Accuracy =  0.155440414508
Hamming accuracy =  0.450178065074
Hamming loss =  0.04506794408055528
Log loss =  59.7326737875
F1 score =  0.12306754899
