In [2]:
import numpy as np
import warnings
import sys
import pandas as pd
import scipy
from scipy.io import arff
if not sys.warnoptions:                                         
    warnings.simplefilter("ignore")
import pickle

In [3]:
def get_data(path, noise=False):
    '''
    Load an array from disk and optionally perturb it with Gaussian noise.

    Parameters
    ----------
    path : str or file-like
        Location passed straight to ``np.load``.
    noise : bool, default False
        When truthy, zero-mean Gaussian noise (standard deviation 0.001) is
        added element-wise to the loaded data.

    Returns
    -------
    The object returned by ``np.load``; when ``noise`` is set, the sum of that
    object and a float noise array of the same shape.
    '''
    # NumPy >= 1.16.3 defaults allow_pickle to False, which makes np.load
    # reject pickled payloads; the flag is passed explicitly so that such
    # files keep loading.
    # SECURITY: unpickling can execute arbitrary code -- only point this at
    # files you created yourself or otherwise trust.
    data = np.load(path, allow_pickle=True)
    if noise:
        data = data + np.random.normal(0, 0.001, data.shape)
    return data

In [4]:
# Load the train/test splits: feature matrices are cast to uint8 and label
# matrices to int8. The loads run in the same order as the names on the left.
X_train, y_train, X_test, y_test = (
    get_data('enron-train-features.pkl').astype(np.uint8),
    get_data('enron-train-labels.pkl').astype(np.int8),
    get_data('enron-test-features.pkl').astype(np.uint8),
    get_data('enron-test-labels.pkl').astype(np.int8),
)

In [5]:
# Sanity check: report the dimensions of every split.
for _title, _array in (
    ("Train_X: ", X_train),
    ("Train_Y: ", y_train),
    ("Test_X: ", X_test),
    ("Test_Y: ", y_test),
):
    print(_title, _array.shape)

Train_X:  (1123, 1001)
Train_Y:  (1123, 53)
Test_X:  (579, 1001)
Test_Y:  (579, 53)


In [6]:
def hamming_accuracy_score(y_true, y_pred, normalize=True, sample_weight=None):
    '''
    Sample-wise multi-label accuracy, called the "Hamming score" in
    http://stackoverflow.com/q/32239577/395857

    For every sample the score is |T & P| / |T | P|, where T and P are the
    sets of label indices that are non-zero in ``y_true`` and ``y_pred``.
    A sample with no true and no predicted labels scores 1.

    Parameters
    ----------
    y_true, y_pred : array-like, shape (n_samples, n_labels)
        Binary indicator matrices; any non-zero entry counts as "label set".
        One-dimensional inputs are treated as a single label column.
    normalize : bool, default True
        If True return the (weighted) mean of the per-sample scores,
        otherwise return their (weighted) sum.
    sample_weight : array-like of shape (n_samples,), optional
        Per-sample weights. ``None`` weights every sample equally.

    Returns
    -------
    float
        Mean score in [0, 1] when ``normalize`` is True, else the summed score.
        With the default arguments an empty input yields ``nan``.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not have the same shape.
    ZeroDivisionError
        From ``np.average`` when ``normalize`` is True and the weights sum to 0.
    '''
    true_mask = np.asarray(y_true).astype(bool)
    pred_mask = np.asarray(y_pred).astype(bool)
    if true_mask.shape != pred_mask.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got {} and {}".format(
                true_mask.shape, pred_mask.shape))
    if true_mask.ndim == 1:
        # One value per sample: handle it as a single-label problem.
        true_mask = true_mask.reshape(-1, 1)
        pred_mask = pred_mask.reshape(-1, 1)

    overlap = np.logical_and(true_mask, pred_mask).sum(axis=1)
    combined = np.logical_or(true_mask, pred_mask).sum(axis=1)

    # Rows whose union is empty keep the initial score of 1; this also
    # avoids a 0/0 division for them.
    scores = np.ones(true_mask.shape[0], dtype=float)
    has_labels = combined > 0
    scores[has_labels] = overlap[has_labels] / combined[has_labels].astype(float)

    if normalize:
        return np.average(scores, weights=sample_weight)
    if sample_weight is not None:
        return np.dot(scores, sample_weight)
    return scores.sum()

## Chain Model type 1


In [7]:
import keras
from keras.models import Sequential, model_from_json
from keras.layers import Dense,Dropout
from keras.backend.tensorflow_backend import clear_session

Using TensorFlow backend.


In [8]:
def create_c1node(X_feed,y_now):
    '''
    Build and compile the first node of the chain.

    Architecture: four ReLU Dense layers of 512, 256, 128 and 64 units
    (named 'first', 'nretrain1', 'nretrain2', 'last'), each followed by
    Dropout(0.4), then a sigmoid Dense layer named 'output' with
    y_now.shape[1] units. The input width is X_feed.shape[1].

    Compiled with binary cross-entropy loss, the Adam optimizer and the
    'acc' metric.

    Returns the compiled, untrained Sequential model.
    '''
    n_features = X_feed.shape[1]
    n_targets = y_now.shape[1]
    hidden_spec = (
        ('first', 512),
        ('nretrain1', 256),
        ('nretrain2', 128),
        ('last', 64),
    )

    model = Sequential()
    for position, (layer_name, width) in enumerate(hidden_spec):
        # Only the very first layer declares the input shape.
        extra = {'input_shape': (n_features,)} if position == 0 else {}
        model.add(Dense(width,
                        activation='relu',
                        kernel_initializer='glorot_uniform',
                        name=layer_name,
                        **extra))
        model.add(Dropout(0.4))
    model.add(Dense(n_targets,
                    activation='sigmoid',
                    kernel_initializer='glorot_uniform',
                    name='output'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
    return model

def create_copynode(X_feed,y_now,w_name):
    '''
    Build and compile a follow-up chain node that reuses stored weights.

    Same layer sizes as create_c1node (512, 256, 128, 64 ReLU units, each
    followed by Dropout(0.4), then a sigmoid output of y_now.shape[1] units),
    but:
      * the 256- and 128-unit layers ('nretrain1', 'nretrain2') are created
        with trainable=False;
      * the remaining Dense layers are named 'copyfirst', 'copylast' and
        'copyoutput';
      * weights are loaded from the file `w_name` with by_name=True before
        compiling.

    NOTE(review): presumably `w_name` was written by a create_c1node model,
    in which case only 'nretrain1' / 'nretrain2' share names with the stored
    layers -- confirm against the caller.

    Compiled with binary cross-entropy loss, the Adam optimizer and the
    'acc' metric. Returns the compiled Sequential model.
    '''
    relu_opts = dict(activation='relu', kernel_initializer='glorot_uniform')
    stack = [
        Dense(512, input_shape=(X_feed.shape[1],), name='copyfirst', **relu_opts),
        Dropout(0.4),
        Dense(256, trainable=False, name='nretrain1', **relu_opts),
        Dropout(0.4),
        Dense(128, trainable=False, name='nretrain2', **relu_opts),
        Dropout(0.4),
        Dense(64, name='copylast', **relu_opts),
        Dropout(0.4),
        Dense(y_now.shape[1],
              activation='sigmoid',
              kernel_initializer='glorot_uniform',
              name='copyoutput'),
    ]

    model = Sequential()
    for layer in stack:
        model.add(layer)
    model.load_weights(w_name,by_name=True)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
    return model

In [9]:
from time import time

In [10]:
# Model chain: one network per label column. Each node is trained on the
# original features plus the true values of every label handled before it.
chain = []
first = 1

# Training
X_feed = X_train.copy()
t1 = time()
for i in range(y_train.shape[1]):
    print("Training chain node ",i)
    y_now = y_train[:,[i]].copy()
    print("Shapes:\n X = {} \n Y = {}".format(X_feed.shape,y_now.shape))

    if first != 1:
        # Later nodes: rebuilt by create_copynode from the stored weights
        # file and fitted for 5 epochs.
        node = create_copynode(X_feed,y_now,'weights.h5')
        node.fit(X_feed,y_now,epochs=5,batch_size=50)
    else:
        # First node: fitted for 20 epochs, then its weights are written
        # to disk so the following nodes can load them.
        node = create_c1node(X_feed,y_now)
        node.fit(X_feed,y_now,epochs=20,batch_size=50)
        node.save_weights('weights.h5')
        first = 0

    print("Training of node {} complete\n\n".format(i))
    # Adding node to chain
    chain.append(node)
    # Updating X_feed: the label just trained on becomes an extra input column.
    X_feed = np.concatenate((X_feed,y_now),axis=1)
t2 = time()
print("Time taken: ",(t2-t1))

Training chain node  0
Shapes:
 X = (1123, 1001) 
 Y = (1123, 1)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Training of node 0 complete


Training chain node  1
Shapes:
 X = (1123, 1002) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 1 complete


Training chain node  2
Shapes:
 X = (1123, 1003) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 2 complete


Training chain node  3
Shapes:
 X = (1123, 1004) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 3 complete


Training chain node  4
Shapes:
 X = (1123, 1005) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 4 complete


Training chain node  5
Shapes:
 X = (1123, 1006) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5

Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 11 complete


Training chain node  12
Shapes:
 X = (1123, 1013) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 12 complete


Training chain node  13
Shapes:
 X = (1123, 1014) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 13 complete


Training chain node  14
Shapes:
 X = (1123, 1015) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 14 complete


Training chain node  15
Shapes:
 X = (1123, 1016) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 15 complete


Training chain node  16
Shapes:
 X = (1123, 1017) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 16 complete


Training chain node  17
Shapes:
 X = (1123, 1018) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 17 complete


Training chain node  18
Shapes:
 X = (1123, 1019) 
 Y = (

Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 25 complete


Training chain node  26
Shapes:
 X = (1123, 1027) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 26 complete


Training chain node  27
Shapes:
 X = (1123, 1028) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 27 complete


Training chain node  28
Shapes:
 X = (1123, 1029) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 28 complete


Training chain node  29
Shapes:
 X = (1123, 1030) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 29 complete


Training chain node  30
Shapes:
 X = (1123, 1031) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 30 complete


Training chain node  31
Shapes:
 X = (1123, 1032) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 31 complete


Training chain node  32
Shapes:
 X = (1123, 1033) 
 Y = (

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 39 complete


Training chain node  40
Shapes:
 X = (1123, 1041) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 40 complete


Training chain node  41
Shapes:
 X = (1123, 1042) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 41 complete


Training chain node  42
Shapes:
 X = (1123, 1043) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 42 complete


Training chain node  43
Shapes:
 X = (1123, 1044) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 43 complete


Training chain node  44
Shapes:
 X = (1123, 1045) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 44 complete


Training chain node  45
Shapes:
 X = (1123, 1046) 
 Y = (1123, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training of node 45 complete


Training chain node  46
Shapes:
 X = (1123, 104

In [10]:
# Model chain inference: run the test features through every node in order.
# Each node's rounded prediction is stored and appended to the feature matrix
# that the next node receives.
y_pred = []

X_feed = X_test.copy()
for i, node in enumerate(chain):
    print("Getting op from chain node ",i)
    print("Shapes:\n X = {}".format(X_feed.shape))
    # Round the node output to integer labels before storing it.
    output = node.predict(X_feed).round().astype(int)
    y_pred.append(output)
    # Updating X_feed with this node's prediction as a new input column.
    X_feed = np.append(X_feed,output,axis=1)
            

Getting op from chain node  0
Shapes:
 X = (579, 1001)
Getting op from chain node  1
Shapes:
 X = (579, 1002)
Getting op from chain node  2
Shapes:
 X = (579, 1003)
Getting op from chain node  3
Shapes:
 X = (579, 1004)
Getting op from chain node  4
Shapes:
 X = (579, 1005)
Getting op from chain node  5
Shapes:
 X = (579, 1006)
Getting op from chain node  6
Shapes:
 X = (579, 1007)
Getting op from chain node  7
Shapes:
 X = (579, 1008)
Getting op from chain node  8
Shapes:
 X = (579, 1009)
Getting op from chain node  9
Shapes:
 X = (579, 1010)
Getting op from chain node  10
Shapes:
 X = (579, 1011)
Getting op from chain node  11
Shapes:
 X = (579, 1012)
Getting op from chain node  12
Shapes:
 X = (579, 1013)
Getting op from chain node  13
Shapes:
 X = (579, 1014)
Getting op from chain node  14
Shapes:
 X = (579, 1015)
Getting op from chain node  15
Shapes:
 X = (579, 1016)
Getting op from chain node  16
Shapes:
 X = (579, 1017)
Getting op from chain node  17
Shapes:
 X = (579, 1018)
Ge

In [11]:
# Join the per-node prediction columns side by side in one call instead of
# growing the array with repeated np.append copies. With no predictions the
# original placeholder array([0]) is kept.
predictions = np.concatenate(y_pred, axis=1) if y_pred else np.array([0])

In [12]:
from sklearn.metrics import accuracy_score,hamming_loss, log_loss, f1_score

In [13]:
# Evaluation: each metric is computed and printed in turn.
# NOTE(review): `predictions` holds rounded labels, whereas log loss is
# normally computed from predicted probabilities -- confirm that the
# "Log loss" figure is what was intended.
for _caption, _metric, _options in (
    ("Accuracy = ", accuracy_score, {}),
    ("Hamming accuracy = ", hamming_accuracy_score, {}),
    ("Hamming loss = ", hamming_loss, {}),
    ("Log loss = ", log_loss, {}),
    ("F1 score = ", f1_score, {'average': 'macro'}),
):
    print(_caption, _metric(y_test, predictions, **_options))

Accuracy =  0.13298791019
Hamming accuracy =  0.448076367118
Hamming loss =  0.04888063349301007
Log loss =  56.0167525591
F1 score =  0.199686680276
