In [9]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import hamming_loss

# LOAD FEATURE DATA (PCA AND FULL) AND Y_DATA

In [2]:
# Read the saved .npy arrays from ./data, grouped by split.
# Each split provides a "PCA" and a "FULL" feature array plus a label array
# (contents are as implied by the file names -- TODO confirm).
TRAIN_PCA_X, TRAIN_FULL_X, train_Y = (
    np.load('./data/TRAIN_PCA.npy'),
    np.load('./data/TRAIN_FULL.npy'),
    np.load('./data/TRAIN_Y.npy'),
)
TEST_PCA_X, TEST_FULL_X, test_y = (
    np.load('./data/TEST_PCA.npy'),
    np.load('./data/TEST_FULL.npy'),
    np.load('./data/TEST_Y.npy'),
)

In [3]:
# Network sizes are taken from the data: the second axis of the PCA feature
# array gives the input width, the second axis of the label array gives the
# number of output labels.
input_dimensions, output_dimensions = TRAIN_PCA_X.shape[1], train_Y.shape[1]

In [4]:
# Build one {0: w0, 1: w1} weight dict per label column of train_Y.
# Each weight is total / (2 * count), so the rarer value of a label gets the
# larger weight.
# NOTE(review): a column with no 0s or no 1s raises ZeroDivisionError here.
class_weight = []
for label_column in train_Y.T:
    n_neg = len(label_column[label_column == 0])
    n_pos = len(label_column[label_column == 1])
    n_total = n_pos + n_neg
    class_weight.append({
        0: (1 / n_neg) * (n_total) / 2.0,
        1: (1 / n_pos) * (n_total) / 2.0,
    })
# Uncomment to display the computed weights:
#class_weight

[{0: 0.5288483974660356, 1: 9.16599263596306},
 {0: 0.5293276956486881, 1: 9.024365602900101},
 {0: 0.5211314007639221, 1: 12.330734876167275},
 {0: 0.5790125904629722, 1: 3.66405269761606},
 {0: 0.5518856781499747, 1: 5.31828529401464},
 {0: 0.5539775421904756, 1: 5.131555825898892},
 {0: 0.543856884479702, 1: 6.200359315666979},
 {0: 0.7385379329128022, 1: 1.5480513390354051},
 {0: 0.6219150591800795, 1: 2.5506080354743346},
 {0: 0.5197045768007913, 1: 13.187407729049067}]

## NN MODEL

In [5]:
def prelim_model_multi(input_dimensions, optimizer = 'adam', k_initializer = 'glorot_uniform', hidden_1_neurons = 512, output_units = output_dimensions, out_activation = 'sigmoid'):
    """Build and compile a one-hidden-layer dense network.

    Parameters
    ----------
    input_dimensions : int
        Number of input features per sample.
    optimizer : str or optimizer instance
        Passed straight to ``compile``.
    k_initializer : str or initializer instance
        Kernel initializer of the hidden layer.
    hidden_1_neurons : int
        Width of the ReLU hidden layer.
    output_units : int
        Width of the output layer. The default is the value the notebook-level
        ``output_dimensions`` had when this function was defined.
    out_activation : str
        Activation of the output layer.

    Returns
    -------
    A compiled ``Sequential`` model using binary cross-entropy loss.
    """
    model = Sequential()
    model.add(
        Dense(
            hidden_1_neurons,
            input_shape=(input_dimensions,),
            activation='relu',
            kernel_initializer=k_initializer,
        )
    )
    # Honour the caller's output size and activation; previously these two
    # parameters were ignored in favour of a hard-coded 10-unit sigmoid layer.
    model.add(Dense(output_units, activation=out_activation))
    model.compile(loss='binary_crossentropy', optimizer=optimizer)
    return model

In [6]:
# Train a 100-unit hidden-layer model on the PCA features, then score the
# PCA test features.
model_i = prelim_model_multi(
    input_dimensions=TRAIN_PCA_X.shape[1],
    hidden_1_neurons=100,
)
# NOTE(review): class_weight here is a list of per-label dicts, whereas the
# Keras docs describe a single dict mapping class index -> weight. Confirm the
# installed Keras version actually applies these weights for this model.
model_i.fit(
    TRAIN_PCA_X,
    train_Y,
    epochs=15,
    batch_size=1000,
    verbose=1,
    class_weight=class_weight,
)
predicted_output = model_i.predict(TEST_PCA_X)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [13]:
def my_conf(true, predict):
    """Count confusion-matrix entries separately for every label column.

    Parameters
    ----------
    true : array-like, shape (n_samples, n_labels)
        Ground-truth indicator values.
    predict : array-like, shape (n_samples, n_labels)
        Predicted indicator values.

    Returns
    -------
    list of dict
        One ``{'TP', 'TN', 'FP', 'FN'}`` dict of plain ``int`` counts per
        label, in column order. Only entries equal to exactly 0 or 1 are
        counted; any other value contributes to none of the four cells.

    Raises
    ------
    ValueError
        If the inputs are not two-dimensional arrays of identical shape
        (mismatched inputs used to be truncated silently).
    """
    true = np.asarray(true)
    predict = np.asarray(predict)
    if true.ndim != 2 or true.shape != predict.shape:
        raise ValueError(
            f'true and predict must be 2-D arrays of the same shape, '
            f'got {true.shape} and {predict.shape}'
        )

    actual_pos, actual_neg = (true == 1), (true == 0)
    predicted_pos, predicted_neg = (predict == 1), (predict == 0)

    # Column-wise sums give one count per label for each confusion cell.
    counts = {
        'TP': (actual_pos & predicted_pos).sum(axis=0),
        'TN': (actual_neg & predicted_neg).sum(axis=0),
        'FP': (actual_neg & predicted_pos).sum(axis=0),
        'FN': (actual_pos & predicted_neg).sum(axis=0),
    }
    return [
        {cell: int(per_label[class_i]) for cell, per_label in counts.items()}
        for class_i in range(true.shape[1])
    ]

def my_metrics(y_true, y_pred):
    """Print and return per-label sensitivity and specificity.

    Parameters
    ----------
    y_true, y_pred :
        Forwarded unchanged to ``my_conf``, which supplies the per-label
        TP/TN/FP/FN counts.

    Returns
    -------
    tuple of (list, list)
        ``(sens, spec)`` where ``sens[k] = TP / (TP + FN)`` and
        ``spec[k] = TN / (TN + FP)`` for label ``k``. A rate whose
        denominator is zero (no positives, or no negatives, for that label)
        is reported as NaN instead of raising ``ZeroDivisionError``.
    """
    undefined = float('nan')
    sens = []
    spec = []
    for counts in my_conf(y_true, y_pred):
        n_positive = counts['TP'] + counts['FN']
        n_negative = counts['TN'] + counts['FP']
        sens.append(counts['TP'] / n_positive if n_positive else undefined)
        spec.append(counts['TN'] / n_negative if n_negative else undefined)
    print(f'SENSITIVITY PER LABEL: {sens}\n')
    print(f'SPECIFICITY PER LABEL: {spec}\n')
    return(sens, spec)

def my_multi_label_acc(y_true, predictions):
    """Binarize predicted scores using each label's own mean score as threshold.

    Despite the name, no accuracy is computed: for every label column the
    mean predicted value is taken, and each prediction becomes 1 when it is
    greater than or equal to that mean and 0 otherwise.

    Parameters
    ----------
    y_true :
        Unused; kept so existing calls keep working.
    predictions : array-like, shape (n_samples, n_labels)
        Predicted score per sample and label.

    Returns
    -------
    numpy.ndarray of int, shape (n_samples, n_labels)
        The thresholded 0/1 predictions.
    """
    predictions = np.asarray(predictions)
    # Accumulate in float64 so low-precision inputs still get an accurate
    # per-label mean.
    thresholds = predictions.mean(axis=0, dtype=np.float64)
    return (predictions >= thresholds).astype(int)

In [14]:
# Threshold the raw network outputs into 0/1 labels, then report
# per-label sensitivity and specificity against the test labels.
out = my_multi_label_acc(test_y, predicted_output)
mm = my_metrics(test_y, out)

SENSITIVITY PER LABEL: [0.4342068188222034, 0.5472622881949307, 0.523037875829754, 0.16899355200448557, 0.6340067064692892, 0.6068883610451307, 0.5411742096665706, 0.6088765931285617, 0.4603462554718618, 0.5673253208392748]

SPECIFICITY PER LABEL: [0.879541373117834, 0.8859941472931231, 0.7291165156471181, 0.7838879689475278, 0.7053097495209185, 0.6493010354714926, 0.7246772550394976, 0.7545464891316718, 0.6303576199381703, 0.6960527576405966]



## MODEL HAMMING LOSS

In [15]:
# Hamming loss between the test labels and the thresholded predictions.
hamming_loss(y_true=test_y, y_pred=out)

0.28072387274221927