In [1]:
import numpy as np 
import scipy
from scipy.io import arff
import pandas as pd
import tensorflow as tf 
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, f1_score, cohen_kappa_score, hamming_loss
from sklearn.model_selection import train_test_split
import random 
# Seed Python's built-in RNG so later random.sample() draws are repeatable.
# This must be a call: assigning to `random.seed` would replace the function
# with an int and leave the generator unseeded.
random.seed(0)

In [2]:
# Load the CAL500 ARFF file; element 0 of the loadarff result holds the data records.
CAL_data_train = arff.loadarff("/home/karim/Documents/research/MLML datasets/CAL500/CAL500.arff")
features_labels = CAL_data_train[0]
features_labels_df = pd.DataFrame(features_labels)

# NOTE: this count is taken before the train/test split below, i.e. it is the
# size of the whole loaded table.
print("Number of training samples is: {}".format(len(features_labels)))

# The trailing 174 columns are treated as labels; everything before them as features.
num_label_columns = 174
all_values = features_labels_df.values
features = all_values[:, :-num_label_columns]
labels = all_values[:, -num_label_columns:]
LABEL_LIST = list(features_labels_df.columns[-num_label_columns:])

# Hold out 33% of the rows for testing; `features`/`labels` are rebound to the
# training portion. random_state=0 fixes the split.
split_parts = train_test_split(features, labels, test_size=0.33, random_state=0)
features, test_features, labels, test_labels = split_parts

Number of training samples is: 502


In [3]:
# Layer widths of the fully connected network:
# input_shape / output_shape size the x_input / y placeholders,
# the two hidden sizes are passed to full_layer() when the graph is built.
input_shape, output_shape = 68, 174
hidden_layer_1_shape, hidden_layer_2_shape = 48, 24

def get_weights(shape):
    """Return a TensorFlow variable of `shape` initialised from a truncated normal.

    Args:
        shape: dimensions of the weight tensor, e.g. [n_inputs, n_outputs].

    Returns:
        A tf.Variable whose initial value is drawn with stddev=0.1.
    """
    initial_values = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial_values)

def bias_variable(shape):
    """Return a TensorFlow variable of `shape` with every entry initialised to 0.1.

    Args:
        shape: dimensions of the bias tensor, e.g. [n_outputs].

    Returns:
        A tf.Variable wrapping a constant-0.1 initial value.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))

def full_layer(input, size):
    """Apply a fully connected (affine) layer: input @ W + b.

    Args:
        input: tensor whose second dimension (read via get_shape()[1]) is the
            number of incoming features.
        size: number of output units.

    Returns:
        The pre-activation tensor; no non-linearity is applied here.
    """
    fan_in = int(input.get_shape()[1])
    # Weights are created before biases, matching the variable creation order
    # callers have always seen.
    weights = get_weights([fan_in, size])
    biases = bias_variable([size])
    return tf.matmul(input, weights) + biases

def weighted_loss(y_true, y_pred, positive_weights, negative_weights):
    """Mean binary cross-entropy with separate per-entry weights for each term.

    For every entry the loss is
        -y_true * log(p) * positive_weights - (1 - y_true) * log(1 - p) * negative_weights
    where p is `y_pred` clipped to [1e-7, 1 - 1e-7].

    Args:
        y_true: target labels.
        y_pred: predicted probabilities (not logits).
        positive_weights: weight applied to the y_true * log(p) term.
        negative_weights: weight applied to the (1 - y_true) * log(1 - p) term.

    Returns:
        The mean of the weighted loss over all entries, including entries
        whose weight is zero.
    """
    epsilon = 1e-7
    # Keep probabilities away from 0 and 1 so neither log() produces Inf/NaN.
    clipped = tf.clip_by_value(y_pred, epsilon, 1 - epsilon)
    positive_term = -y_true * tf.log(clipped) * positive_weights
    negative_term = (1.0 - y_true) * tf.log(1.0 - clipped) * negative_weights
    return tf.reduce_mean(positive_term - negative_term)

In [34]:
# Network with two ReLU hidden layers and a sigmoid output per label.
y = tf.placeholder(tf.float32, shape=[None, output_shape], name="true_labels")
x_input = tf.placeholder(tf.float32, shape=[None, input_shape], name="input_layer")

# Hidden stack: input_shape -> hidden_layer_1_shape -> hidden_layer_2_shape.
h1_pre_activation = full_layer(x_input, hidden_layer_1_shape)
h1 = tf.nn.relu(h1_pre_activation)
h2_pre_activation = full_layer(h1, hidden_layer_2_shape)
h2 = tf.nn.relu(h2_pre_activation)
#h3 = tf.nn.relu(full_layer(h2, hidden_layer_2_shape))
#h4 = tf.nn.relu(full_layer(h3, hidden_layer_2_shape))

# `logits` feed the cross-entropy loss; `output` holds the per-label sigmoid
# probabilities used for accuracy and the sklearn metrics.
logits = full_layer(h2, output_shape)
output = tf.nn.sigmoid(logits)

## Train on complete dataset

In [50]:
# Unweighted objective: sigmoid cross-entropy averaged over every (sample, label) entry.
per_entry_cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(per_entry_cross_entropy)

# Learning rate decay: starts at 0.1 and is multiplied by 0.95 every 1000
# optimizer steps (staircase=True makes the decay stepwise).
global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(
    learning_rate=0.1,
    global_step=global_step,
    decay_steps=1000,
    decay_rate=0.95,
    staircase=True,
)

# Plain gradient descent; minimize() also increments global_step on each run.
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_step = optimizer.minimize(loss, global_step=global_step)

# Accuracy is the fraction of individual label entries whose rounded
# probability equals the target.
rounded_output = tf.round(output)
correct_prediction = tf.equal(rounded_output, y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [53]:
NUM_EPOCHS = 50000

# Every iteration feeds the whole training split (full-batch gradient descent);
# evaluation always uses the whole held-out split.
train_feed = {x_input: features, y: labels}
test_feed = {x_input: test_features, y: test_labels}
train_fetches = [loss, accuracy, output, train_step]
eval_fetches = [loss, accuracy, output, learning_rate]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(NUM_EPOCHS):
        epoch_loss, epoch_accuracy, epoch_output, _ = sess.run(train_fetches, feed_dict=train_feed)

        # Only evaluate and report on every 500th iteration.
        if (epoch + 1) % 500 != 0:
            continue

        val_losses, val_accuracies, val_output, current_learning_rate = sess.run(
            eval_fetches, feed_dict=test_feed)
        print("Epoch #{}".format(epoch+1), "Loss: {:.4f}".format(epoch_loss),
              "accuracy: {:.4f}".format(epoch_accuracy),
              "Test loss: {:.4f}".format(val_losses),
              "Test accuracy: {:.4f}".format(val_accuracies))

Epoch #500 Loss: 0.3326 accuracy: 0.8631 Test loss: 0.3344 Test accuracy: 0.8616
Epoch #1000 Loss: 0.3325 accuracy: 0.8631 Test loss: 0.3344 Test accuracy: 0.8616
Epoch #1500 Loss: 0.3324 accuracy: 0.8632 Test loss: 0.3343 Test accuracy: 0.8617
Epoch #2000 Loss: 0.3324 accuracy: 0.8633 Test loss: 0.3342 Test accuracy: 0.8617
Epoch #2500 Loss: 0.3323 accuracy: 0.8633 Test loss: 0.3341 Test accuracy: 0.8617
Epoch #3000 Loss: 0.3323 accuracy: 0.8634 Test loss: 0.3341 Test accuracy: 0.8617
Epoch #3500 Loss: 0.3322 accuracy: 0.8634 Test loss: 0.3340 Test accuracy: 0.8617
Epoch #4000 Loss: 0.3322 accuracy: 0.8635 Test loss: 0.3339 Test accuracy: 0.8617
Epoch #4500 Loss: 0.3321 accuracy: 0.8635 Test loss: 0.3339 Test accuracy: 0.8617
Epoch #5000 Loss: 0.3321 accuracy: 0.8636 Test loss: 0.3338 Test accuracy: 0.8617
Epoch #5500 Loss: 0.3320 accuracy: 0.8636 Test loss: 0.3337 Test accuracy: 0.8617
Epoch #6000 Loss: 0.3320 accuracy: 0.8636 Test loss: 0.3337 Test accuracy: 0.8617
Epoch #6500 Loss:

Epoch #50000 Loss: 0.3299 accuracy: 0.8635 Test loss: 0.3324 Test accuracy: 0.8617


In [54]:
# Metrics on the training split, using `epoch_output` as left behind by the
# most recently run training loop.
# AUC is computed on the raw probabilities; the other metrics on the
# probabilities rounded to 0/1.
train_rounded_output = np.round(epoch_output)
print("AUC is:{:.3f}".format(roc_auc_score(labels, epoch_output)))
print("F1 is:{:.3f}".format(f1_score(labels, train_rounded_output, average="macro")))
print("Recall is:{:.3f}".format(recall_score(labels, train_rounded_output, average="macro")))
print("Precision is:{:.3f}".format(precision_score(labels, train_rounded_output, average="macro")))
print("Hamming loss is:{:.3f}".format(hamming_loss(labels, train_rounded_output)))

AUC is:0.514
F1 is:0.040
Recall is:0.052
Precision is:0.044
Hamming loss is:0.136


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [57]:
# Metrics on the held-out split, using `val_output` from the last evaluation
# performed by the most recently run training loop.
# AUC is computed on the raw probabilities; the other metrics on the
# probabilities rounded to 0/1.
test_rounded_output = np.round(val_output)
print("AUC is:{:.3f}".format(roc_auc_score(test_labels, val_output)))
print("F1 is:{:.3f}".format(f1_score(test_labels, test_rounded_output, average="macro")))
print("Recall is:{:.3f}".format(recall_score(test_labels, test_rounded_output, average="macro")))
print("Precision is:{:.3f}".format(precision_score(test_labels, test_rounded_output, average="macro")))
print("Hamming loss is:{:.3f}".format(hamming_loss(test_labels, test_rounded_output)))

AUC is:0.525
F1 is:0.039
Recall is:0.052
Precision is:0.032
Hamming loss is:0.138


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


## With missing labels

In [59]:
# Simulate missing labels: set a fixed fraction of the training positives to 0.
# np.nonzero on the 2-D label matrix gives (row_indices, column_indices) of
# every non-zero entry.
ones_indices = np.nonzero(labels)
ratio_of_hidden_samples = 0.4
number_of_hidden_samples = int(len(ones_indices[0]) * ratio_of_hidden_samples)

# Draw from a dedicated, explicitly seeded generator so the hidden set is the
# same on every run and on every re-execution of this cell, independent of the
# state of the global `random` module.
hidden_label_rng = random.Random(0)
random_indices = hidden_label_rng.sample(range(len(ones_indices[0])), number_of_hidden_samples)
indices_to_hide = (ones_indices[0][random_indices], ones_indices[1][random_indices])

# Work on a copy so the complete `labels` matrix stays available; one indexed
# assignment clears all selected (row, column) pairs at once.
labels_with_missing_positives = np.copy(labels)
labels_with_missing_positives[indices_to_hide] = 0
    
    
# Training with missing labels with 40%: the network is fitted on
# `labels_with_missing_positives` but evaluated against the complete test labels.
NUM_EPOCHS = 50000

train_feed = {x_input: features, y: labels_with_missing_positives}
test_feed = {x_input: test_features, y: test_labels}
train_fetches = [loss, accuracy, output, train_step]
eval_fetches = [loss, accuracy, output, learning_rate]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(NUM_EPOCHS):
        epoch_loss, epoch_accuracy, epoch_output, _ = sess.run(train_fetches, feed_dict=train_feed)

        # Only evaluate and report on every 500th iteration.
        if (epoch + 1) % 500 != 0:
            continue

        val_losses, val_accuracies, val_output, current_learning_rate = sess.run(
            eval_fetches, feed_dict=test_feed)
        print("Epoch #{}".format(epoch+1), "Loss: {:.4f}".format(epoch_loss),
              "accuracy: {:.4f}".format(epoch_accuracy),
              "Test loss: {:.4f}".format(val_losses),
              "Test accuracy: {:.4f}".format(val_accuracies))

Epoch #500 Loss: 0.2524 accuracy: 0.9101 Test loss: 0.3589 Test accuracy: 0.8537
Epoch #1000 Loss: 0.2522 accuracy: 0.9103 Test loss: 0.3590 Test accuracy: 0.8537
Epoch #1500 Loss: 0.2521 accuracy: 0.9104 Test loss: 0.3589 Test accuracy: 0.8537
Epoch #2000 Loss: 0.2520 accuracy: 0.9105 Test loss: 0.3589 Test accuracy: 0.8537
Epoch #2500 Loss: 0.2520 accuracy: 0.9106 Test loss: 0.3588 Test accuracy: 0.8537
Epoch #3000 Loss: 0.2519 accuracy: 0.9107 Test loss: 0.3587 Test accuracy: 0.8537
Epoch #3500 Loss: 0.2518 accuracy: 0.9108 Test loss: 0.3586 Test accuracy: 0.8537
Epoch #4000 Loss: 0.2517 accuracy: 0.9109 Test loss: 0.3585 Test accuracy: 0.8537
Epoch #4500 Loss: 0.2517 accuracy: 0.9110 Test loss: 0.3585 Test accuracy: 0.8537
Epoch #5000 Loss: 0.2516 accuracy: 0.9110 Test loss: 0.3584 Test accuracy: 0.8537
Epoch #5500 Loss: 0.2516 accuracy: 0.9111 Test loss: 0.3583 Test accuracy: 0.8537
Epoch #6000 Loss: 0.2515 accuracy: 0.9111 Test loss: 0.3582 Test accuracy: 0.8537
Epoch #6500 Loss:

Epoch #50000 Loss: 0.2496 accuracy: 0.9112 Test loss: 0.3578 Test accuracy: 0.8537


## With fixed negative weights

In [60]:
# Per-entry loss weights with the same shape as the training label matrix.
# Positive-term weights are all 1; negative-term weights are 1 everywhere
# except at the entries whose positive label was hidden.
train_positive_weights = np.ones_like(labels)
train_negative_weights = np.ones_like(labels)

# `indices_to_hide` holds one (row, column) pair per hidden label
# (number_of_hidden_samples of them), so a single indexed assignment zeroes
# the negative weight of every hidden entry.
train_negative_weights[indices_to_hide] = 0

In [61]:
# Placeholders for the per-entry weights consumed by weighted_loss().
positive_weights = tf.placeholder(tf.float32, shape=[None, output_shape], name="Positive_weights")
negative_weights = tf.placeholder(tf.float32, shape=[None, output_shape], name="negative_weights")

# The weighted loss works on the sigmoid probabilities (`output`), not on the logits.
my_weights_loss = weighted_loss(y, output, positive_weights, negative_weights)

# Rebinds `train_step`: from this cell on it minimises the weighted loss,
# sharing the existing learning-rate schedule and global_step counter.
weighted_optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_step = weighted_optimizer.minimize(my_weights_loss, global_step=global_step)

In [62]:
# Training with negative weights: optimises `my_weights_loss` on the labels
# with hidden positives, while also reporting the unweighted `loss`.
NUM_EPOCHS = 50000

train_feed = {
    x_input: features,
    y: labels_with_missing_positives,
    positive_weights: train_positive_weights,
    negative_weights: train_negative_weights,
}
# Evaluation fetches only the unweighted loss, so no weight placeholders are fed.
test_feed = {x_input: test_features, y: test_labels}
train_fetches = [my_weights_loss, loss, accuracy, output, train_step]
eval_fetches = [loss, accuracy, output, learning_rate]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(NUM_EPOCHS):
        epoch_my_weights_loss, epoch_loss, epoch_accuracy, epoch_output, _ = sess.run(
            train_fetches, feed_dict=train_feed)

        # Only evaluate and report on every 500th iteration.
        if (epoch + 1) % 500 != 0:
            continue

        val_losses, val_accuracies, val_output, current_learning_rate = sess.run(
            eval_fetches, feed_dict=test_feed)
        print("Epoch #{}".format(epoch+1), "Loss: {:.4f}".format(epoch_loss),
              "Weighted Loss: {:.4f}".format(epoch_my_weights_loss), "accuracy: {:.4f}".format(epoch_accuracy),
              "Test loss: {:.4f}".format(val_losses), "Test accuracy: {:.4f}".format(val_accuracies))

Epoch #500 Loss: 0.2553 Weighted Loss: 0.2367 accuracy: 0.9084 Test loss: 0.3485 Test accuracy: 0.8573
Epoch #1000 Loss: 0.2551 Weighted Loss: 0.2365 accuracy: 0.9085 Test loss: 0.3486 Test accuracy: 0.8573
Epoch #1500 Loss: 0.2551 Weighted Loss: 0.2364 accuracy: 0.9086 Test loss: 0.3485 Test accuracy: 0.8573
Epoch #2000 Loss: 0.2550 Weighted Loss: 0.2363 accuracy: 0.9086 Test loss: 0.3484 Test accuracy: 0.8573
Epoch #2500 Loss: 0.2549 Weighted Loss: 0.2363 accuracy: 0.9087 Test loss: 0.3484 Test accuracy: 0.8574
Epoch #3000 Loss: 0.2548 Weighted Loss: 0.2362 accuracy: 0.9087 Test loss: 0.3483 Test accuracy: 0.8574
Epoch #3500 Loss: 0.2548 Weighted Loss: 0.2361 accuracy: 0.9088 Test loss: 0.3482 Test accuracy: 0.8574
Epoch #4000 Loss: 0.2547 Weighted Loss: 0.2361 accuracy: 0.9088 Test loss: 0.3481 Test accuracy: 0.8574
Epoch #4500 Loss: 0.2547 Weighted Loss: 0.2360 accuracy: 0.9089 Test loss: 0.3481 Test accuracy: 0.8574
Epoch #5000 Loss: 0.2546 Weighted Loss: 0.2360 accuracy: 0.9090 T

Epoch #40000 Loss: 0.2529 Weighted Loss: 0.2342 accuracy: 0.9091 Test loss: 0.3470 Test accuracy: 0.8573
Epoch #40500 Loss: 0.2528 Weighted Loss: 0.2342 accuracy: 0.9091 Test loss: 0.3470 Test accuracy: 0.8573
Epoch #41000 Loss: 0.2528 Weighted Loss: 0.2342 accuracy: 0.9091 Test loss: 0.3470 Test accuracy: 0.8573
Epoch #41500 Loss: 0.2528 Weighted Loss: 0.2342 accuracy: 0.9091 Test loss: 0.3470 Test accuracy: 0.8573
Epoch #42000 Loss: 0.2528 Weighted Loss: 0.2342 accuracy: 0.9091 Test loss: 0.3470 Test accuracy: 0.8573
Epoch #42500 Loss: 0.2528 Weighted Loss: 0.2342 accuracy: 0.9091 Test loss: 0.3470 Test accuracy: 0.8573
Epoch #43000 Loss: 0.2528 Weighted Loss: 0.2342 accuracy: 0.9091 Test loss: 0.3470 Test accuracy: 0.8573
Epoch #43500 Loss: 0.2528 Weighted Loss: 0.2342 accuracy: 0.9091 Test loss: 0.3470 Test accuracy: 0.8573
Epoch #44000 Loss: 0.2528 Weighted Loss: 0.2342 accuracy: 0.9091 Test loss: 0.3470 Test accuracy: 0.8573
Epoch #44500 Loss: 0.2528 Weighted Loss: 0.2342 accurac