In [1]:
# General Imports
import os
import numpy as np
import pandas as pd
from time import strftime, localtime
import matplotlib.pyplot as plt
from skimage.transform import resize
import seaborn as sn
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, f1_score, cohen_kappa_score

# [TODO] Edit this path to point to the TF-Slim directory: "https://github.com/tensorflow/models/tree/master/research/slim"
SLIM_PATH = '/srv/workspace/research/mlml/models/research/slim'
os.chdir(SLIM_PATH)

# Deep Learning
import tensorflow as tf
from nets import inception
from datasets import dataset_utils
from preprocessing import inception_preprocessing
from tensorflow.contrib import slim

from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report, roc_auc_score, \
    hamming_loss
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.utils import check_random_state

# Use a larger default font for every matplotlib figure produced by this notebook.
plt.rcParams.update({'font.size': 22})

# Filesystem locations used by the experiments (absolute paths; edit them for your machine).
SOURCE_PATH = "/srv/workspace/research/mlml/mlml_weightedLoss/"
IMAGES_PATH = "/srv/workspace/research/mlml/datasets/mscoco/train_formatted_npz/"
# Test images are read from the same directory as the training images.
TEST_IMAGES_PATH = IMAGES_PATH
OUTPUT_PATH ="/srv/workspace/research/mlml/experiments_results/"
PRETRAINED_MODEL_DIR = '/srv/workspace/research/mlml/pretrained_models/'

# Target image shape and per-channel normalisation constants used by load_image_npz.
# NOTE(review): these look like the usual ImageNet mean/std values -- confirm.
INPUT_SHAPE = (224, 224, 3)
INPUT_IMAGE_MEAN = [0.485, 0.456, 0.406]
INPUT_IMAGE_STD = [0.229, 0.224, 0.225]

# Reference label table; its column names define the label list and the class count.
# NOTE(review): the per-split CSVs are indexed through an "Unnamed: 0" id column; if this
# file also carries such a column it is counted as a class here -- confirm the file layout.
labels_set = pd.read_csv('/srv/workspace/research/mlml/mlml_weightedLoss/labels/missing_labels0.0/missing_labels0.0.csv')

LABELS_LIST = labels_set.columns
NUM_CLASSES = len(LABELS_LIST)

# Training parameters
BATCH_SIZE = 32
# Batches per epoch, derived from the size of labels_set (not from the per-split training CSV).
TRAINING_STEPS = int(len(labels_set)/BATCH_SIZE)
NUM_EPOCHS = 5
# Per-column positive weight: rounded inverse of the fraction of positive entries.
# A column without any positive entry divides by zero here and yields inf.
Pos_balance_weights = np.round(1 / (np.sum(labels_set.values, axis = 0)/ len(labels_set))).astype(np.float32)

  from ._conv import register_converters as _register_converters


In [None]:
# Define helper functions
def download_pretrained_model():
    """
        Make sure PRETRAINED_MODEL_DIR exists, then hand the Inception-v1 archive URL to
        dataset_utils.download_and_uncompress_tarball with that directory as target.
    """
    checkpoint_url = "http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz"

    target_dir_missing = not tf.gfile.Exists(PRETRAINED_MODEL_DIR)
    if target_dir_missing:
        tf.gfile.MakeDirs(PRETRAINED_MODEL_DIR)

    dataset_utils.download_and_uncompress_tarball(checkpoint_url, PRETRAINED_MODEL_DIR)

def get_weights(shape):
    """
        Create a tf.Variable of the given shape, initialised from a truncated
        normal distribution with a standard deviation of 0.1.
    """
    initial_values = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial_values)

def bias_variable(shape):
    """
        Create a tf.Variable of the given shape with every entry initialised to 0.1.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))

def full_layer(input, size):
    """
        Fully connected layer without activation: input @ W + b.
        The input width is read from the static second dimension of `input`;
        `size` is the number of output units.
    """
    n_inputs = int(input.get_shape()[1])
    weights = get_weights([n_inputs, size])
    biases = bias_variable([size])
    projected = tf.matmul(input, weights)
    return projected + biases

def dataset_from_csv(csv_path, **kwargs):
    """
        Build a tf.data.Dataset whose elements are the rows of a csv file.
        Every csv column becomes one entry (keyed by column name) of the element dict.
        kwargs are passed straight through to pandas.read_csv.
    """
    frame = pd.read_csv(csv_path, **kwargs)

    column_arrays = {}
    for column_name in frame:
        column_arrays[column_name] = frame[column_name].values

    return tf.data.Dataset.from_tensor_slices(column_arrays)

def set_tensor_shape(tensor, tensor_shape):
    """
        Call tensor.set_shape(tensor_shape) and hand the very same tensor back,
        so the shape can be fixed inside an expression (e.g. a lambda).
    """
    tensor.set_shape(tensor_shape)
    return tensor

def mscoco_labels_idx_to_names(labels,labelnames = LABELS_LIST):
    """
        Return the entries of `labelnames` at the positions where `labels` equals 1.
    """
    active_positions = np.where(labels == 1)
    return labelnames[active_positions]


# Dataset pipelines
def get_weights_py(image_id):
    """
        Look up the negative weights of one image in the module-level
        `global_weights_negative` table: rows whose first column equals
        `image_id` are selected and every remaining column is returned as a
        flat float32 array.
    """
    id_column = global_weights_negative.iloc[:, 0]
    matching_rows = global_weights_negative[id_column == image_id]
    weight_values = matching_rows.iloc[:, 1:].values
    return weight_values.flatten().astype(np.float32)

def tf_get_weights_py(sample, device="/cpu:0"):
    """
        Wrap get_weights_py in tf.py_func (keyed on sample["Unnamed: 0"]) and return a
        copy of `sample` extended with the result under "negative_weights".
    """
    with tf.device(device):
        weights_lookup = tf.py_func(get_weights_py,
                                    [sample["Unnamed: 0"]],
                                    [tf.float32],
                                    stateful=False)
        extended_sample = dict(sample.items())
        extended_sample["negative_weights"] = weights_lookup
        return extended_sample
    
def load_image_npz(*args):
    """
        Load one image stored as '<image_id>.npz' under IMAGES_PATH, with error tracking.

        The 'image' array of the archive is scaled to [0, 1], normalised per channel with
        INPUT_IMAGE_MEAN / INPUT_IMAGE_STD, resized to INPUT_SHAPE and cast to float32.

        args : image ID, path to dataset (the path argument is currently ignored;
               IMAGES_PATH is always used)
        return:
            Features: numpy float32 ndarray with the preprocessed image
                      (if no error occurred, otherwise: np.float32(0.0))
            Error: boolean, False if no error, True if an error was raised while loading
                   or preprocessing the image.
    """
    # TODO: edit path
    path = IMAGES_PATH
    image_id, dummy_path = args
    try:
        # np.load returns an NpzFile that keeps the archive open until it is closed;
        # the context manager releases the file handle as soon as the array is copied out.
        with np.load(os.path.join(path, str(image_id) + '.npz')) as archive:
            image = archive['image'].astype(np.float32)
        image /= 255.0
        image = (image - INPUT_IMAGE_MEAN) / INPUT_IMAGE_STD
        image = resize(image, INPUT_SHAPE)
        image = image.astype(np.float32)
        return image, False
    except Exception as err:
        # Best effort on purpose: the caller filters out samples flagged with an error.
        # The cause is reported so broken files can be diagnosed.
        print("\n Error while computing features for " + str(image_id) + ': ' + repr(err) + '\n')
        return np.float32(0.0), True

def load_imge_tf(sample, identifier_key="Unnamed: 0",
                 path="/srv/workspace/research/mlml/datasets/mscoco/train_formatted_npz/", device="/cpu:0",
                 features_key="features"):
    """
        Wrap load_image_npz into a tensorflow op.
        Returns a copy of `sample` extended with the loaded image under `features_key`
        and the loader's error flag under "error".
    """
    with tf.device(device):
        py_inputs = [sample[identifier_key], tf.constant(path)]
        image, error = tf.py_func(load_image_npz,
                                  py_inputs,
                                  (tf.float32, tf.bool),
                                  stateful=False)
        image = tf.convert_to_tensor(image, dtype=tf.float32)
        extended_sample = dict(sample.items())
        extended_sample[features_key] = image
        extended_sample["error"] = error
        return extended_sample

# Dataset pipelines
def get_labels_py(image_id):
    """
        Look up the labels of one image in the module-level `global_labels` table:
        rows whose first column equals `image_id` are selected and every remaining
        column is returned as a flat float32 array.
    """
    id_column = global_labels.iloc[:, 0]
    matching_rows = global_labels[id_column == image_id]
    label_values = matching_rows.iloc[:, 1:].values
    return label_values.flatten().astype(np.float32)


def tf_get_labels_py(sample, device="/cpu:0"):
    """
        Wrap get_labels_py in tf.py_func (keyed on sample["Unnamed: 0"]) and return a
        copy of `sample` extended with the result under "binary_label".
    """
    with tf.device(device):
        labels_lookup = tf.py_func(get_labels_py,
                                   [sample["Unnamed: 0"]],
                                   [tf.float32],
                                   stateful=False)
        extended_sample = dict(sample.items())
        extended_sample["binary_label"] = labels_lookup
        return extended_sample


def get_dataset(input_csv, input_shape=INPUT_SHAPE, batch_size=BATCH_SIZE, shuffle=True,
                infinite_generator=True, random_crop=False,
                num_parallel_calls=32):
    """
        Build the batched input pipeline for one csv split.

        Each row of `input_csv` is turned into an (image, labels, negative weights) triple:
        the image is loaded through load_imge_tf, samples flagged with an error are dropped,
        labels come from get_labels_py and negative weights from get_weights_py (both read
        module-level tables that must be set before the dataset is iterated).

        input_csv: path of the csv file describing the split.
        input_shape: static shape assigned to the "features" tensor.
        batch_size: number of samples per batch.
        shuffle: shuffle the rows (buffer of 100 elements, fixed seed 0) before loading.
        infinite_generator: repeat the dataset indefinitely.
        random_crop: accepted but not used in this function.
        num_parallel_calls: accepted but not used in this function; every map below
            is hard-wired to num_parallel_calls=1 or left at its default.

        return: a tf.data.Dataset yielding (features, binary_label, negative_weights) batches.
    """
    # build dataset from csv file
    dataset = dataset_from_csv(input_csv)
    # Shuffle data
    if shuffle:
        dataset = dataset.shuffle(buffer_size=100, seed=0, reshuffle_each_iteration=True)

    # load image
    dataset = dataset.map(lambda sample: load_imge_tf(sample), num_parallel_calls=1)

    # filter out errors
    dataset = dataset.filter(lambda sample: tf.logical_not(sample["error"]))

    # set features shape
    dataset = dataset.map(lambda sample: dict(sample,
                                              features=set_tensor_shape(sample["features"], input_shape)))

    dataset = dataset.map(lambda sample: tf_get_labels_py(sample), num_parallel_calls=1)

    # set output shape
    # Note: (NUM_CLASSES) is a parenthesised int, not a one-element tuple.
    # NOTE(review): the py_func in tf_get_labels_py is declared with a list of output
    # types, so the actual label tensor may carry an extra leading axis of size 1 (the
    # training loop squeezes the batches) -- confirm before changing this shape.
    dataset = dataset.map(lambda sample: dict(sample, binary_label=set_tensor_shape(
        sample["binary_label"], (NUM_CLASSES))))
    
    # load weights
    dataset = dataset.map(lambda sample: tf_get_weights_py(sample), num_parallel_calls=1)
    
    dataset = dataset.map(lambda sample: dict(sample, negative_weights=set_tensor_shape(
    sample["negative_weights"], (NUM_CLASSES))))
    
    if infinite_generator:
        # Repeat indefinitely
        dataset = dataset.repeat(count=-1)

    # Make batch
    dataset = dataset.batch(batch_size)

    # Select only features and annotation
    dataset = dataset.map(lambda sample: (sample["features"], sample["binary_label"],
                                          sample["negative_weights"]))

    return dataset

In [None]:
def _apply_threshold(probabilities, threshold):
    """Binarise probabilities with the file's rounding rule shifted to `threshold`."""
    return np.clip(np.round(probabilities - threshold + 0.5), 0, 1)


def _per_label_ratios(predictions, groundtruth):
    """Return per-label (accuracy, true-positive ratio, true-negative ratio)."""
    hits = predictions == groundtruth
    n_samples = len(groundtruth)
    n_positives = np.sum(groundtruth, axis=0)
    accuracy = np.sum(hits, axis=0) / n_samples
    true_positive_ratio = np.sum(hits * (groundtruth == 1), axis=0) / n_positives
    true_negative_ratio = np.sum(hits * (groundtruth == 0), axis=0) / (n_samples - n_positives)
    return accuracy, true_positive_ratio, true_negative_ratio


def _per_label_scores(groundtruth, probabilities, predictions, n_labels):
    """Return the per-label score rows in the fixed order [AUC, recall, precision, f1, kappa]."""
    kappa = [cohen_kappa_score(groundtruth[:, idx], predictions[:, idx]) for idx in range(n_labels)]
    return [
        roc_auc_score(groundtruth, probabilities, average=None),
        recall_score(groundtruth, predictions, average=None),
        precision_score(groundtruth, predictions, average=None),
        f1_score(groundtruth, predictions, average=None),
        kappa,
    ]


def create_analysis_report(model_output, groundtruth, output_path, LABELS_LIST, validation_output=None,
                           validation_groundtruth=None):
    """
    Create a report of all the different evaluation metrics, including optimizing the threshold with the validation set
    if it is passed in the parameters.

    :param model_output: 2-D array (samples x labels) of predicted probabilities
    :param groundtruth: 2-D array (samples x labels) of binary ground-truth labels
    :param output_path: existing directory receiving the plots, the csv report and (when validation data is
        given) the validation predictions / ground truth
    :param LABELS_LIST: label names, one per column of model_output
    :param validation_output: optional 2-D array of validation probabilities used to tune per-label thresholds
    :param validation_groundtruth: optional 2-D array of validation ground-truth labels
    :return: DataFrame with one row per metric, one column per label and a trailing 'average' column.
        The same table is written transposed to '<output_path>/results_report.csv'.
    """
    n_labels = len(LABELS_LIST)

    # Round the probabilities at 0.5
    model_output_rounded = np.clip(np.round(model_output), 0, 1)
    accuracies_perclass, true_positives_ratio_perclass, true_negative_ratio_perclass = _per_label_ratios(
        model_output_rounded, groundtruth)
    percentage_of_positives_perclass = np.sum(groundtruth, axis=0) / len(groundtruth)

    # plot the accuracies per class (only these two rows belong on the bar chart)
    overview_df = pd.DataFrame([percentage_of_positives_perclass, accuracies_perclass], columns=LABELS_LIST,
                               index=['Ratio of positive samples', 'Model accuracy'])
    overview_df.T.plot.bar(figsize=(22, 12), fontsize=18)
    plt.title('Model accuracy vs the ratio of positive samples per class')
    plt.xticks(rotation=45)
    plt.savefig(os.path.join(output_path, "accuracies_vs_positiveRate.pdf"), format="pdf")
    plt.savefig(os.path.join(output_path, "accuracies_vs_positiveRate.png"))

    # Rows and their names are collected side by side so a label can never drift away from its values.
    rows = [percentage_of_positives_perclass, accuracies_perclass,
            true_positives_ratio_perclass, true_negative_ratio_perclass]
    rows += _per_label_scores(groundtruth, model_output, model_output_rounded, n_labels)
    row_names = ['Ratio of positive samples', 'Model accuracy', 'True positives ratio',
                 'True negatives ratio', "AUC", "Recall", "Precision", "f1-score", "Kappa score"]

    # Adjusting threshold based on validation set
    if validation_groundtruth is not None and validation_output is not None:
        np.savetxt(os.path.join(output_path, 'validation_predictions.out'), validation_output, delimiter=',')
        np.savetxt(os.path.join(output_path, 'valid_ground_truth_classes.txt'), validation_groundtruth, delimiter=',')
        thresholds = np.arange(0, 1, 0.01)
        f1_array = np.zeros((n_labels, len(thresholds)))
        for idx in range(n_labels):
            f1_array[idx, :] = [
                f1_score(validation_groundtruth[:, idx], _apply_threshold(validation_output[:, idx], threshold))
                for threshold in thresholds]
        # For every label keep the threshold with the best validation f1-score.
        threshold_per_class = thresholds[np.argmax(f1_array, axis=1)]

        # plot the f1 score across thresholds
        plt.figure(figsize=(20, 20))
        for idx in range(n_labels):
            plt.plot(thresholds, f1_array[idx, :], linewidth=5)
        plt.legend(LABELS_LIST, loc='best')
        plt.title("F1 Score vs different prediction threshold values for each class")
        plt.savefig(os.path.join(output_path, "f1_score_vs_thresholds.pdf"), format="pdf")
        plt.savefig(os.path.join(output_path, "f1_score_vs_thresholds.png"))

        # Applying thresholds optimized per class
        optimized_output_rounded = np.zeros_like(model_output)
        for idx in range(n_labels):
            optimized_output_rounded[:, idx] = _apply_threshold(model_output[:, idx], threshold_per_class[idx])

        rows += list(_per_label_ratios(optimized_output_rounded, groundtruth))
        # The AUC is computed on the raw probabilities, so the optimized AUC row equals the plain one.
        rows += _per_label_scores(groundtruth, model_output, optimized_output_rounded, n_labels)
        # Names follow the order produced by _per_label_scores: AUC, recall, precision, f1, kappa.
        row_names += ['Optimized model accuracy', 'Optimized true positives ratio',
                      'Optimized true negatives ratio', "Optimized AUC",
                      "Optimized recall", "Optimized precision", "Optimized f1-score", "Optimized Kappa score"]

    results_df = pd.DataFrame(rows, columns=LABELS_LIST, index=row_names)
    results_df['average'] = results_df.mean(numeric_only=True, axis=1)
    results_df.T.to_csv(os.path.join(output_path, "results_report.csv"), float_format="%.2f")
    return results_df


def evaluate_model(test_pred_prob, test_classes, saving_path, evaluation_file_path):
    """
    Evaluates predictions using exact-match accuracy, area under curve and hamming loss.
    The metrics are printed and written to `evaluation_file_path`; the probabilities and the
    ground truth are saved as text files inside `saving_path`.
    :param test_pred_prob: the predicted probabilities as an np.array
    :param test_classes: the ground truth labels
    :param saving_path: directory receiving 'predictions.out' and 'test_ground_truth_classes.txt'
    :param evaluation_file_path: path of the text file receiving the metric summary
    :return: accuracy (in percent), macro auc_roc, hamming_error
    """
    hard_predictions = np.round(test_pred_prob)

    # Accuracy
    accuracy = 100 * accuracy_score(test_classes, hard_predictions)
    print("Exact match accuracy is: " + str(accuracy) + "%")

    # Area Under the Receiver Operating Characteristic Curve (ROC AUC)
    auc_roc = roc_auc_score(test_classes, test_pred_prob)
    print("Macro Area Under the Curve (AUC) is: " + str(auc_roc))
    auc_roc_micro = roc_auc_score(test_classes, test_pred_prob, average="micro")
    print("Micro Area Under the Curve (AUC) is: " + str(auc_roc_micro))
    auc_roc_weighted = roc_auc_score(test_classes, test_pred_prob, average="weighted")
    print("Weighted Area Under the Curve (AUC) is: " + str(auc_roc_weighted))

    # Hamming loss is the fraction of labels that are incorrectly predicted.
    hamming_error = hamming_loss(test_classes, hard_predictions)
    print("Hamming Loss (ratio of incorrect tags) is: " + str(hamming_error))

    report_lines = [
        "Exact match accuracy is: " + str(accuracy) + "%",
        "Area Under the Curve (AUC) is: " + str(auc_roc),
        "Micro AUC is:" + str(auc_roc_micro),
        "Weighted AUC is:" + str(auc_roc_weighted),
        "Hamming Loss (ratio of incorrect tags) is: " + str(hamming_error),
    ]
    with open(evaluation_file_path, "w") as report_file:
        report_file.write("\n".join(report_lines))

    print("saving prediction to disk")
    predictions_file = os.path.join(saving_path, 'predictions.out')
    groundtruth_file = os.path.join(saving_path, 'test_ground_truth_classes.txt')
    np.savetxt(predictions_file, test_pred_prob, delimiter=',')
    np.savetxt(groundtruth_file, test_classes, delimiter=',')
    return accuracy, auc_roc, hamming_error

In [2]:
def weighted_loss(y_true, y_pred, positive_weights, negative_weights):
    """
        Mean binary cross entropy with separate weights for the two terms:
        the positive term (y_true * log(p)) is scaled by `positive_weights` and
        the negative term ((1 - y_true) * log(1 - p)) by `negative_weights`.
        Predictions are clipped to [1e-7, 1 - 1e-7] so the logarithms stay finite.
    """
    epsilon = 1e-7
    clipped = tf.clip_by_value(y_pred, epsilon, 1 - epsilon)

    positive_term = -y_true * tf.log(clipped) * positive_weights
    negative_term = (1.0 - y_true) * tf.log(1.0 - clipped) * negative_weights

    return tf.reduce_mean(positive_term - negative_term)

## Fine-tune the pretrained model on the dataset

In [None]:
# Experiment sweep: for every split (1..4) and every missing-label ratio (0.0, 0.2, ..., 0.8)
# build a fresh graph, fine-tune the new fully connected head on top of the pretrained
# Inception-v1 features with the custom weighted loss, save the final checkpoint and
# evaluate on the matching test csv.
for split in np.arange(1,5):
    for ratio in np.arange(0,1,0.2): 
        tf.reset_default_graph()
        EXPERIMENTNAME = "weighted_CE_ratio" + str(round(ratio, 1)) + "split_" + str(split)
        print("Current Experiment: " + EXPERIMENTNAME + "\n\n\n")
        # Module-level tables read by get_labels_py / get_weights_py while the dataset is iterated.
        global_labels = pd.read_csv('/srv/workspace/research/mlml/mlml_weightedLoss/labels/missing_labels'+
                                    str(round(ratio, 1))+'/train' + str(split) +"_" + str(round(ratio, 1)) + '.csv')
        global_weights_negative = pd.read_csv('/srv/workspace/research/mlml/mlml_weightedLoss/labels/missing_labels'+
                                    str(round(ratio, 1))+'/negative_weights' + str(split) +"_"  + str(round(ratio, 1)) + '.csv')
        # Loading datasets
        # NOTE(review): the dataset object is created before the `with tf.Graph().as_default()`
        # block below, i.e. outside the graph the rest of the model lives in -- confirm this is intended.
        training_dataset = get_dataset('/srv/workspace/research/mlml/mlml_weightedLoss/labels/missing_labels'+
                                    str(round(ratio, 1))+'/train' + str(split) +"_" + str(round(ratio, 1)) + '.csv')
        with tf.Graph().as_default():
            # Setting up training generator
            training_iterator = training_dataset.make_one_shot_iterator()
            training_next_element = training_iterator.get_next()

            # Setting up variables
            input_labels = tf.placeholder(tf.float32, [None, NUM_CLASSES], name="true_labels")
            negative_weights = tf.placeholder(tf.float32, [None, NUM_CLASSES], name="negative_weights")
            input_images = tf.placeholder(tf.float32, [None, 224, 224, 3], name="input")
            train_phase = tf.placeholder(tf.bool, name="is_training")

            # Create the model, use the default arg scope to configure the batch norm parameters.
            with slim.arg_scope(inception.inception_v1_arg_scope()):
                ignored_logits, end_points = inception.inception_v1(input_images, num_classes=NUM_CLASSES, 
                                                           is_training=train_phase)

            # The network's own logits are discarded; the 'Mixed_5c' end point feeds the new head.
            featured_extracted = end_points['Mixed_5c']

            with tf.name_scope('trainable/Fully_connected_1'):
                flattened = tf.reshape(featured_extracted, [-1, 7*7* 1024])
                fully1 = tf.nn.sigmoid(full_layer(flattened, 256))

            with tf.name_scope('trainable/Fully_connected_2'):
                #dropped = tf.nn.dropout(fully1, keep_prob=current_keep_prob)
                output_logits = full_layer(fully1, NUM_CLASSES)
            
            probabilities = tf.nn.sigmoid(output_logits)
            tf.summary.histogram('outputs', probabilities)

            # Only variables created under a name scope containing "trainable" are optimised.
            trainable_layers = [var for var in tf.global_variables() if ("trainable" in var.op.name)]
            positive_imbalance_weights = tf.constant(Pos_balance_weights)
            # `loss` is only logged to TensorBoard; the optimiser below minimises my_weights_loss.
            loss = tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(input_labels,output_logits,
                                                                           positive_imbalance_weights))
            
            my_weights_loss = weighted_loss(y_true= input_labels, y_pred= probabilities,
                              positive_weights= positive_imbalance_weights, negative_weights= negative_weights)
                


            # NOTE(review): global_step is not passed to minimize() below, so nothing in this
            # block increments it and the decayed learning rate stays at its initial value -- confirm.
            global_step = tf.Variable(0, trainable=False)
            learning_rate = tf.train.exponential_decay(learning_rate=0.1, global_step=global_step,
                                                       decay_steps=1000,
                                                      decay_rate=0.95,staircase=True)
            train_step = tf.train.AdadeltaOptimizer(learning_rate).minimize(my_weights_loss,
                                                                            var_list=trainable_layers)
            
            # define accuracy
            correct_prediction = tf.equal(tf.round(probabilities), input_labels)
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

            # Adding tensorboard summaries
            tf.summary.scalar('Original_cross_entropy', loss)
            tf.summary.scalar('Weighted cross entropy',  my_weights_loss)
            tf.summary.scalar('Accuracy', accuracy)
            # Merge all the summaries
            merged = tf.summary.merge_all()

            # restoring pretrained model weights
            # Every slim model variable is restored except those under the excluded scopes.
            checkpoint_exclude_scopes = ["InceptionV1/Logits", "InceptionV1/AuxLogits"]
            exclusions = [scope.strip() for scope in checkpoint_exclude_scopes]
            variables_to_restore = []
            for var in slim.get_model_variables():
                for exclusion in exclusions:
                    if var.op.name.startswith(exclusion):
                        break
                else:
                    variables_to_restore.append(var)
            init_fn = slim.assign_from_checkpoint_fn(
                os.path.join(PRETRAINED_MODEL_DIR, 'inception_v1.ckpt'),
                variables_to_restore)

            # Setting up saving directory
            # The run directory is named after the start time (day-month_hour-minute).
            experiment_name = strftime("%d-%m_%H-%M", localtime())
            exp_dir = os.path.join(OUTPUT_PATH, EXPERIMENTNAME, experiment_name)

            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()

            # The two validation histories are never filled in this block and stay empty.
            epoch_losses_history, epoch_accurcies_history, val_losses_history, val_accuracies_history = [], [], [], []
            #my_loss_history, my_loss_val_history = [], []
            with tf.Session() as sess:
                # Write summaries to LOG_DIR -- used by TensorBoard
                train_writer = tf.summary.FileWriter(exp_dir + '/tensorboard/train', graph=tf.get_default_graph())
                # test_writer is created but nothing is written to it in this block.
                test_writer = tf.summary.FileWriter(exp_dir + '/tensorboard/test', graph=tf.get_default_graph())
                print("Execute the following in a terminal:\n" + "tensorboard --logdir=" + exp_dir)
                # Initialise everything first, then overwrite the backbone with the checkpoint values.
                sess.run(tf.global_variables_initializer())
                init_fn(sess)
                for epoch in range(NUM_EPOCHS):
                    batch_loss, batch_accuracy = np.zeros([TRAINING_STEPS, 1]), np.zeros([TRAINING_STEPS, 1])
                    #batch_my_loss, val_my_loss = np.zeros([TRAINING_STEPS, 1]), np.zeros([VALIDATION_STEPS, 1])
                    #val_accuracies, val_losses = np.zeros([VALIDATION_STEPS, 1]), np.zeros([VALIDATION_STEPS, 1])
                    for batch_counter in range(TRAINING_STEPS):
                        batch = sess.run(training_next_element)
                        batch_images = batch[0]
                        # presumably the squeeze removes a singleton axis added by the py_func
                        # list outputs so the arrays match the [None, NUM_CLASSES] placeholders -- confirm
                        batch_labels = np.squeeze(batch[1])
                        batch_negative_weights = np.squeeze(batch[2])
                        summary, batch_loss[batch_counter], batch_accuracy[batch_counter], _ \
                        = sess.run([merged, my_weights_loss, accuracy, train_step],
                                   feed_dict={input_images:batch_images,
                                              input_labels:batch_labels,
                                              negative_weights: batch_negative_weights,
                                              train_phase:True})
                    print("Epoch #{}".format(epoch+1), "Loss: {:.4f}".format(np.mean(batch_loss)),
                          "accuracy: {:.4f}".format(np.mean(batch_accuracy)))
                    epoch_losses_history.append(np.mean(batch_loss))
                    epoch_accurcies_history.append(np.mean(batch_accuracy))
                    #my_loss_history.append(np.mean(batch_my_loss))
                    # Add to summaries
                    # Only the summary of the last batch of the epoch is written.
                    train_writer.add_summary(summary, epoch)
                    
                # os.makedirs raises if the directory already exists (e.g. a re-run within the same minute).
                os.makedirs(os.path.join(PRETRAINED_MODEL_DIR,EXPERIMENTNAME,experiment_name))
                save_path = saver.save(sess, os.path.join(PRETRAINED_MODEL_DIR,EXPERIMENTNAME, 
                                              experiment_name, "last_epoch.ckpt"))
                
                # Testing the model
                test_split = '/srv/workspace/research/mlml/mlml_weightedLoss/labels/missing_labels'+ \
                                    str(round(ratio, 1))+'/test' + str(split) + "_" + str(round(ratio, 1)) + '.csv'
                # Rebinding global_labels switches get_labels_py to the test labels.
                global_labels = pd.read_csv(test_split)
                # get_dataset is called with its defaults here (shuffle=True, infinite_generator=True);
                # the ground truth is therefore collected from the batches themselves so that
                # predictions and labels stay aligned.
                test_dataset = get_dataset(test_split)
                test_classes = np.zeros_like(global_labels.iloc[:,1:].values, dtype=float)
                #test_images, test_classes = load_test_set_raw(test_split)

                # NOTE(review): only TEST_NUM_STEPS * BATCH_SIZE rows are filled below; when the
                # number of test rows is not a multiple of BATCH_SIZE the remaining rows of
                # test_classes / test_pred_prob stay all-zero and are still passed to the metrics.
                TEST_NUM_STEPS = int(np.floor((len(global_labels)/BATCH_SIZE)))
                #split_size = int(len(test_classes) / TEST_NUM_STEPS)
                test_pred_prob = np.zeros_like(test_classes, dtype=float)
                test_iterator = test_dataset.make_one_shot_iterator()
                test_next_element = test_iterator.get_next()
                
                for test_batch_counter in range(TEST_NUM_STEPS):
                    start_idx = (test_batch_counter * BATCH_SIZE)
                    end_idx = (test_batch_counter * BATCH_SIZE) + BATCH_SIZE
                    test_batch = sess.run(test_next_element)
                    test_batch_images = test_batch[0]
                    test_batch_labels = np.squeeze(test_batch[1])
                    test_classes[start_idx:end_idx,:] = test_batch_labels
                    test_pred_prob[start_idx:end_idx,:] = sess.run(probabilities,
                                                             feed_dict={input_images:test_batch_images,train_phase:False})

                accuracy_out, auc_roc, hamming_error = evaluate_model(test_pred_prob, test_classes,
                                                                  saving_path=exp_dir,
                                                                  evaluation_file_path= \
                                                                  os.path.join(exp_dir,"evaluation_results.txt"))           
                results = create_analysis_report(test_pred_prob, test_classes, exp_dir, LABELS_LIST)

            # Plot and save losses
            # NOTE(review): plot_loss_acuracy is not defined in the visible part of this notebook --
            # confirm it is defined elsewhere before running.
            plot_loss_acuracy(epoch_losses_history, epoch_accurcies_history,val_losses_history, 
                              val_accuracies_history,exp_dir)

Current Experiment: weighted_CE_ratio0.0split_1



INFO:tensorflow:Summary name Weighted cross entropy is illegal; using Weighted_cross_entropy instead.
Execute the following in a terminal:
tensorboard --logdir=/srv/workspace/research/mlml/experiments_results/weighted_CE_ratio0.0split_1/14-12_15-10
INFO:tensorflow:Restoring parameters from /srv/workspace/research/mlml/pretrained_models/inception_v1.ckpt
Epoch #1 Loss: 1.1443 accuracy: 0.6862
Epoch #2 Loss: 0.8454 accuracy: 0.8167
Epoch #3 Loss: 0.7388 accuracy: 0.8415
Epoch #4 Loss: 0.6832 accuracy: 0.8533
Epoch #5 Loss: 0.6475 accuracy: 0.8610
Exact match accuracy is: 0.7987711213517665%
Macro Area Under the Curve (AUC) is: 0.9236844723372624
Micro Area Under the Curve (AUC) is: 0.9378965579710195
Weighted Area Under the Curve (AUC) is: 0.9063294168271168
Hamming Loss (ratio of incorrect tags) is: 0.1370532514080901
saving prediction to disk
Current Experiment: weighted_CE_ratio0.2split_1



INFO:tensorflow:Summary name Weighted cross

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Current Experiment: weighted_CE_ratio0.8split_1



INFO:tensorflow:Summary name Weighted cross entropy is illegal; using Weighted_cross_entropy instead.
Execute the following in a terminal:
tensorboard --logdir=/srv/workspace/research/mlml/experiments_results/weighted_CE_ratio0.8split_1/15-12_08-54
INFO:tensorflow:Restoring parameters from /srv/workspace/research/mlml/pretrained_models/inception_v1.ckpt
Epoch #1 Loss: 0.4662 accuracy: 0.9758
Epoch #2 Loss: 0.3966 accuracy: 0.9908
Epoch #3 Loss: 0.3597 accuracy: 0.9858
Epoch #4 Loss: 0.3304 accuracy: 0.9808
Epoch #5 Loss: 0.3083 accuracy: 0.9770
Exact match accuracy is: 2.048131080389145%
Macro Area Under the Curve (AUC) is: 0.8627380792761077
Micro Area Under the Curve (AUC) is: 0.8918525547805205
Weighted Area Under the Curve (AUC) is: 0.8610646645922002
Hamming Loss (ratio of incorrect tags) is: 0.03789938556067588
saving prediction to disk


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Current Experiment: weighted_CE_ratio0.0split_2



INFO:tensorflow:Summary name Weighted cross entropy is illegal; using Weighted_cross_entropy instead.
Execute the following in a terminal:
tensorboard --logdir=/srv/workspace/research/mlml/experiments_results/weighted_CE_ratio0.0split_2/15-12_13-18
INFO:tensorflow:Restoring parameters from /srv/workspace/research/mlml/pretrained_models/inception_v1.ckpt
Epoch #1 Loss: 1.1382 accuracy: 0.6887
Epoch #2 Loss: 0.8374 accuracy: 0.8212
Epoch #3 Loss: 0.7349 accuracy: 0.8434
Epoch #4 Loss: 0.6809 accuracy: 0.8542
Epoch #5 Loss: 0.6465 accuracy: 0.8614
Exact match accuracy is: 0.6460654953168797%
Macro Area Under the Curve (AUC) is: 0.9226805418736234
Micro Area Under the Curve (AUC) is: 0.9362047660651625
Weighted Area Under the Curve (AUC) is: 0.9040611820609085
Hamming Loss (ratio of incorrect tags) is: 0.13461535174676967
saving prediction to disk




Current Experiment: weighted_CE_ratio0.2split_2



INFO:tensorflow:Summary name Weighted cross entropy is illegal; using Weighted_cross_entropy instead.
Execute the following in a terminal:
tensorboard --logdir=/srv/workspace/research/mlml/experiments_results/weighted_CE_ratio0.2split_2/15-12_17-46
INFO:tensorflow:Restoring parameters from /srv/workspace/research/mlml/pretrained_models/inception_v1.ckpt
Epoch #1 Loss: 1.0507 accuracy: 0.7561
Epoch #2 Loss: 0.7816 accuracy: 0.8481
Epoch #3 Loss: 0.6803 accuracy: 0.8639
Epoch #4 Loss: 0.6268 accuracy: 0.8719
Epoch #5 Loss: 0.5938 accuracy: 0.8774
Exact match accuracy is: 1.0870308333903056%
Macro Area Under the Curve (AUC) is: 0.9205324064331162
Micro Area Under the Curve (AUC) is: 0.9345542284451157
Weighted Area Under the Curve (AUC) is: 0.9024227394001353
Hamming Loss (ratio of incorrect tags) is: 0.11617129281465782
saving prediction to disk




Current Experiment: weighted_CE_ratio0.4split_2



INFO:tensorflow:Summary name Weighted cross entropy is illegal; using Weighted_cross_entropy instead.
Execute the following in a terminal:
tensorboard --logdir=/srv/workspace/research/mlml/experiments_results/weighted_CE_ratio0.4split_2/15-12_22-11
INFO:tensorflow:Restoring parameters from /srv/workspace/research/mlml/pretrained_models/inception_v1.ckpt
Epoch #1 Loss: 0.9269 accuracy: 0.8475
Epoch #2 Loss: 0.6985 accuracy: 0.8932
Epoch #3 Loss: 0.6059 accuracy: 0.8953
Epoch #4 Loss: 0.5540 accuracy: 0.8982
Epoch #5 Loss: 0.5229 accuracy: 0.9002
Exact match accuracy is: 2.310795104942914%
Macro Area Under the Curve (AUC) is: 0.919449152223482
Micro Area Under the Curve (AUC) is: 0.9339617294968425
Weighted Area Under the Curve (AUC) is: 0.9011056765464368
Hamming Loss (ratio of incorrect tags) is: 0.09339192930881247
saving prediction to disk




Current Experiment: weighted_CE_ratio0.6split_2



INFO:tensorflow:Summary name Weighted cross entropy is illegal; using Weighted_cross_entropy instead.
Execute the following in a terminal:
tensorboard --logdir=/srv/workspace/research/mlml/experiments_results/weighted_CE_ratio0.6split_2/16-12_02-42
INFO:tensorflow:Restoring parameters from /srv/workspace/research/mlml/pretrained_models/inception_v1.ckpt
Epoch #1 Loss: 0.7273 accuracy: 0.9445
Epoch #2 Loss: 0.5821 accuracy: 0.9511
Epoch #3 Loss: 0.5063 accuracy: 0.9402
Epoch #4 Loss: 0.4613 accuracy: 0.9355
Epoch #5 Loss: 0.4315 accuracy: 0.9333
Exact match accuracy is: 4.683120257058864%
Macro Area Under the Curve (AUC) is: 0.9093511750134031
Micro Area Under the Curve (AUC) is: 0.9255430276583076
Weighted Area Under the Curve (AUC) is: 0.8909866183847662
Hamming Loss (ratio of incorrect tags) is: 0.06453861010460107
saving prediction to disk


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Current Experiment: weighted_CE_ratio0.8split_2



INFO:tensorflow:Summary name Weighted cross entropy is illegal; using Weighted_cross_entropy instead.
Execute the following in a terminal:
tensorboard --logdir=/srv/workspace/research/mlml/experiments_results/weighted_CE_ratio0.8split_2/16-12_07-11
INFO:tensorflow:Restoring parameters from /srv/workspace/research/mlml/pretrained_models/inception_v1.ckpt
Epoch #1 Loss: 0.4690 accuracy: 0.9762
Epoch #2 Loss: 0.4046 accuracy: 0.9914
Epoch #3 Loss: 0.3637 accuracy: 0.9872
Epoch #4 Loss: 0.3333 accuracy: 0.9823
Epoch #5 Loss: 0.3122 accuracy: 0.9781
Exact match accuracy is: 2.1603883229643808%
Macro Area Under the Curve (AUC) is: 0.8597628663402512
Micro Area Under the Curve (AUC) is: 0.8881737082748263
Weighted Area Under the Curve (AUC) is: 0.857939412470936
Hamming Loss (ratio of incorrect tags) is: 0.037614086962466674
saving prediction to disk


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Current Experiment: weighted_CE_ratio0.0split_3



INFO:tensorflow:Summary name Weighted cross entropy is illegal; using Weighted_cross_entropy instead.
Execute the following in a terminal:
tensorboard --logdir=/srv/workspace/research/mlml/experiments_results/weighted_CE_ratio0.0split_3/16-12_11-34
INFO:tensorflow:Restoring parameters from /srv/workspace/research/mlml/pretrained_models/inception_v1.ckpt
Epoch #1 Loss: 1.1329 accuracy: 0.6979
Epoch #2 Loss: 0.8372 accuracy: 0.8224
Epoch #3 Loss: 0.7343 accuracy: 0.8436
Epoch #4 Loss: 0.6799 accuracy: 0.8550
Epoch #5 Loss: 0.6469 accuracy: 0.8620
Exact match accuracy is: 0.5654143533499097%
Macro Area Under the Curve (AUC) is: 0.9246730997310525
Micro Area Under the Curve (AUC) is: 0.9385717508490926
Weighted Area Under the Curve (AUC) is: 0.9065649476862961
Hamming Loss (ratio of incorrect tags) is: 0.14029854559078989
saving prediction to disk




Current Experiment: weighted_CE_ratio0.2split_3



INFO:tensorflow:Summary name Weighted cross entropy is illegal; using Weighted_cross_entropy instead.
Execute the following in a terminal:
tensorboard --logdir=/srv/workspace/research/mlml/experiments_results/weighted_CE_ratio0.2split_3/16-12_15-57
INFO:tensorflow:Restoring parameters from /srv/workspace/research/mlml/pretrained_models/inception_v1.ckpt
Epoch #1 Loss: 1.0423 accuracy: 0.7595


## Testing the trained model

In [None]:
# Testing model: rebuild the network graph, restore a saved checkpoint and
# evaluate it on one held-out test split.

# Missing-label ratio and split index of the test CSV; defined once so the
# experiment name and the CSV path cannot drift apart.
TEST_MISSING_RATIO = round(0.0, 1)
TEST_SPLIT_ID = 1

EXPERIMENTNAME = "original_CE_ratio" + str(TEST_MISSING_RATIO) + "split_" + str(TEST_SPLIT_ID)
print("Current Experiment: " + EXPERIMENTNAME + "\n\n\n")
# Setting up saving directory
experiment_name = '2019-12-11_00-22-05'
exp_dir = os.path.join(OUTPUT_PATH, EXPERIMENTNAME, experiment_name)

with tf.Graph().as_default():
    # Placeholders fed at inference time.
    input_labels = tf.placeholder(tf.float32, [None, NUM_CLASSES], name="true_labels")
    input_images = tf.placeholder(tf.float32, [None, 224, 224, 3], name="input")
    train_phase = tf.placeholder(tf.bool, name="is_training")

    # Create the model, use the default arg scope to configure the batch norm parameters.
    with slim.arg_scope(inception.inception_v1_arg_scope()):
        ignored_logits, end_points = inception.inception_v1(input_images, num_classes=NUM_CLASSES,
                                                            is_training=train_phase)

    # The Inception classification head is ignored; the 'Mixed_5c' end point
    # feeds the two custom fully connected layers below.
    featured_extracted = end_points['Mixed_5c']

    with tf.name_scope('trainable/Fully_connected_1'):
        flattened = tf.reshape(featured_extracted, [-1, 7 * 7 * 1024])
        fully1 = tf.nn.sigmoid(full_layer(flattened, 256))

    with tf.name_scope('trainable/Fully_connected_2'):
        output_logits = full_layer(fully1, NUM_CLASSES)

    trainable_layers = [var for var in tf.global_variables() if ("trainable" in var.op.name)]

    # Loss / optimizer ops are never run in this cell. They are kept so that the
    # set of variables handed to tf.train.Saver() below is unchanged.
    # NOTE(review): presumably this mirrors the graph the checkpoint was saved
    # from -- confirm before pruning these ops.
    positive_imbalance_weights = tf.constant(Pos_balance_weights)
    loss = tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(input_labels, output_logits,
                                                                   positive_imbalance_weights))
    probabilities = tf.nn.sigmoid(output_logits)

    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(learning_rate=0.1, global_step=global_step, decay_steps=1000,
                                               decay_rate=0.95, staircase=True)
    train_step = tf.train.AdadeltaOptimizer(learning_rate).minimize(loss, var_list=trainable_layers)

    # Per-label accuracy tensor (unused here). Named differently from the
    # `accuracy` float returned by evaluate_model so the tensor is not overwritten.
    correct_prediction = tf.equal(tf.round(probabilities), input_labels)
    batch_accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    test_split = os.path.join(SOURCE_PATH, "labels",
                              "missing_labels" + str(TEST_MISSING_RATIO),
                              "test" + str(TEST_SPLIT_ID) + "_" + str(TEST_MISSING_RATIO) + ".csv")
    global_labels = pd.read_csv(test_split)
    test_dataset = get_dataset(test_split)
    # The first CSV column is skipped; the remaining columns size the label matrix.
    test_classes = np.zeros_like(global_labels.iloc[:, 1:].values, dtype=float)
    test_pred_prob = np.zeros_like(test_classes, dtype=float)

    # Only whole batches are evaluated; any remainder rows are left out.
    TEST_NUM_STEPS = len(global_labels) // BATCH_SIZE
    num_evaluated = TEST_NUM_STEPS * BATCH_SIZE

    test_iterator = test_dataset.make_one_shot_iterator()
    test_next_element = test_iterator.get_next()

    # NOTE(review): EXPERIMENTNAME above says ratio 0.0 while this checkpoint
    # directory says ratio 0.2 -- confirm which model is meant to be evaluated.
    SAVED_MODEL_PATH = os.path.join(PRETRAINED_MODEL_DIR,
                                    'original_CE_ratio0.2split_1/2019-12-11_00-22-05', "last_epoch.ckpt")

    with tf.Session() as sess:
        saver.restore(sess, SAVED_MODEL_PATH)
        # NOTE(review): the message mentions "best validation" but the file
        # restored is last_epoch.ckpt -- confirm the intended checkpoint.
        print("Model with best validation restored before testing.")

        for test_batch_counter in range(TEST_NUM_STEPS):
            start_idx = test_batch_counter * BATCH_SIZE
            end_idx = start_idx + BATCH_SIZE
            test_batch = sess.run(test_next_element)
            test_batch_images = test_batch[0]
            test_batch_labels = np.squeeze(test_batch[1])
            test_classes[start_idx:end_idx, :] = test_batch_labels
            test_pred_prob[start_idx:end_idx, :] = sess.run(
                probabilities,
                feed_dict={input_images: test_batch_images, train_phase: False})
            if test_batch_counter % 1000 == 0:
                print(test_batch_counter)

        # Drop the rows that were never filled (len(global_labels) % BATCH_SIZE
        # trailing rows). Left in, they are all-zero in both arrays and would be
        # scored as perfect predictions, distorting every metric.
        test_pred_prob = test_pred_prob[:num_evaluated]
        test_classes = test_classes[:num_evaluated]

        accuracy, auc_roc, hamming_error = evaluate_model(
            test_pred_prob, test_classes,
            saving_path=exp_dir,
            evaluation_file_path=os.path.join(exp_dir, "evaluation_results.txt"))
        results = create_analysis_report(test_pred_prob, test_classes, exp_dir, LABELS_LIST)
