In [1]:
import os
import sys

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Make the local `src` package importable from within a notebook by putting
# the parent of the current working directory on the module search path.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
from src.data.malimg import load_malimg

In [None]:
def one_hot_encode(labels, num_classes=None):
    """Encode integer class labels as rows of +1 / -1 values.

    Each label becomes one row in which the column matching the label holds
    ``1`` and every other column holds ``-1``.

    Parameters
    ----------
    labels : array-like of int, shape = [num_samples]
      Class indices to encode. Lists are accepted as well as NumPy arrays.
    num_classes : int, optional
      Number of columns in the encoding. When omitted it is inferred as
      ``max(labels) + 1`` (or ``0`` for empty input), which is the width the
      function has always used.

    Returns
    -------
    encoded : array, shape = [num_samples, num_classes]
      Float array containing ``1`` at each sample's label and ``-1`` elsewhere.

    Raises
    ------
    TypeError
      If ``labels`` is non-empty and does not hold integers.
    """
    labels = np.asarray(labels)
    has_labels = labels.size > 0

    # Fail with a clear message instead of an obscure error from np.zeros /
    # fancy indexing when float (or other non-integer) labels are passed.
    if has_labels and not np.issubdtype(labels.dtype, np.integer):
        raise TypeError(
            'labels must be integers, got dtype {}'.format(labels.dtype))

    if num_classes is None:
        # max() is undefined for an empty array, so fall back to zero columns.
        num_classes = int(labels.max()) + 1 if has_labels else 0

    # Start from all -1 and flip the label column of every row to 1.
    encoded = np.full((labels.shape[0], num_classes), -1.0)
    if has_labels:
        encoded[np.arange(labels.shape[0]), labels] = 1
    return encoded

def plot_confusion_matrix(phase, path, class_names, num_classes=25):
    """Plots the confusion matrix for saved results using matplotlib.

    Every file returned by ``list_files(path=path)`` is loaded with
    ``np.load`` and flattened. The combined values are reshaped into rows of
    ``2 * num_classes`` entries: the first ``num_classes`` columns of a row are
    taken as the predicted label scores and the remaining columns as the
    actual label scores. Both halves are reduced to class indices with argmax.

    NOTE(review): ``list_files``, ``confusion_matrix``,
    ``classification_report`` and ``accuracy_score`` are not defined in this
    block; they are presumably a project helper and ``sklearn.metrics``
    functions - confirm they are imported before calling.

    Parameters
    ----------
    phase : str
      String value indicating for what phase is the confusion matrix, i.e. training/validation/testing
    path : str
      Directory where the predicted and actual label NPY files reside
    class_names : list of str
      List consisting of the class names for the labels
    num_classes : int, optional
      Number of columns used by each encoded label. Defaults to 25, the width
      that used to be hard-coded.

    Returns
    -------
    conf : array, shape = [num_classes, num_classes]
      Confusion matrix
    accuracy : float
      Predictive accuracy
    report
      Classification report as returned by ``classification_report``

    Raises
    ------
    ValueError
      If no result files are found in ``path``.
    """

    # list all the results files
    files = list_files(path=path)
    total = len(files)
    if total == 0:
        raise ValueError('No result files found in {}'.format(path))

    # Report progress roughly every 20% of the files. Integer arithmetic is
    # used because a float test such as `0.6 % 0.2 == 0` is not reliable.
    report_every = max(1, total // 5)

    batches = []
    for index, npy_file in enumerate(files):
        # flatten each batch so files of differing shapes can be joined
        batches.append(np.ravel(np.load(npy_file)))

        if index % report_every == 0:
            print('Done appending {}% of {}'.format((index / total) * 100, total))

    # join once at the end instead of re-allocating on every np.append
    labels = np.concatenate(batches)
    row_width = 2 * num_classes
    labels = np.reshape(labels, newshape=(labels.shape[0] // row_width, row_width))

    print('Done appending NPY files.')

    # decode the predicted and actual labels to single integers; NumPy's
    # argmax does this directly, so no TensorFlow session is required
    predictions = np.argmax(labels[:, :num_classes], axis=1)
    actual = np.argmax(labels[:, num_classes:], axis=1)

    # get the confusion matrix based on the actual and predicted labels
    conf = confusion_matrix(y_true=actual, y_pred=predictions)

    # get the classification report on the actual and predicted labels
    report = classification_report(y_true=actual, y_pred=predictions, target_names=class_names)

    # create a confusion matrix plot
    plt.imshow(conf, cmap=plt.cm.Purples, interpolation='nearest')

    # set the plot title
    plt.title('Confusion Matrix for {} Phase'.format(phase))

    # legend of intensity for the plot
    plt.colorbar()

    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names, rotation=45)
    plt.yticks(tick_marks, class_names)

    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')

    # lay out after the axis labels exist so they are included in the fit
    plt.tight_layout()

    # show the plot
    plt.show()

    # get the accuracy of the phase
    accuracy = accuracy_score(y_pred=predictions, y_true=actual)

    # return the confusion matrix, the accuracy, and the classification report
    return conf, accuracy, report