In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from keras.utils import to_categorical, img_to_array, image_dataset_from_directory
from keras.models import load_model
from keras.preprocessing import image
from keras import layers
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

### 1. Load images and labels as NumPy arrays

In [None]:
def data_loader_np_array(path, map_labels, target_size, verbose=True):
    """Load the images stored in one sub-folder per class into NumPy arrays.

    Every file whose name ends with "jpg" is read with OpenCV, converted to
    grayscale, resized to a ``target_size`` x ``target_size`` square and
    divided by 255. Files that OpenCV cannot decode are skipped with a
    warning instead of aborting the whole load.

    Args:
        path: directory that contains one sub-folder per class.
        map_labels: dictionary mapping each integer label to the name of its
            class sub-folder.
        target_size: side length, in pixels, of the resized square images.
        verbose: if True, prints the number of images found per class.
    Returns:
        features: array of shape (n_images, target_size, target_size, 1).
        target_vector: one-hot encoded labels, one row per image and
            ``max(map_labels) + 1`` columns.
    Raises:
        ValueError: if no image could be loaded.
    """
    import warnings

    # feature and label lists
    data = []
    labels = []

    # read images from directory
    for label, emotion in map_labels.items():
        class_dir = os.path.join(path, emotion)
        images = [file for file in os.listdir(class_dir) if file.endswith("jpg")]
        if verbose:
            print("Reading {} images found for {}".format(len(images), emotion))
        for image_name in images:
            image_path = os.path.join(class_dir, image_name)
            image = cv2.imread(image_path)
            # cv2.imread signals a corrupt/unsupported file by returning None
            # rather than raising, which would crash cvtColor below.
            if image is None:
                warnings.warn("Skipping unreadable image: {}".format(image_path))
                continue
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            image = cv2.resize(image, (target_size, target_size))
            image = image.reshape(target_size, target_size, 1)/255.0
            data.append(image)
            labels.append(label)

    if not data:
        raise ValueError("No readable images found under {}".format(path))

    # Fix the one-hot width from the label map, not from the labels that
    # happened to be loaded; otherwise an empty highest-numbered class would
    # silently produce a narrower target matrix.
    num_classes = max(map_labels) + 1

    # to numpy arrays and apply one-hot encoding
    data = np.array(data)
    labels = to_categorical(np.array(labels), num_classes=num_classes)

    # shuffle dataset (same permutation for features and targets)
    perm = np.random.permutation(len(data))
    features, target_vector = data[perm], labels[perm]

    return features, target_vector

### 2. Load images and labels as TensorFlow dataset

In [None]:
def data_loader_tf_dataset(path, batch_size, target_size, augmentation=False, shuffle=True, transfer_learning=False):
    """Build a tf.data pipeline that reads images from class sub-folders.

    The pipeline 1) resizes the images and scales them by 1/255,
    2) optionally applies random augmentation (rotation, horizontal flip,
    zoom) and 3) yields one-hot encoded labels.

    Args:
        path: full directory to read images from.
        batch_size: the number of images per batch.
        target_size: side length, in pixels, of the square output images.
        augmentation: if True, random data augmentation is applied.
        shuffle: if True, the dataset elements are shuffled.
        transfer_learning: if True, images are loaded as 3-channel RGB;
            otherwise they are loaded as 1-channel grayscale.
    Returns:
        dataset: a tf.data.Dataset object. It yields a tuple (images, labels),
        where images has shape (batch_size, target_size, target_size, C)
        with C = 3 when transfer_learning is True and C = 1 otherwise, and
        labels are a float32 tensor of shape (batch_size, num_classes),
        representing a one-hot encoding of the class index.
    """
    # deterministic preprocessing: safe to cache because it yields the same
    # result on every pass
    preprocessing = keras.models.Sequential()
    preprocessing.add(layers.Resizing(target_size, target_size))
    preprocessing.add(layers.Rescaling(scale=1.0/255))

    # pretrained backbones expect 3-channel input
    color_mode = "rgb" if transfer_learning else "grayscale"

    # load data from directory
    dataset = image_dataset_from_directory(path,
                                           labels="inferred",
                                           label_mode="categorical",
                                           image_size=(target_size, target_size),
                                           color_mode=color_mode,
                                           batch_size=batch_size,
                                           shuffle=shuffle)

    # Cache BEFORE any random transformation. Caching after augmentation
    # would store the first epoch's augmented images and replay exactly the
    # same ones on every later epoch.
    dataset = dataset.map(lambda x, y: (preprocessing(x), y),
                          num_parallel_calls=tf.data.AUTOTUNE)\
                     .cache()

    if augmentation:
        # RandomRotation's factor is a fraction of a full turn (2*pi), so
        # +/-10 degrees is 10/360, not the radian value 0.17 (which would
        # allow rotations of roughly +/-61 degrees).
        max_rotation_degrees = 10
        augmenter = keras.models.Sequential()
        augmenter.add(layers.RandomRotation(factor=max_rotation_degrees / 360.0))
        augmenter.add(layers.RandomFlip("horizontal"))
        augmenter.add(layers.RandomZoom(height_factor=0.1, width_factor=0.1))
        # training=True forces the random layers to be active when they are
        # called outside of model.fit.
        dataset = dataset.map(lambda x, y: (augmenter(x, training=True), y),
                              num_parallel_calls=tf.data.AUTOTUNE)

    # overlap input preparation with model execution
    return dataset.prefetch(tf.data.AUTOTUNE)

### 3. Load single model to predict

In [None]:
def load_predict(model, dataset):
    """Predict the class of every sample of a dataset with an already-loaded model.

    The dataset is traversed exactly once and each batch is used both for its
    labels and for its predictions. This keeps ``y_true`` and ``y_pred``
    aligned even when the dataset yields its elements in a different order
    on each pass (e.g. a shuffled, non-cached pipeline).

    Args:
        model: model object exposing ``predict_on_batch`` (preferred) or
            ``predict``; it must return one row of class scores per image.
        dataset: an iterable (typically a tf.data.Dataset) yielding tuples
            (images, labels), where labels is a one-hot encoded array of
            shape (batch_size, num_classes).
    Returns:
        y_true: list with the true class index of each sample.
        y_pred: list with the predicted class index of each sample.
    """
    # predict_on_batch avoids the per-call overhead of predict; fall back to
    # predict for model-like objects that do not provide it.
    predict_batch = getattr(model, "predict_on_batch", None) or model.predict

    true_batches = []
    score_batches = []
    for images, labels in dataset:
        true_batches.append(np.asarray(labels))
        score_batches.append(np.asarray(predict_batch(images)))

    # nothing to concatenate for an empty dataset
    if not true_batches:
        return [], []

    # class index = position of the maximum of each one-hot / score row
    y_true = list(np.argmax(np.concatenate(true_batches), axis=1))
    y_pred = list(np.argmax(np.concatenate(score_batches), axis=1))

    return y_true, y_pred

### 4. Load and ensemble models to predict

In [None]:
def load_ensemble_predict(path, model_list, dataset):
    """Load several saved models and predict with their averaged scores.

    Each model is read from ``{path}/{name}/{name}.h5``. For every batch the
    class scores of all models are averaged (soft voting) and the class with
    the highest mean score is taken as the ensemble prediction. The dataset
    is traversed once so true and predicted labels stay aligned.

    Args:
        path: full directory to folders containing each model.
        model_list: names of the models to load; a single name given as a
            string is also accepted.
        dataset: an iterable (typically a tf.data.Dataset) yielding tuples
            (images, labels), where labels is a one-hot encoded array of
            shape (batch_size, num_classes).
    Returns:
        y_true: list with the true class index of each sample.
        y_pred: list with the ensemble-predicted class index of each sample.
    Raises:
        ValueError: if ``model_list`` is empty.
    """
    # a bare string would otherwise be iterated character by character
    if isinstance(model_list, str):
        model_list = [model_list]
    if not model_list:
        raise ValueError("model_list must contain at least one model name")

    # load every ensemble member up front (all of them are held in memory)
    members = [load_model("{}/{}/{}.h5".format(path, name, name))
               for name in model_list]

    true_batches = []
    score_batches = []
    for images, labels in dataset:
        member_scores = [np.asarray(member.predict_on_batch(images))
                         for member in members]
        true_batches.append(np.asarray(labels))
        # soft voting: mean of the members' class scores
        score_batches.append(np.mean(member_scores, axis=0))

    # nothing to concatenate for an empty dataset
    if not true_batches:
        return [], []

    # class index = position of the maximum of each one-hot / score row
    y_true = list(np.argmax(np.concatenate(true_batches), axis=1))
    y_pred = list(np.argmax(np.concatenate(score_batches), axis=1))

    return y_true, y_pred

### 4. Plot learning curves

In [None]:
def learning_curve(history, epochs):
    """Draw every metric stored in a training history against the epoch index.

    Args:
        history: object whose ``history`` attribute can be turned into a
            DataFrame with one column per metric (presumably the History
            returned by Keras ``fit`` -- confirm against the caller).
        epochs: upper limit of the x axis.
    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    figure = plt.figure(figsize=(7, 5))
    axis = figure.add_subplot(111)

    # one line per recorded metric
    curves = pd.DataFrame(history.history)
    curves.plot(ax=axis)

    # fixed axis ranges: x from 0 to `epochs`, y from 0 to 2
    axis.grid(True)
    axis.set_xlim(0, epochs)
    axis.set_ylim(0, 2)

    plt.show()

In [None]:
def learning_curve_from_file(path, model_name):
    """Plot the training and validation loss stored in a CSV file.

    The file is read from ``{path}/{model_name}/{model_name}_lc.csv`` and
    must contain the columns "loss" and "val_loss"; the row index is used as
    the epoch number.

    Args:
        path: path to files
        model_name: name of the model
    Returns:
        None. A figure with both loss curves is created on the current
        matplotlib backend.
    """
    # load data
    df = pd.read_csv("{}/{}/{}_lc.csv".format(path, model_name, model_name))
    df["epoch"] = df.index

    # The resolution must be set BEFORE the figure is created; setting it
    # afterwards only affects figures created later. Note that this changes
    # the global matplotlib configuration.
    plt.rcParams['figure.dpi'] = 100

    # plot data
    n_epochs = df["epoch"].max() + 1
    fig, ax = plt.subplots(figsize=(5, 3))
    df.plot(x="epoch", y=["loss", "val_loss"], ax=ax,
            xlim=(0, n_epochs), ylim=(0.5, 2),
            xlabel="epoca", ylabel="pérdida",
            grid=True, color=["blue", "red"])
    ax.legend(["train", "validation"], frameon=False)

### 5. Confusion matrix

In [None]:
def confusion_matrix_report(model, dataset, label_dictionary):
    """Show the confusion matrix heatmap and print the classification report.

    Args:
        model: model passed on to ``load_predict``.
        dataset: dataset passed on to ``load_predict``.
        label_dictionary: dictionary mapping the class indices
            0 .. n_classes - 1 to their class names.
    Returns:
        None. The heatmap is displayed and the report is printed.
    """
    # make predictions
    true_labels, predicted_labels = load_predict(model, dataset)

    # class names in index order: this is the order used for the axis ticks
    class_names = [label_dictionary[i] for i in range(len(label_dictionary))]
    true_names = [label_dictionary[i] for i in true_labels]
    predicted_names = [label_dictionary[i] for i in predicted_labels]

    # confusion matrix
    # Without `labels`, scikit-learn orders rows/columns alphabetically and
    # only includes classes that actually occur, so the matrix could
    # disagree with the tick labels drawn below.
    matrix = confusion_matrix(true_names, predicted_names, labels=class_names)

    # classification report
    class_report = classification_report(true_names, predicted_names)

    # print confusion matrix and classification report
    print('Matrix de confusión')
    print()
    fig = plt.figure(figsize=(4, 3))
    sns.heatmap(matrix, annot=True, fmt="d", annot_kws={"size": 8},
                xticklabels=class_names,
                yticklabels=class_names)
    plt.xlabel("Clases predichas")
    plt.xticks(rotation=45, ha='right')
    plt.ylabel("Clases reales")
    plt.show()
    print()
    print()
    print('Reporte clasificacion:')
    print()
    print(class_report)