### Changeable Variables
Set the path to the saved model and list the ids of any emotion categories the model was not trained on. The rest of the notebook reports the model's performance on the remaining categories.

In [None]:
# Path handed to `load_model` further down; it is empty by default and must be
# filled in before the notebook is run.
model_path = ""
# Emotion ids (keys of `id_to_emotion`) passed to `filter_emotion_data` for the
# validation data and left off the confusion-matrix labels.
# With the default list these are fear, disgust, contempt, none, uncertain and noface.
unused_categories = [4, 5, 7, 8, 9, 10]

### Imports & Global Variables

In [None]:
from image_sequences import BasicImageSequence
from tensorflow.keras.models import Model, load_model
from pathlib import Path
from preliminary_caching import read_cached_data, filter_emotion_data
from sklearn.metrics import confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random
import matplotlib.image as mpimg
import fairface

In [None]:
# Lookup from numeric emotion id to its human-readable name.
# The id of each name is its position in the list below (0 through 10).
id_to_emotion = dict(enumerate([
    "neutral",
    "happy",
    "sad",
    "surprise",
    "fear",
    "disgust",
    "angry",
    "contempt",
    "none",
    "uncertain",
    "noface",
]))

### Generate Predictions

Predictions are generated on the cached validation set, after removing any samples whose emotion is listed in `unused_categories`.

In [None]:
# Restore the trained model from the configured path and read the cached
# "validation" split.
model = load_model(model_path)
validation_data = read_cached_data("validation")

# Filter out emotions that the model may not have been trained on.
# An empty `unused_categories` list leaves the validation data untouched.
if unused_categories:
    validation_data = filter_emotion_data(validation_data, unused_categories)

In [None]:
# Wrap the validation samples in the project's image sequence so the model can
# consume them batch by batch.
validation_generator = BasicImageSequence(validation_data)

# `steps` is the number of batches to draw from the generator, not the number of
# samples, so it is taken from the sequence's own length. Using the sample count
# would draw extra batches whenever a batch holds more than one image.
# NOTE(review): assumes BasicImageSequence implements `__len__` as its batch count
# (the Keras Sequence contract) — confirm against image_sequences.py.
# This can take a few minutes to run.
prediction = model.predict_generator(validation_generator, steps=len(validation_generator))

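On the first pass over the generator, the images are produced in the same order as the entries of the `validation_data` map, so each prediction can be matched to its label by position.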

In [None]:
# The predicted class of each sample is the index of the largest value in its
# prediction row.
predicted_classes = np.argmax(prediction, axis=1)
# Ground-truth emotion ids, read in the iteration order of `validation_data`.
actual_classes = np.array([sample["emotion"] for sample in validation_data.values()])

In [None]:
# An entry of `difference` is zero exactly where prediction and label agree,
# so every non-zero entry is one misclassified sample.
difference = predicted_classes - actual_classes
wrong = np.count_nonzero(difference)
# Whatever is not wrong is correct.
correct = len(difference) - wrong

In [None]:
# Overall accuracy: share of validation samples whose predicted class matches the label.
accuracy = correct / len(difference)
print(f"Accuracy: {accuracy}")

### Confusion Matrix

In [None]:
# Rows are the actual classes and columns the predicted classes.
matrix = confusion_matrix(y_true=actual_classes, y_pred=predicted_classes)

In [None]:
def heatmap(data,labels_x, labels_y,title, title_x,title_y):
    """Draw an annotated seaborn heatmap of `data` and show it.

    Args:
        data: 2-D values to plot; each cell is annotated with its value.
        labels_x: Tick labels for the x axis.
        labels_y: Tick labels for the y axis.
        title: Figure title.
        title_x: Caption for the x axis.
        title_y: Caption for the y axis.
    """
    axes = sns.heatmap(
        data,
        annot=True,
        xticklabels=labels_x,
        yticklabels=labels_y,
    )
    # Decorate the axes seaborn drew on rather than going through pyplot's
    # implicit "current axes".
    axes.set_title(title, fontsize=20)
    axes.set_xlabel(title_x, fontsize=15)
    axes.set_ylabel(title_y, fontsize=15)
    plt.show()

In [None]:
# Keep the emotion ids that were not excluded, in ascending order, and use their
# names as tick labels on both axes of the confusion matrix.
label_ids = [emotion_id for emotion_id in range(11) if emotion_id not in unused_categories]
labels = [id_to_emotion[emotion_id] for emotion_id in label_ids]
heatmap(matrix, labels, labels, "Confusion Matrix", "Predicted", "Actual")

### Calculate P Values

In [None]:
from scipy.stats import chi2_contingency

# Off-diagonal entries of the confusion matrix: for each actual class (row), the
# number of samples mistaken for each of the other classes. Stored under its own
# name so the scalar `wrong` count from the accuracy cell is not overwritten.
off_diagonal = np.logical_not(np.eye(matrix.shape[0], dtype=bool))
misclassified = matrix[off_diagonal].reshape((matrix.shape[0], matrix.shape[1] - 1))

# For every actual class, compare its observed misclassification counts with an
# even spread of the same total over the other classes.
# NOTE(review): chi2_contingency treats the even spread as a second observed
# sample; a goodness-of-fit test (scipy.stats.chisquare) may be what is
# intended — confirm.
p_values = []
for observed in misclassified:
    total_errors = observed.sum()
    if total_errors == 0:
        # No errors for this class (or no other classes at all). An all-zero
        # table makes chi2_contingency raise, so record "not applicable".
        p_values.append(float("nan"))
        continue
    evenly_spread = np.full(observed.shape, np.ceil(total_errors / observed.shape[0]))
    _, p, _, _ = chi2_contingency([observed, evenly_spread])
    p_values.append(p)
print(p_values)

### Visualize Wrong Images

In [None]:
# Positions of the validation samples whose predicted class differs from the label.
wrong_indexs = np.flatnonzero(difference)

def visualize_wrong_image(index):
    """Show the validation image at position `index` with its actual and predicted emotion.

    Args:
        index: Position of the sample in `validation_data` (and in the
            `predicted_classes` / `actual_classes` arrays).
    """
    predicted = id_to_emotion[predicted_classes[index]]
    actual = id_to_emotion[actual_classes[index]]

    # The keys of `validation_data` are passed to the image reader as paths;
    # pick the one at the requested position.
    all_paths = list(validation_data)
    image = mpimg.imread(all_paths[index])

    plt.title(f"Actual: {actual} Predicted: {predicted}")
    plt.imshow(image)
    plt.show()

In [None]:
# Pick one misclassified sample at random and display it.
# `random.randint` includes its upper bound, so it could return an index one past
# the end of `wrong_indexs`; `random.randrange` excludes the upper bound.
if len(wrong_indexs) > 0:
    random_index = random.randrange(len(wrong_indexs))
    visualize_wrong_image(wrong_indexs[random_index])
else:
    # Nothing was misclassified, so there is no image to show.
    print("No misclassified images to display.")

### Visualize Accuracy Stratification

In [None]:
def stacked_barchart(data, labels_x, labels_y,title, title_x,title_y):
    """Draw a 10x10 inch stacked bar chart and show it.

    Args:
        data: 2-D array; each row is one series, stacked on top of the previous
            rows, with one value per x position.
        labels_x: Tick labels for the x positions (one per column of `data`).
        labels_y: Legend labels, one per row of `data`.
        title: Figure title.
        title_x: Caption for the x axis.
        title_y: Caption for the y axis.
    """
    _, axes = plt.subplots(figsize=(10, 10))

    positions = np.arange(len(labels_x))
    axes.set_xticks(positions)
    axes.set_xticklabels(labels_x)

    handles = []
    # Running height of each stack; the next series starts where the last ended.
    stack_top = np.zeros(len(labels_x))
    for series in data:
        bars = axes.bar(positions, series, bottom=stack_top, width=0.5)
        stack_top += series
        # One bar per series is enough to give the legend its colour.
        handles.append(bars[0])

    axes.legend(handles, labels_y)
    axes.set_title(title, fontsize=20)
    axes.set_xlabel(title_x, fontsize=15)
    axes.set_ylabel(title_y, fontsize=15)
    plt.show()

In [None]:
def stratify_accuracy(class_name, name_mapping_function):
    """Compute prediction accuracy separately for each value of one sample attribute.

    Reads the notebook-level `validation_data` and `difference`; a sample counts
    as correct when its entry in `difference` is zero.

    Args:
        class_name: Key looked up in every sample's data to get its group id.
        name_mapping_function: Callable turning a group id into the label used
            as the key of the result.

    Returns:
        Dict mapping each group label to correct / total for that group, in the
        order the groups are first seen in `validation_data`.
    """
    seen = {}
    hits = {}

    for position, sample in enumerate(validation_data.values()):
        group = sample[class_name]
        seen[group] = seen.get(group, 0) + 1
        # Make sure every group has a hit counter, even if it never scores.
        hits.setdefault(group, 0)
        if difference[position] == 0:
            hits[group] += 1

    return {
        name_mapping_function(group): hits[group] / count
        for group, count in seen.items()
    }

# Accuracy broken down by each attribute, with group ids translated to labels
# by the matching fairface helper.
gender_accuracies, age_accuracies, race7_accuracies = (
    stratify_accuracy(attribute, to_label)
    for attribute, to_label in (
        ("gender", fairface.gender_label),
        ("age", fairface.age_label),
        ("race", fairface.race7_label),
    )
)

In [None]:
# One bar per gender group: the accuracy stacked with its complement (the error
# rate), so every bar reaches 1.
# The axis captions name what is actually plotted (groups on x, a fraction on y)
# rather than the confusion-matrix captions "Predicted Labels" / "Actual Labels".
stacked_barchart(
    np.array([
        list(gender_accuracies.values()),
        [1 - accuracy for accuracy in gender_accuracies.values()]
    ]),
    list(gender_accuracies.keys()),
    ["Correct", "Wrong"],
    "Accuracies by Gender",
    "Gender",
    "Fraction of Predictions"
)

In [None]:
# One bar per age group: the accuracy stacked with its complement (the error
# rate), so every bar reaches 1.
# The axis captions name what is actually plotted (groups on x, a fraction on y)
# rather than the confusion-matrix captions "Predicted Labels" / "Actual Labels".
stacked_barchart(
    np.array([
        list(age_accuracies.values()),
        [1 - accuracy for accuracy in age_accuracies.values()]
    ]),
    list(age_accuracies.keys()),
    ["Correct", "Wrong"],
    "Accuracies by Age",
    "Age",
    "Fraction of Predictions"
)

In [None]:
# One bar per race group: the accuracy stacked with its complement (the error
# rate), so every bar reaches 1.
# The axis captions name what is actually plotted (groups on x, a fraction on y)
# rather than the confusion-matrix captions "Predicted Labels" / "Actual Labels".
stacked_barchart(
    np.array([
        list(race7_accuracies.values()),
        [1 - accuracy for accuracy in race7_accuracies.values()]
    ]),
    list(race7_accuracies.keys()),
    ["Correct", "Wrong"],
    "Accuracies by Race",
    "Race",
    "Fraction of Predictions"
)