## Outline
1. Import libraries and initialise global variables
2. Load data
3. Data augmentation
4. Load base models
5. Model creation using transfer learning
    - Base models (from step 4) are used here
6. Model training
7. Model Analysis
    - Get model statistics
8. Findings and results

In [None]:
from tensorflow import keras
from tensorflow.keras.preprocessing import image_dataset_from_directory
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from gen_results import gen_save_cr_cm # Load test results

from tqdm import tqdm            # Progress bar
from pathlib import Path         # Create new folder if does not exist

import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import os
import json
import ssl
import time

# Enable memory growth on the first GPU. The guard keeps the notebook runnable
# on CPU-only machines, where the device list is empty and indexing [0] would
# raise an IndexError.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

# NOTE(review): this disables HTTPS certificate verification for the whole
# process (presumably so the pretrained weights can be downloaded -- confirm).
# Prefer fixing the local certificate store over keeping this override.
ssl._create_default_https_context = ssl._create_unverified_context

%load_ext autoreload
%autoreload 2

In [None]:
# Move up one directory (back to the project root).
# NOTE: re-running this cell moves up one more level each time.
os.chdir(os.pardir)

In [None]:
# Global configuration shared by the cells below
AUTOTUNE = tf.data.AUTOTUNE      # Used as the prefetch buffer size
BATCH_SIZE = 64
EPOCHS = 500                     # Maximum number of epochs passed to model.fit
IMG_SIZE = (224, 224)
IMG_SHAPE = (*IMG_SIZE, 3)       # IMG_SIZE plus 3 channels
LABELS = ["female", "male"]      # Tick labels for the confusion matrix plot
set_nums = [1, 5, 8, 10]         # List of set numbers
experiments_folder_1 = "experiments_20112021"
experiments_folder = "experiments_12122021"
# Folder containing the preprocessed datasets; built from experiments_folder_1
exp = '/home/monash/Desktop/fyp-work/fyp-ma-13/fyp-models/preprocessing/' + experiments_folder_1

In [None]:
# Every base model is loaded without its classification head (include_top=False)
# and with ImageNet weights; each one is paired with its own preprocessing function.
_keras_apps = tf.keras.applications

# MobileNetV3 Large
preprocess_input_mobile = _keras_apps.mobilenet_v3.preprocess_input
base_model_mobile = _keras_apps.MobileNetV3Large(
    weights='imagenet', include_top=False, input_shape=IMG_SHAPE)

# DenseNet 201
preprocess_input_dense = _keras_apps.densenet.preprocess_input
base_model_dense = _keras_apps.densenet.DenseNet201(
    weights='imagenet', include_top=False, input_shape=IMG_SHAPE)

# ResNet50
preprocess_input_res = _keras_apps.resnet50.preprocess_input
base_model_res = _keras_apps.resnet50.ResNet50(
    weights='imagenet', include_top=False, input_shape=IMG_SHAPE)

In [None]:
def load_train_val_data(preprocessing_fp):
    """
    Loads in training and validation datasets from a specified file path
    and builds the data augmentation layers

    preprocessing_fp : str
        File path to load data from (specific set). The images are read from
        its "train" and "val" sub-folders.

    Returns
    -------
    tuple
        (train_dataset, validation_dataset, data_augmentation) where the two
        datasets are batched and prefetched, and data_augmentation is a
        tf.keras.Sequential applying a random horizontal flip and a random rotation
    """
    print(f"loading train and val dataset from {preprocessing_fp}")
    # Load train dataset
    train_dataset = image_dataset_from_directory(os.path.join(preprocessing_fp, "train"),
                                                 shuffle=True,
                                                 batch_size=BATCH_SIZE,
                                                 image_size=IMG_SIZE)
    # Load validation dataset
    validation_dataset = image_dataset_from_directory(os.path.join(preprocessing_fp, "val"),
                                                      shuffle=True,
                                                      batch_size=BATCH_SIZE,
                                                      image_size=IMG_SIZE)

    # Prefetch batches so data loading can overlap with training
    train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
    validation_dataset = validation_dataset.prefetch(buffer_size=AUTOTUNE)

    # Data augmentation
    data_augmentation = tf.keras.Sequential([
      tf.keras.layers.experimental.preprocessing.RandomFlip('horizontal'),
      tf.keras.layers.experimental.preprocessing.RandomRotation(0.2),
    ])
    return train_dataset, validation_dataset, data_augmentation

## Model Creation

In [None]:
def create_model(base_model, preprocess_input, train_dataset, data_augmentation):
    """
    Creates a new neural network model applying transfer learning.

    base_model : tf.keras.Model
        Base model we use for transfer learning. It is frozen in place
        (base_model.trainable is set to False).
    preprocess_input : Function
        Function to perform preprocessing of input images for model compatibility
    train_dataset :
        Training dataset. Not used to build the model; kept so existing
        callers do not need to change.
    data_augmentation : tf.keras.Sequential
        Augmentation layers applied to the inputs before preprocessing

    Returns
    -------
    tf.keras.Model
        Compiled binary classifier with a single sigmoid output
    """
    # Freeze all convolutional base
    base_model.trainable = False

    # Model building
    inputs = tf.keras.Input(shape=IMG_SHAPE)
    x = data_augmentation(inputs)
    x = preprocess_input(x)
    x = base_model(x, training=False)  # Call the frozen base in inference mode

    # Add classification head
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dropout(0.2)(x)
    x = tf.keras.layers.Dense(1024, kernel_regularizer='l2', activation='relu')(x)
    x = tf.keras.layers.Dense(1024, kernel_regularizer='l2', activation='relu')(x)
    x = tf.keras.layers.Dense(512, kernel_regularizer='l2', activation='relu')(x)
    outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

    model = tf.keras.Model(inputs, outputs)

    # Compile the model.
    # The output layer already applies a sigmoid, so the loss receives
    # probabilities, not logits: from_logits must be False.
    base_learning_rate = 0.0001
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
                  metrics=['accuracy'])
    return model

## Model Training

In [None]:
def model_training(model, set_no, model_type, train_dataset, validation_dataset, male_perc, female_perc, male_pert, female_pert):
    """
    Trains model, and saves model's best weights and history

    model :
        Model to train
    set_no : int
        Set number
    model_type : str
        Type of model (i.e. 'mobile', 'dense', 'res')
    train_dataset :
        Training dataset passed to model.fit
    validation_dataset :
        Validation dataset passed to model.fit
    male_perc : float or 'ori'
        Percentage of male data to be perturbed (e.g. 0.25), or 'ori'
    female_perc : float or 'ori'
        Percentage of female data to be perturbed (e.g. 0.25), or 'ori'
    male_pert : str
        Male perturbation name; used here to build the output folder names
    female_pert : str
        Female perturbation name; used here to build the output folder names

    Returns
    -------
    The History object returned by model.fit. Its history dict additionally
    holds 'time_taken', the training duration formatted as e.g. "12.34s".
    """
    assert isinstance(male_perc, float) or male_perc == 'ori'
    assert isinstance(female_perc, float) or female_perc == 'ori'
    # Fractions become whole-number labels for the folder names (0.25 -> 25)
    male_label = int(male_perc*100) if isinstance(male_perc, float) else male_perc
    female_label = int(female_perc*100) if isinstance(female_perc, float) else female_perc
    run_name = f"male_{male_label}_{male_pert}_female_{female_label}_{female_pert}"

    # Save a checkpoint of the model for later use.
    # NOTE(review): the checkpoint tracks the training loss ("loss") while early
    # stopping tracks "val_loss" -- confirm this difference is intended.
    checkpoint = ModelCheckpoint(
        f"best_weights/{run_name}/set{set_no}/model_tl_best_weights_{model_type}_set{set_no}.h5",
        monitor="loss",
        save_best_only=True,
        mode="min",
        save_freq="epoch",
    )

    # Stop training once the validation loss has not improved for 5 epochs
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                      mode='min',
                                                      patience=5)

    start_time = time.time()
    history = model.fit(train_dataset,
                        epochs=EPOCHS,
                        validation_data=validation_dataset,
                        callbacks=[checkpoint, early_stopping])
    time_taken = "%.2fs" % (time.time() - start_time)
    history.history['time_taken'] = time_taken

    # Store model history as a JSON file
    target = f"history/{run_name}"
    Path(target).mkdir(parents=True, exist_ok=True)
    with open(os.path.join(target, f"model_tl_history_{model_type}_set{set_no}.json"), "w") as f:
        json.dump(history.history, f)

    return history

In [None]:
def find_best_weights_and_history(set_no, male_perc, female_perc, male_pert, female_pert):
    """
    Builds and trains the three models (MobileNet, DenseNet, ResNet50) on one set.
    Best weights and histories are saved by model_training.

    set_no : int
        Set number
    male_perc : float or 'ori'
        Percentage of male data to be perturbed (e.g. 0.25), or 'ori'
    female_perc : float or 'ori'
        Percentage of female data to be perturbed (e.g. 0.25), or 'ori'
    male_pert : str
        Male perturbation name; part of the dataset folder name
    female_pert : str
        Female perturbation name; part of the dataset folder name

    Returns
    -------
    tuple
        (history_mobile, history_dense, history_res) as returned by model_training
    """
    assert isinstance(male_perc, float) or male_perc == 'ori'
    assert isinstance(female_perc, float) or female_perc == 'ori'
    # Fractions become whole-number labels for the folder names (0.25 -> 25)
    male_label = int(male_perc*100) if isinstance(male_perc, float) else male_perc
    female_label = int(female_perc*100) if isinstance(female_perc, float) else female_perc

    preprocessing_fp = f'{exp}/male_{male_label}_{male_pert}_female_{female_label}_{female_pert}/set{set_no}'
    train_dataset, validation_dataset, data_aug = load_train_val_data(preprocessing_fp)

    # Create the three different models before any of them is trained
    base_specs = (
        ('mobile', base_model_mobile, preprocess_input_mobile),
        ('dense', base_model_dense, preprocess_input_dense),
        ('res', base_model_res, preprocess_input_res),
    )
    models = [
        (model_type, create_model(base_model, preprocess_input, train_dataset, data_aug))
        for model_type, base_model, preprocess_input in base_specs
    ]

    # Train them one after another, in the order mobile -> dense -> res
    history_mobile, history_dense, history_res = [
        model_training(model, set_no, model_type,
                       train_dataset, validation_dataset,
                       male_perc, female_perc, male_pert, female_pert)
        for model_type, model in models
    ]
    return history_mobile, history_dense, history_res

In [None]:
def gen_models_all_sets(male_perc, female_perc, male_pert, female_pert):
    """
    This function does two things for every set in set_nums:
    1. Builds all three model types (MobileNet, ResNet50, DenseNet)
    2. Saves model with the best weights and history

    male_perc : float or 'ori'
        Percentage of male data to be perturbed (e.g. 0.25), or 'ori'
    female_perc : float or 'ori'
        Percentage of female data to be perturbed (e.g. 0.25), or 'ori'
    male_pert : str
        Male perturbation name
    female_pert : str
        Female perturbation name

    Returns
    -------
    tuple or None
        The (mobile, dense, res) histories of the LAST set only; histories of
        earlier sets are available through the saved JSON files.
        None when set_nums is empty.
    """
    histories = None  # Stays None when there is no set to train on
    for set_no in tqdm(set_nums, "Loading models..."):
        print("Training...")
        print(f"Male perturbation: {male_pert}, Female perturbation: {female_pert}")
        histories = find_best_weights_and_history(set_no, male_perc, female_perc, male_pert, female_pert)
        print("Saving...")
        print("--------------------------------")
    return histories

# Model Analysis

In [None]:
def get_all_models(set_no, male_label, female_label, male_pert, female_pert, best_w_fp="best_weights/"):
    """
    Returns a list of Keras models from a specific set

    set_no : int
        Set number
    male_label : int or str
        Male percentage label used in the folder name (e.g. 25 or 'ori')
    female_label : int or str
        Female percentage label used in the folder name (e.g. 25 or 'ori')
    male_pert : str
        Male perturbation name used in the folder name
    female_pert : str
        Female perturbation name used in the folder name
    best_w_fp : str
        File path where the best weights of the models for the particular set is stored.
        A trailing slash is optional.

    Returns
    -------
    list
        [mobilenet, densenet, resnet] loaded from their .h5 files
    """
    mf = f'male_{male_label}_{male_pert}_female_{female_label}_{female_pert}'
    # os.path.join keeps the path valid whether or not best_w_fp ends with a slash
    target = os.path.join(best_w_fp, mf, f'set{set_no}')
    all_models = [
        tf.keras.models.load_model(
            os.path.join(target, f'model_tl_best_weights_{model_type}_set{set_no}.h5'))
        for model_type in ('mobile', 'dense', 'res')
    ]
    print("best weight path:", target)
    return all_models

In [None]:
def gen_result_for_sets(all_models, original, target, test_pert):
    """
    Generates the Classification Reports and Confusion Matrix results of
    every model type by calling gen_save_cr_cm once per type.

    all_models : list
        List of Keras models
    original : str
        Original file path
    target : str
        Target file path
    test_pert :
        Test perturbation, forwarded unchanged to gen_save_cr_cm

    Returns nothing; the values returned by gen_save_cr_cm are discarded.
    """
    print(f"original path: {original}, target path: {target}")
    # MobileNet, DenseNet, then ResNet; gender=None is annotated as "Both" in the original notes
    for model_type in ('mobile', 'dense', 'res'):
        gen_save_cr_cm(model_type, all_models, original, target, gender=None, test_pert=test_pert)

In [None]:
def gen_cr_cm_results(male_perc, female_perc, male_pert, female_pert, test_pert, original_folder = "preprocessing/cv_datasets/", target_folder = "cr_cm_results/"):
    """
    Generates cr_cm_results for every set in set_nums

    male_perc : float or 'ori'
        Percentage of male data to be perturbed
    female_perc : float or 'ori'
        Percentage of female data to be perturbed
    male_pert : str
        Male perturbation name used in the folder names
    female_pert : str
        Female perturbation name used in the folder names
    test_pert :
        Test perturbation, forwarded to gen_result_for_sets
    original_folder : str
        File path to folder containing original image
    target_folder : str
        File path to folder to store classification report (CR) and confusion matrix (CM).
        Results go to <target_folder>/male_..._female_.../set<N>/
    """
    assert isinstance(male_perc, float) or male_perc == 'ori'
    assert isinstance(female_perc, float) or female_perc == 'ori'
    # Fractions become whole-number labels for the folder names (0.25 -> 25)
    male_label = int(male_perc*100) if isinstance(male_perc, float) else male_perc
    female_label = int(female_perc*100) if isinstance(female_perc, float) else female_perc

    # Build the results folder from the target_folder argument instead of
    # overwriting it with a hard-coded "cr_cm_results/" prefix
    run_name = f"male_{male_label}_{male_pert}_female_{female_label}_{female_pert}"
    results_root = os.path.join(target_folder, run_name)

    for set_no in tqdm(set_nums, "Generating results..."):
        # The trailing "" keeps the trailing slash on both paths
        original = os.path.join(original_folder, f'set{set_no}', '')     # Where is it coming from?
        target = os.path.join(results_root, f'set{set_no}', '')          # Where do you want to store the results?
        Path(target).mkdir(parents=True, exist_ok=True)                  # Make new directory if it does not exist
        all_models = get_all_models(set_no, male_label, female_label, male_pert, female_pert)  # Grab all models, MobileNet, DenseNet, ResNet50
        gen_result_for_sets(all_models, original, target, test_pert)

In [None]:
# Fractions of the data that were perturbed
percs = [0.1, 0.25, 0.5]

# Perturbations evaluated at test time
pert_types = ['glasses', 'makeup', 'masked', 'ori']

# gen_models_all_sets('ori', 'ori', 'ori', 'ori')
# Models trained with glasses (male) and makeup (female): generate results
# for every percentage and every test perturbation
for perc in percs:
    for test_pert_type in pert_types:
        gen_cr_cm_results(perc, perc, 'glasses', 'makeup', test_pert_type)

#makeup
# for perc in percs:
#     gen_models_all_sets(perc, perc, 'makeup', 'makeup')
#     gen_cr_cm_results(perc, perc, 'makeup', 'makeup')

# gen_models_all_sets('ori', 'ori')
# gen_cr_cm_results('ori', 'ori')

# for male perturbation only
# for perc in tqdm(percs, "Male perturbation only..."):
#     gen_models_all_sets(perc, 'ori')
#     gen_cr_cm_results(perc, 'ori')

# for female perturbation only
# for perc in tqdm(percs, "Female perturbation only..."):
#     gen_models_all_sets('ori', perc)
#     gen_cr_cm_results('ori', perc)

# for both
# for perc in tqdm(percs, "Both perturbations..."):
#     gen_models_all_sets(perc, perc)
#     gen_cr_cm_results(perc, perc)

In [None]:
# glasses
for perc in percs:
    gen_models_all_sets(perc, perc, 'glasses', 'glasses')
    # gen_cr_cm_results needs the test perturbation as its fifth argument;
    # evaluate every test perturbation, as the cell above does
    for test_pert in pert_types:
        gen_cr_cm_results(perc, perc, 'glasses', 'glasses', test_pert)

In [None]:
def load_metrics(model_type, target_folder, perturbation='ori'):
    """
    Loads in results from folder fyp-ma-13/fyp-models/cr_cm_results

    The file holds a JSON document whose value is itself a JSON-encoded
    string, so it is decoded twice.

    model_type : str
        Either 'mobile' (MobileNet), 'dense' (DenseNet) or 'res' (ResNet50)
    target_folder : str
        Target folder name from timeline
    perturbation: str
        Either 'ori', 'masked', 'glasses', or 'make_up'

    NOTE(review): other cells spell the perturbation 'makeup', which this
    assertion rejects -- confirm which spelling the result files use.
    NOTE(review): the path contains a literal "set..." folder -- confirm it
    is not a placeholder for a set number.
    """
    allowed = ['ori', 'masked', 'glasses', 'make_up']
    assert perturbation in allowed

    results_path = "timeline/{}/cr_cm_results/set.../cr_cm_{}_{}_{}".format(target_folder, model_type, perturbation, 'bothg')
    with open(results_path) as json_file:
        encoded = json.load(json_file)
        return json.loads(encoded)

In [None]:
def calculate_sum_cm(cm):
    """
    Gets total number of observations of input confusion matrix

    cm : list
        Confusion matrix (list of rows)

    Returns the sum of every cell of cm; 0 for an empty matrix.
    """
    # Sum the matrix that was passed in, not a global result dictionary
    return sum(sum(row) for row in cm)

def calculate_acc_cm(cm):
    """
    Gets total accuracy of input confusion matrix, as a percentage

    cm : list
        Confusion matrix

    The correct predictions are the two diagonal cells; the total comes
    from calculate_sum_cm.
    """
    total = calculate_sum_cm(cm)
    true_negatives, true_positives = cm[0][0], cm[1][1]
    correct = true_negatives + true_positives
    return (correct / total) * 100

def calculate_female_acc(cm):
    """
    Gets female accuracy of input confusion matrix, as a percentage

    cm : list
        Confusion matrix; row 0 holds the female samples
    """
    correct = cm[0][0]
    row_total = cm[0][0] + cm[0][1]
    return (correct / row_total) * 100
    
def calculate_male_acc(cm):
    """
    Gets male accuracy of input confusion matrix, as a percentage

    cm : list
        Confusion matrix; row 1 holds the male samples
    """
    correct = cm[1][1]
    row_total = cm[1][0] + cm[1][1]
    return (correct / row_total) * 100

def calculate_precision(cm):
    """
    Gets precision of input confusion matrix, as a percentage

    cm : list
        Confusion matrix

    Computed over column 1: cm[1][1] out of cm[0][1] + cm[1][1].
    """
    true_positives = cm[1][1]
    false_positives = cm[0][1]
    predicted_positives = false_positives + true_positives
    return 100 * true_positives / predicted_positives

def calculate_recall(cm):
    """
    Gets recall of input confusion matrix, as a percentage

    cm : list
        Confusion matrix

    Computed over row 1: cm[1][1] out of cm[1][0] + cm[1][1].
    """
    true_positives = cm[1][1]
    false_negatives = cm[1][0]
    actual_positives = false_negatives + true_positives
    return 100 * true_positives / actual_positives

In [None]:
# Load the saved results of each model type.
# NOTE: debiased_fp is assigned in the "Load histories" cell further down;
# that cell has to be run before this one.
data_mobile_both = load_metrics(model_type='mobile', target_folder=debiased_fp)
data_dense_both = load_metrics(model_type='dense', target_folder=debiased_fp)
data_res_both = load_metrics(model_type='res', target_folder=debiased_fp)

In [None]:
def gen_stats_graph(data, history, model_type, debiased=False):
    """
    This function does two things:
        1. Generate plot showing model statistics and show
        2. Saves generated plot to stats_diagrams/<model_type>_stats_graph.png

    The figure has three panels: the confusion matrix heatmap, a bar chart
    of the metrics derived from it, and the training/validation accuracy curves.

    data : dict
        Dictionary containing data on classification report and confusion matrix.
        The confusion matrix is read from the key 'cm_<model_type>_bothg'.
    history : dict
        History of the model; 'accuracy' and 'val_accuracy' are plotted
    model_type : str
        Model type
    debiased : boolean
        Determines whether we are targetting a debiased or not.
        NOTE(review): currently unused -- baseline and debiased plots of the
        same model_type are written to the same file.
    """
    key = 'cm_%s_bothg' % model_type     # Key to access confusion matrix data from "data"
    cm = data[key]

    acc = list(history['accuracy'])
    val_acc = list(history['val_accuracy'])

    plt.figure(figsize=(14, 8))
    plt.subplots_adjust(wspace=0.7, hspace= 0.4)

# ------------------------------------------------------------------------------------------------------------------
#   Plot accuracy training/validation graph
    plt.subplot(212)
    plt.plot(acc, label='Training Accuracy')
    plt.plot(val_acc, label='Validation Accuracy')
    plt.ylim([0.5, 1])
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')

# ------------------------------------------------------------------------------------------------------------------
#   Plot confusion metric values
    plt.subplot(222)
    cols_barh = ['Male Accuracy', 'Female Accuracy', 'Precision', 'Recall', 'Gender Bias Index']

    m_acc = round(calculate_male_acc(cm), 2)
    f_acc = round(calculate_female_acc(cm), 2)
    vals_barh = [m_acc,
                 f_acc,
                 round(calculate_precision(cm), 2),
                 round(calculate_recall(cm), 2),
                 round(f_acc - m_acc,2)]   # Gender Bias Index: female minus male accuracy

    plt.barh(cols_barh, vals_barh)
    for index, value in enumerate(vals_barh):
        plt.text(value+0.3, index-0.2, str(value), fontweight='bold', ha='left',fontdict=dict(fontsize=12))

# ------------------------------------------------------------------------------------------------------------------
#   Plot confusion matrix
    plt.subplot(221)
    # Figure title: display name of the model, falling back to the raw name
    display_names = {'mobile': 'MobileNet', 'dense': 'DenseNet', 'res': 'ResNet50'}
    short_name = key[3:].split('_')[0]
    title = display_names.get(short_name, short_name)

    cf_matrix = np.array(cm)
    flat_cells = cf_matrix.flatten()
    total = np.sum(cf_matrix)

    group_names = ["TN", "FP", "FN", "TP"]
    group_counts = [round(value) for value in flat_cells]
    # Scale the fractions by 100 so that the value matches its "%" suffix
    group_percentages = [f"{round(100 * value / total, 2)}%" for value in flat_cells]
    df_cm = pd.DataFrame(cf_matrix, range(2), range(2))
    df_cm.index.name = "Actual"
    df_cm.columns.name = "Predicted"
    labels = np.asarray(["\n".join(map(str, v)) for v in zip(group_names, group_counts, group_percentages)]).reshape(2, 2)

    plt.suptitle(title, fontsize = 30, ha='center')

    sns.set(font_scale=1.4)  # for label size
    sns.heatmap(
        df_cm,
        annot=labels,
        annot_kws={"size": 15},
        cmap="YlOrBr",
        fmt="",
        xticklabels=LABELS,
        yticklabels=LABELS,
    )
    sns.set_style("darkgrid")

    # Create the output folder if needed so savefig cannot fail on a missing directory
    Path('stats_diagrams').mkdir(parents=True, exist_ok=True)
    plt.savefig('stats_diagrams/%s_stats_graph.png' % model_type)
    plt.show()

In [None]:
# Timeline folders of the baseline and the debiased experiments
baseline_fp = "(5)_early_stopping_20"
debiased_fp = "(7)_debiased_50"


def _load_history(history_path):
    """Reads one saved training history (JSON file) and returns its content."""
    with open(history_path) as history_file:
        return json.load(history_file)


# Load the set 10 histories of the baseline models
history_mobile = _load_history(f'timeline/%s/history/set10/model_tl_history_mobile_set10.json' % baseline_fp)
history_dense = _load_history(f'timeline/%s/history/set10/model_tl_history_dense_set10.json' % baseline_fp)
history_res = _load_history(f'timeline/%s/history/set10/model_tl_history_res_set10.json' % baseline_fp)

In [None]:
# gen_stats_graph takes (data, history, model_type, debiased) and derives the
# confusion matrix key from model_type itself, so no key argument is passed.

# Show statistics for baseline models
gen_stats_graph(data_mobile_both, history_mobile, 'mobile')                   # Statistics plot for MobileNet
gen_stats_graph(data_dense_both, history_dense, 'dense')                      # Statistics plot for DenseNet
gen_stats_graph(data_res_both, history_res, 'res')                            # Statistics plot for ResNet50

# Show statistics for debiased models
gen_stats_graph(data_mobile_both, history_mobile, 'mobile', debiased=True)    # Statistics plot for MobileNet
gen_stats_graph(data_dense_both, history_dense, 'dense', debiased=True)       # Statistics plot for DenseNet
gen_stats_graph(data_res_both, history_res, 'res', debiased=True)             # Statistics plot for ResNet50

# Findings
- Makeup improves accuracy when predicting females
- Glasses improves accuracy when predicting males
- Masks generally degrade accuracy