In [1]:
from BiasStudy import datasets, predictionKit
from BiasStudy.datasets import FairFaceDataset
from BiasStudy.config.TrainingConfig import BiasStudyConfig
from BiasStudy.trainingKit import TrainingLogger
from BiasStudy.trainingKit.TrainingModel import BiasModel

Python Platform: Linux-5.19.0-45-generic-x86_64-with-glibc2.35
Tensor Flow Version: 2.12.1
Keras Version: 2.12.0

Python 3.8.19 (default, Apr  6 2024, 17:58:10) 
[GCC 11.4.0]


In [19]:
import os
import sys
import platform
import numpy as np
import tensorflow as tf
import tensorflow.keras
from matplotlib import pyplot as plt
from tensorflow import keras
from keras.models import Model
from keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Flatten, Dense
import numpy as np
import pandas as pd

In [3]:
# Turn on memory growth for every visible GPU so TensorFlow allocates GPU
# memory on demand instead of reserving it all up front.
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# Load Models

* RUN_A

In [4]:
# Template for a saved model file.
# {0} = run id, {1} = number of conv layers, {2} = kernel size.
BASE_PATH_TO_WEIGHTS = "/notebooks/data/results/run_{0}/{1}_{2}/run_{0}_model_{1}conv_{2}kernel/model/model.h5"

def get_result_paths_dict(run_id: str) -> dict:
    """Build the model-file paths of every architecture variant for one run.

    Args:
        run_id: Identifier substituted into the ``run_{0}`` parts of
            ``BASE_PATH_TO_WEIGHTS`` (for example ``"a"``).

    Returns:
        A dict keyed by ``"<conv>_<kernel>"`` (``"8_3"``, ``"8_5"``, ``"16_3"``,
        ``"16_5"``, in that order) whose values are the formatted paths.
    """
    variants = ((8, 3), (8, 5), (16, 3), (16, 5))
    return {
        "{}_{}".format(n_conv, kernel): BASE_PATH_TO_WEIGHTS.format(run_id, n_conv, kernel)
        for n_conv, kernel in variants
    }

In [8]:
# Resolve the saved-model paths of all four architectures for run "a".
run_a_model_paths = get_result_paths_dict("a")

In [9]:
# Display the resolved paths for a quick visual check.
run_a_model_paths

{'8_3': '/notebooks/data/results/run_a/8_3/run_a_model_8conv_3kernel/model/model.h5',
 '8_5': '/notebooks/data/results/run_a/8_5/run_a_model_8conv_5kernel/model/model.h5',
 '16_3': '/notebooks/data/results/run_a/16_3/run_a_model_16conv_3kernel/model/model.h5',
 '16_5': '/notebooks/data/results/run_a/16_5/run_a_model_16conv_5kernel/model/model.h5'}

In [35]:
def get_last_flat_layer_name(model: Model) -> str:
    """Return the name of the last qualifying layer in ``model.layers``.

    A layer qualifies when its exact type is ``Flatten``, or when its exact
    type is ``Dense`` and its ``units`` is not 2. Subclasses are not matched
    because the check uses ``type(layer) is ...``.

    NOTE(review): the ``units != 2`` test presumably skips a 2-unit
    classification head so the preceding layer is used - confirm.

    Args:
        model: Model whose ``layers`` are scanned in order.

    Returns:
        The ``name`` of the last qualifying layer, or ``None`` when no layer
        qualifies.
    """
    def _qualifies(layer) -> bool:
        kind = type(layer)
        return kind is Flatten or (kind is Dense and layer.units != 2)

    matching_names = [layer.name for layer in model.layers if _qualifies(layer)]
    return matching_names[-1] if matching_names else None

In [48]:
def load_model_prep_for_bias_study(model_weight_path: str) -> Model:
    """Load a saved model and return a frozen sub-model ending at its embedding layer.

    The embedding layer is the one selected by ``get_last_flat_layer_name``.
    The loaded model is marked non-trainable before the sub-model is built.

    Args:
        model_weight_path: Path to the saved model file passed to ``load_model``.

    Returns:
        A ``Model`` sharing the loaded model's input and whose output is the
        output of the embedding layer.

    Raises:
        ValueError: If no embedding layer can be identified in the loaded model.
    """
    model = load_model(model_weight_path)
    embedding_layer = get_last_flat_layer_name(model)
    if embedding_layer is None:
        # Fail with a clear message instead of passing None to get_layer below.
        raise ValueError(
            "No Flatten/Dense embedding layer found in model loaded from {}".format(model_weight_path)
        )
    # Both placeholders need a value: the model name first, then the layer name.
    print("Model: {} - Embedding Layer: {}".format(model.name, embedding_layer))
    model.trainable = False
    return Model(
        inputs = model.input,
        outputs = model.get_layer(embedding_layer).output
    )

In [47]:
# Load the four run "a" models (same order as before: 8_3, 8_5, 16_3, 16_5),
# each cut at its embedding layer by load_model_prep_for_bias_study.
(
    run_a_model_8_3,
    run_a_model_8_5,
    run_a_model_16_3,
    run_a_model_16_5,
) = (
    load_model_prep_for_bias_study(run_a_model_paths[variant])
    for variant in ("8_3", "8_5", "16_3", "16_5")
)

Model: run_a_model_8conv_3kernel_no_flat - Embedding Layer: flatten
Model: run_a_model_8conv_5kernel_no_flat - Embedding Layer: flatten
Model: run_a_model_16conv_3kernel_no_flat - Embedding Layer: flatten
Model: run_a_model_16conv_5kernel_no_flat - Embedding Layer: flatten


In [None]:
# Load the complete saved model registered under "8_layers_3_kernel_no_flat".
# NOTE(review): PATH_TO_WEIGHTS is not defined in any visible cell - confirm it
# exists (or switch to get_result_paths_dict) before a Restart & Run All.
model_8layers_3kernel_all = load_model(PATH_TO_WEIGHTS["8_layers_3_kernel_no_flat"])

In [None]:
# Build a sub-model that stops at the 'flatten' layer of the fully loaded
# network. The source must be `model_8layers_3kernel_all`: the previous version
# read `model_8layers_3kernel` inside its own first assignment, which raises
# NameError on a fresh kernel.
model_8layers_3kernel = Model(
    inputs = model_8layers_3kernel_all.input,
    outputs = model_8layers_3kernel_all.get_layer('flatten').output
)

In [None]:
# Mark the sub-model as non-trainable and overwrite its private `_name` attribute.
model_8layers_3kernel.trainable = False
model_8layers_3kernel._name = "model_8layers_3kernel" # Name overridden here because of a mix-up in the config

In [None]:
# Load the saved model registered under "8_layers_5_kernel_no_flat".
# NOTE(review): PATH_TO_WEIGHTS is not defined in any visible cell - confirm.
model_8layers_5kernel = load_model(PATH_TO_WEIGHTS["8_layers_5_kernel_no_flat"])

In [None]:
# Mark the model as non-trainable and overwrite its private `_name` attribute.
model_8layers_5kernel.trainable = False
model_8layers_5kernel._name = "model_8layers_5kernel"

In [None]:
# Load the saved model registered under "16_layers_3_kernel_no_flat".
# NOTE(review): PATH_TO_WEIGHTS is not defined in any visible cell - confirm.
model_16layers_3kernel = load_model(PATH_TO_WEIGHTS["16_layers_3_kernel_no_flat"])

In [None]:
# Mark the model as non-trainable and overwrite its private `_name` attribute.
model_16layers_3kernel.trainable = False
model_16layers_3kernel._name = "model_16layers_3kernel"

In [None]:
# Load the saved model registered under "16_layers_5_kernel_no_flat".
# NOTE(review): PATH_TO_WEIGHTS is not defined in any visible cell - confirm.
model_16layers_5kernel = load_model(PATH_TO_WEIGHTS["16_layers_5_kernel_no_flat"])

In [None]:
# Mark the model as non-trainable and overwrite its private `_name` attribute.
model_16layers_5kernel.trainable = False
model_16layers_5kernel._name = "model_16layers_5kernel"

# Understand Model Training

In [None]:
# Template for a training-log CSV.
# {0} = number of conv layers, {1} = kernel size.
HISTORY_PATH_BASE = "/notebooks/data/results/{0}_{1}/model_{0}conv_{1}kernel/csv_logging/logsmodel_{0}conv_{1}kernel.csv"

# Training-log path for each architecture, keyed "<conv>_layers_<kernel>_kernel_no_flat".
PATH_TO_HISTORY = {
    "{}_layers_{}_kernel_no_flat".format(n_conv, kernel): HISTORY_PATH_BASE.format(n_conv, kernel)
    for n_conv, kernel in ((8, 3), (8, 5), (16, 3), (16, 5))
}

In [None]:
def read_csv_history(path: str) -> pd.DataFrame:
    """Read a training-log CSV and keep one row per epoch, the last one logged.

    Rows whose ``epoch`` value is the literal string ``'epoch'`` are dropped
    (presumably header lines repeated when logging was appended to an existing
    file - confirm). For each remaining epoch only the row that appears last
    in the file is kept. All columns are cast to float and the rows are sorted
    by ascending epoch with a fresh 0..n-1 index.

    The helper column ``index`` (row position after the header rows are
    removed) is part of the returned frame.

    Args:
        path: Location of the CSV file.

    Returns:
        A new DataFrame with one float-typed row per epoch.
    """
    raw = pd.read_csv(path)
    # Remove embedded header rows, then number the surviving rows by position.
    rows = raw[raw.epoch != 'epoch'].reset_index(drop=True)
    rows['index'] = rows.index
    # For every epoch, the label of the row with the highest position (the latest one).
    newest_per_epoch = rows.groupby("epoch")["index"].idxmax()
    history = (
        rows.loc[newest_per_epoch]
        .astype(float)
        .sort_values(by=['epoch'], ascending=True)
        .reset_index(drop=True)
    )
    return history.copy()

In [None]:
# Read the training log of each architecture (same order as before).
(
    history_8layer_3kernel,
    history_8layer_5kernel,
    history_16layer_3kernel,
    history_16layer_5kernel,
) = (
    read_csv_history(PATH_TO_HISTORY[history_key])
    for history_key in (
        "8_layers_3_kernel_no_flat",
        "8_layers_5_kernel_no_flat",
        "16_layers_3_kernel_no_flat",
        "16_layers_5_kernel_no_flat",
    )
)

In [None]:
# Collect the per-architecture histories under the labels used as subplot
# titles by plot_all.
histories = {
    "8_layers_3_kernel": history_8layer_3kernel,
    "8_layers_5_kernel": history_8layer_5kernel,
    "16_layers_3_kernel": history_16layer_3kernel,
    "16_layers_5_kernel": history_16layer_5kernel,
}

In [None]:
def plot_all(histories_dict):
    """Plot training and validation loss side by side, one subplot per history.

    All subplots share the y axis and each is titled with its dict key.

    Args:
        histories_dict: Mapping of subplot title to a history object exposing
            ``loss`` and ``val_loss`` (for example the frames returned by
            ``read_csv_history``).

    Raises:
        ValueError: If ``histories_dict`` is empty.
    """
    num_plots = len(histories_dict)
    if num_plots == 0:
        raise ValueError("histories_dict must contain at least one history to plot")
    # squeeze=False keeps `axs` two-dimensional even for a single subplot;
    # without it a single history yields a bare Axes that cannot be indexed.
    fig, axs = plt.subplots(
        1, num_plots, sharey=True, squeeze=False, figsize=(num_plots * 5, 5)
    )
    for ax, (key, history) in zip(axs[0], histories_dict.items()):
        ax.plot(history.loss, label='training')
        ax.plot(history.val_loss, label='validation')
        ax.legend()
        ax.set_title(key)
    plt.show()

In [None]:
# Draw the training/validation loss curves of every architecture.
plot_all(histories)

# Load Dataset

In [None]:
# Image directory handed to the prediction calls as `image_dir`.
IMAGE_DIR = "/notebooks/data/LFWA+/COMPILED/compiled_lfwa+"
# Directory that holds the pairs CSV.
BASE_DIR = "/notebooks/data/LFWA+/COMPILED"
# File name of the pairs CSV inside BASE_DIR.
ALL_PAIRS = "compiled_lfwa+_all_pairs.csv"

In [None]:
# Load the pairs table from BASE_DIR/ALL_PAIRS.
all_pairs = pd.read_csv(f"{BASE_DIR}/{ALL_PAIRS}")

# Evaluation

## 8Layers - 3 Kernel Size

In [None]:
from BiasStudy.predictionKit import PredictionToolKit
from BiasStudy.predictionKit import PredictionPlotKit

In [None]:
# Run PredictionToolKit.predict on `all_pairs` for the "with flat" variant of
# the 8-layer / 3-kernel model, with 'binary_race' as the feature and the
# 'gmeans' best-threshold method.
# NOTE(review): `model_8layers_3kernel_with_flat` is not defined in any visible
# cell - presumably it should be one of the models loaded earlier; confirm.
model_8layers_3kernel_wit_flat_result = PredictionToolKit.predict(
    feature_name = 'binary_race',
    unique_image_id_col_name = 'image_id',
    file_path_col_name = 'file_path',
    pair_id_col_name = 'gid',
    y_col = 'y_match_per_group',
    image_dir = IMAGE_DIR,
    dataset = all_pairs,
    model = model_8layers_3kernel_with_flat,
    verbose = 0, 
    best_threshold_method = 'gmeans'
)

In [None]:
# Print the prediction result for the "with flat" 8-layer / 3-kernel variant.
print(model_8layers_3kernel_wit_flat_result)

In [None]:
# Plot the ROC for the "with flat" 8-layer / 3-kernel result.
PredictionPlotKit.plot_roc(model_8layers_3kernel_wit_flat_result)

In [None]:
# Run PredictionToolKit.predict on `all_pairs` for `model_8layers_3kernel`,
# with 'binary_race' as the feature and the 'gmeans' best-threshold method.
model_8layers_3kernel_result = PredictionToolKit.predict(
    feature_name = 'binary_race',
    unique_image_id_col_name = 'image_id',
    file_path_col_name = 'file_path',
    pair_id_col_name = 'gid',
    y_col = 'y_match_per_group',
    image_dir = IMAGE_DIR,
    dataset = all_pairs,
    model = model_8layers_3kernel,
    verbose = 0, 
    best_threshold_method = 'gmeans'
)

In [None]:
# Print the prediction result for the 8-layer / 3-kernel model.
print(model_8layers_3kernel_result)

In [None]:
# Plot the ROC for the 8-layer / 3-kernel result.
PredictionPlotKit.plot_roc(model_8layers_3kernel_result)

## 8Layers - 5 Kernel Size

In [None]:
# Run PredictionToolKit.predict on `all_pairs` for `model_8layers_5kernel`,
# with 'binary_race' as the feature and the 'gmeans' best-threshold method.
model_8layers_5kernel_result = PredictionToolKit.predict(
    feature_name = 'binary_race',
    unique_image_id_col_name = 'image_id',
    file_path_col_name = 'file_path',
    pair_id_col_name = 'gid',
    y_col = 'y_match_per_group',
    image_dir = IMAGE_DIR,
    dataset = all_pairs,
    model = model_8layers_5kernel,
    verbose = 0, 
    best_threshold_method = 'gmeans'
)

In [None]:
# Print the prediction result for the 8-layer / 5-kernel model.
print(model_8layers_5kernel_result)

In [None]:
# Plot the ROC for the 8-layer / 5-kernel result.
PredictionPlotKit.plot_roc(model_8layers_5kernel_result)

## 16Layers - 3 Kernel Size

In [None]:
# Run PredictionToolKit.predict on `all_pairs` for `model_16layers_3kernel`,
# with 'binary_race' as the feature and the 'gmeans' best-threshold method.
model_16layers_3kernel_result = PredictionToolKit.predict(
    feature_name = 'binary_race',
    unique_image_id_col_name = 'image_id',
    file_path_col_name = 'file_path',
    pair_id_col_name = 'gid',
    y_col = 'y_match_per_group',
    image_dir = IMAGE_DIR,
    dataset = all_pairs,
    model = model_16layers_3kernel,
    verbose = 0, 
    best_threshold_method = 'gmeans'
)

In [None]:
# Print the prediction result for the 16-layer / 3-kernel model.
print(model_16layers_3kernel_result)

In [None]:
# Plot the ROC for the 16-layer / 3-kernel result.
PredictionPlotKit.plot_roc(model_16layers_3kernel_result)

## 16Layers - 5 Kernel Size

In [None]:
# Run PredictionToolKit.predict on `all_pairs` for `model_16layers_5kernel`,
# with 'binary_race' as the feature and the 'gmeans' best-threshold method.
model_16layers_5kernel_result = PredictionToolKit.predict(
    feature_name = 'binary_race',
    unique_image_id_col_name = 'image_id',
    file_path_col_name = 'file_path',
    pair_id_col_name = 'gid',
    y_col = 'y_match_per_group',
    image_dir = IMAGE_DIR,
    dataset = all_pairs,
    model = model_16layers_5kernel,
    verbose = 0, 
    best_threshold_method = 'gmeans'
)

In [None]:
# Print the prediction result for the 16-layer / 5-kernel model.
print(model_16layers_5kernel_result)

In [None]:
# Plot the ROC for the 16-layer / 5-kernel result.
PredictionPlotKit.plot_roc(model_16layers_5kernel_result)

## ALL

In [None]:
# Pass all five prediction results (the four architectures plus the
# "with flat" 8-layer / 3-kernel variant) to plot_multiple_roc.
PredictionPlotKit.plot_multiple_roc(
    [
        model_8layers_3kernel_result,
        model_8layers_5kernel_result,
        model_16layers_3kernel_result,
        model_16layers_5kernel_result,
        model_8layers_3kernel_wit_flat_result
    ]
)