In [None]:
#-------------------------------------------------------------------------------------JUPYTER NOTEBOOK SETTINGS-------------------------------------------------------------------------------------
# Import from the public `IPython.display` module: pulling `display` out of
# `IPython.core.display` has been deprecated since IPython 7.14.
# NOTE: `display` is deliberately bound to the *module* (not the function),
# because later cells call `display.display(...)` / `display.HTML(...)`.
import IPython.display as display
from IPython.display import HTML

# Stretch the classic-notebook container to the full browser width.
display.display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
import os
import gc
import re
import librosa
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from joblib import dump, load

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.manifold import TSNE
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report, f1_score, recall_score, precision_score, accuracy_score

import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Layer, Input, Conv1D, MaxPooling1D, Dropout, Flatten, Dense, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import Callback, ReduceLROnPlateau, ModelCheckpoint, EarlyStopping 
from tensorflow.keras import mixed_precision

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

In [None]:
import warnings

# Suppress warnings
warnings.filterwarnings('ignore', category=UserWarning, module='keras.src.saving.saving_lib')

In [None]:
# Make 'mixed_float16' the global Keras dtype policy for this session.
tf.keras.mixed_precision.set_global_policy('mixed_float16')

In [None]:
# Define GradientReversalLayer for adversarial models
@tf.keras.utils.register_keras_serializable()
class GradientReversalLayer(Layer):
    """Identity layer whose gradient is negated and scaled by ``lambda_``.

    Forward pass: returns the input unchanged.
    Backward pass: multiplies the incoming gradient by ``-lambda_``.

    Args:
        lambda_: Scale applied to the reversed gradient (default ``1.0``).
        **kwargs: Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, lambda_=1.0, **kwargs):
        super().__init__(**kwargs)
        self.lambda_ = lambda_

    def call(self, x):
        # `tf.custom_gradient` must wrap a function whose positional arguments
        # are all tensors. Decorating `call` directly would make `self` one of
        # the differentiated inputs, so the tensor-only part is wrapped here.
        @tf.custom_gradient
        def _reverse_gradient(inputs):
            def grad(dy):
                return -self.lambda_ * dy

            # Return a new tensor (not the input object itself) so the custom
            # gradient is attached to a distinct op output.
            return tf.identity(inputs), grad

        return _reverse_gradient(x)

    def get_config(self):
        """Return the layer config including ``lambda_`` so it can be re-created on load."""
        config = super().get_config()
        config.update({"lambda_": self.lambda_})
        return config

def _percentage_scores(y_true, y_pred, **score_kwargs):
    """Return (accuracy, precision, recall, F1) scaled to percentages.

    ``score_kwargs`` are forwarded to precision/recall/F1 (e.g. ``average``).
    """
    return (
        accuracy_score(y_true, y_pred) * 100,
        precision_score(y_true, y_pred, **score_kwargs) * 100,
        recall_score(y_true, y_pred, **score_kwargs) * 100,
        f1_score(y_true, y_pred, **score_kwargs) * 100,
    )


# Function to load and predict with models
def evaluate_model(model_path, x_test, y_test, genders_test, label_encoder):
    """Load a saved model and score it on the test set.

    Args:
        model_path: Path of the saved model file passed to ``load_model``.
        x_test: Model input used for ``model.predict``.
        y_test: True command labels; must be labels known to ``label_encoder``.
        genders_test: True gender annotations; entries equal to ``1`` are
            treated as ``'female'``, everything else as ``'male'``.
        label_encoder: Fitted encoder mapping command labels <-> class indices.

    Returns:
        An 8-tuple ``(command_accuracy, command_precision, command_recall,
        command_f1, gender_accuracy, gender_precision, gender_recall,
        gender_f1)``. Command metrics are percentages (weighted average).
        Gender metrics are percentages (binary, positive class ``'female'``)
        when the model returns several outputs, otherwise the string ``'N/A'``.
    """
    try:
        custom_objects = {"GradientReversalLayer": GradientReversalLayer}
        model = load_model(model_path, custom_objects=custom_objects)
    except Exception:
        # Best-effort retry without custom objects. `Exception` (instead of a
        # bare `except`) lets KeyboardInterrupt / SystemExit propagate.
        model = load_model(model_path)  # For models without adversarial training

    predictions = model.predict(x_test)

    # Multi-output (adversarial) models yield a sequence: [task, gender, ...].
    if isinstance(predictions, (list, tuple)):
        y_pred_task, y_pred_gender = predictions[0], predictions[1]
    else:  # Non-adversarial model
        y_pred_task, y_pred_gender = predictions, None

    y_pred = np.argmax(y_pred_task, axis=1)
    y_pred_labels = label_encoder.inverse_transform(y_pred)
    # The encode/decode round trip makes `transform` reject labels the encoder
    # was not fitted on before any metric is computed.
    y_test_labels = label_encoder.inverse_transform(label_encoder.transform(y_test))

    command_scores = _percentage_scores(y_test_labels, y_pred_labels, average='weighted')

    if y_pred_gender is not None:
        # assumes the gender head emits one probability per sample — TODO confirm
        gender_labels = np.where(y_pred_gender.flatten() > 0.5, 'female', 'male')
        correct_gender_labels = np.where(np.array(genders_test) == 1, 'female', 'male')
        gender_scores = _percentage_scores(
            correct_gender_labels, gender_labels, average='binary', pos_label='female'
        )
    else:
        gender_scores = ('N/A',) * 4

    return command_scores + gender_scores

# Directory that is searched recursively for saved '.keras' models
models_directory = 'saved_data/models'

# Test split: the features are converted to a float32 array, the labels are kept as loaded
x_test, y_test = load('saved_data/data/adversarial-training_non-masked_data/test_data.joblib')
x_test = np.array(x_test, dtype=np.float32)

# Gender annotations for the train / validation / test splits (stored together in one file)
genders_train, genders_val, genders_test = load(
    'saved_data/data/adversarial-training_non-masked_data/genders_data.joblib'
)

# Encoder fitted on the fixed list of command classes
all_labels = [
    'battery', 'description', 'environment', 'greeting', 'health',
    'noise', 'nutrition', 'silence', 'sun', 'water',
]
label_encoder = LabelEncoder().fit(all_labels)

def _format_metric(value):
    """Format a numeric metric with two decimals; pass strings such as 'N/A' through."""
    return value if isinstance(value, str) else f"{value:.2f}"


# (substring, level) pairs checked in order; the first substring found in the
# model's directory name decides the augmentation level.
# NOTE(review): 'non' would also match names containing e.g. 'non-masked' —
# confirm against the actual model directory names.
_AUGMENTATION_KEYWORDS = (('non', 'no'), ('low', 'low'), ('medium', 'medium'), ('high', 'high'))

# Column names in the same order as the tuple returned by `evaluate_model`
_METRIC_COLUMNS = (
    'Command Accuracy (%)', 'Command Precision (%)', 'Command Recall (%)', 'Command F1 (%)',
    'Gender Accuracy (%)', 'Gender Precision (%)', 'Gender Recall (%)', 'Gender F1 (%)',
)

# One dict per evaluated model; turned into a DataFrame afterwards
results = []

# Iterate through subdirectories and evaluate models
for subdir, dirs, files in os.walk(models_directory):
    # os.walk yields entries in arbitrary, filesystem-dependent order. Sorting
    # `dirs` in place (top-down walk) and the file names makes the row order
    # of the results reproducible across machines.
    dirs.sort()
    for file in sorted(files):
        if not file.endswith('.keras'):
            continue

        model_path = os.path.join(subdir, file)
        model_name = os.path.basename(subdir)
        augmentation_level = next(
            (level for keyword, level in _AUGMENTATION_KEYWORDS if keyword in model_name),
            'unknown',
        )

        metrics = evaluate_model(model_path, x_test, y_test, genders_test, label_encoder)

        row = {'Model Name': model_name, 'Augmentation Level': augmentation_level}
        row.update(zip(_METRIC_COLUMNS, map(_format_metric, metrics)))
        results.append(row)

def _best_row(frame, column):
    """Return the row of ``frame`` with the largest value in ``column``.

    The column holds metrics formatted as strings, so it is cast to float
    before the maximum is located.
    """
    return frame.loc[frame[column].astype(float).idxmax()]


# Create DataFrame from results
results_df = pd.DataFrame(results)

# Display the full DataFrame in a scrollable view
# (`display` is the IPython.display module here, hence `display.display`)
display.display(display.HTML(results_df.to_html(index=False)))

if results_df.empty:
    # Without any evaluated model the frame has no columns, so the metric
    # lookups below would raise KeyError; report the situation instead.
    best_command_accuracy_model = best_command_f1_model = None
    best_gender_accuracy_model = best_gender_f1_model = None
    print("No evaluated models found; nothing to rank.")
else:
    # Find and print the best models based on metrics
    best_command_accuracy_model = _best_row(results_df, 'Command Accuracy (%)')
    best_command_f1_model = _best_row(results_df, 'Command F1 (%)')

    # Filter out rows with 'N/A' for gender metrics to find the best gender models
    gender_results_df = results_df[results_df['Gender Accuracy (%)'] != 'N/A']
    if gender_results_df.empty:
        best_gender_accuracy_model = best_gender_f1_model = None
    else:
        best_gender_accuracy_model = _best_row(gender_results_df, 'Gender Accuracy (%)')
        best_gender_f1_model = _best_row(gender_results_df, 'Gender F1 (%)')

    print("Best Command Classification Accuracy Model:")
    print(best_command_accuracy_model)
    print("\nBest Command Classification F1 Score Model:")
    print(best_command_f1_model)

    if best_gender_accuracy_model is not None and best_gender_f1_model is not None:
        print("\nBest Gender Prediction Accuracy Model:")
        print(best_gender_accuracy_model)
        print("\nBest Gender Prediction F1 Score Model:")
        print(best_gender_f1_model)
    else:
        print("\nNo gender prediction models found or applicable.")