In [None]:
""" All modules for this steps of the pipeline are defined here. """

import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay, roc_auc_score, average_precision_score, RocCurveDisplay, PrecisionRecallDisplay
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
import os
from dotenv import load_dotenv
from _4_ModelBuildingTrainingAndHyperparameterOptimisation import ModelBuildAndTrain


In [None]:
# Configuration loaded from the environment.
# NOTE(review): no random seeds are set in this cell — confirm whether seeding
# is handled elsewhere (e.g. inside the imported training module).

""" Loading environment variables from .env file"""
load_dotenv()

# os.getenv returns None (not "") when a variable is missing, so every value
# below may be None if the .env file does not define it.
base_path: str = os.getenv('base_path')
trials: str = os.getenv('trials')
trial_models: str = os.getenv('trial_models')
results: str = os.getenv('results')
model: str = os.getenv('model')
# int(None) raises TypeError, so 'amount_of_trials' must be defined.
amount_of_trials: int = int(os.getenv('amount_of_trials'))
split = os.getenv('split')

# Labels used for the confusion-matrix plot.
conf_title = os.getenv('conf_title')
conf_xlabel = os.getenv('conf_xlabel')
conf_ylabel = os.getenv('conf_ylabel')

# Print to check if paths are loaded correctly
print(f"Base Path: {base_path}")
print(f"Trials Path: {trials}")
print(f"Trials Models Path: {trial_models}")
print(f"Results Path: {results}")
print(f"Model Path: {model}")
print(f"Amount of Trials: {amount_of_trials}")
print(f"Split: {split}")

# Fall back to default labels when a variable is unset (None) or empty ("").
if not conf_title:
    conf_title = "Confusion Matrix"
if not conf_xlabel:
    conf_xlabel = "Predicted Label"
if not conf_ylabel:
    conf_ylabel = "True Label"


In [None]:
""" Function to test the trained model and save performance metrics and plots. """

def _split_output_losses(model_loaded, eval_metrics):
    """Pick the reconstruction and classification losses out of an evaluate() result.

    Args:
        model_loaded: The loaded Keras model; its ``output_names`` are used to
            build the per-output loss keys.
        eval_metrics: Dict returned by ``model_loaded.evaluate(..., return_dict=True)``.

    Returns:
        Tuple ``(reconstruction_loss, classification_loss)``.
    """
    output_names = list(getattr(model_loaded, "output_names", None) or [])
    if len(output_names) >= 2:
        # Output 0 is trained against X (reconstruction), output 1 against y
        # (classification); Keras names per-output losses "<output_name>_loss".
        reconstruction_key = f"{output_names[0]}_loss"
        classification_key = f"{output_names[1]}_loss"
        if reconstruction_key in eval_metrics and classification_key in eval_metrics:
            return eval_metrics[reconstruction_key], eval_metrics[classification_key]

    # Fallback: per-output keys are unavailable, so keep the previous
    # positional behaviour (first two reported values).
    # NOTE(review): for multi-output models the first value is normally the
    # combined loss, not the reconstruction loss — confirm if this path is hit.
    values = list(eval_metrics.values())
    return values[0], values[1]


def Test_Model(x_test_path, y_test_path, model_path, threshold=0.5, output_dir=None):
    """Evaluate a saved two-output model on the test set and save metrics and plots.

    The model is evaluated with ``X_test`` as input and ``[X_test, y_test]`` as
    targets; the second prediction output is treated as the class probability.

    Args:
        x_test_path: Path to the CSV file with the test features.
        y_test_path: Path to the CSV file with the test labels.
        model_path: Path of the saved Keras model to load.
        threshold: Probability above which a prediction is labelled 1.
        output_dir: Directory for the report files and figures. Defaults to
            the module-level ``model`` directory.

    Raises:
        ValueError: If no output directory is given and ``model`` is unset.

    Writes ``performance_metrics.txt``, ``predictions_comparison.txt``,
    ``confusion_matrix.png``, ``roc_curve.png`` and
    ``precision_recall_curve.png`` into the output directory.
    """
    if output_dir is None:
        output_dir = model
    if output_dir is None:
        # Fail before the (potentially slow) evaluation instead of at makedirs.
        raise ValueError(
            "No output directory: pass output_dir or set the 'model' environment variable."
        )

    # Load the testing data
    X_test = pd.read_csv(x_test_path)
    y_test = pd.read_csv(y_test_path)

    model_loaded = tf.keras.models.load_model(model_path)

    # Convert y_test to NumPy array
    y_test_categorical = y_test.values

    print("Testing the model...")

    # Evaluate the model. With several outputs evaluate() reports the combined
    # loss first, so the per-output losses are looked up by name rather than
    # by position.
    eval_metrics = model_loaded.evaluate(
        X_test, [X_test, y_test_categorical], return_dict=True
    )
    reconstruction_loss, classification_loss = _split_output_losses(
        model_loaded, eval_metrics
    )

    # Predict probabilities for the test set (second model output)
    y_pred_prob = model_loaded.predict(X_test)[1]

    # Apply threshold to convert probabilities to binary labels
    y_pred_labels = (y_pred_prob > threshold).astype(int)

    classification_accuracy = accuracy_score(y_test_categorical, y_pred_labels)

    print("Reconstruction Loss:", reconstruction_loss)
    print("Classification Loss:", classification_loss)
    print("Classification Accuracy:", classification_accuracy)

    # Generate confusion matrix and classification report
    conf_matrix = confusion_matrix(y_test_categorical, y_pred_labels)
    class_report = classification_report(y_test_categorical, y_pred_labels)
    roc_auc = roc_auc_score(y_test_categorical, y_pred_prob)
    pr_auc = average_precision_score(y_test_categorical, y_pred_prob)

    # Ensure output folder exists
    os.makedirs(output_dir, exist_ok=True)

    # Save performance metrics to performance_metrics.txt
    performance_metrics_path = os.path.join(output_dir, "performance_metrics.txt")
    with open(performance_metrics_path, "w") as f:
        f.write(f"Reconstruction Loss: {reconstruction_loss}\n")
        f.write(f"Classification Loss: {classification_loss}\n")
        f.write(f"Classification Accuracy: {classification_accuracy}\n")
        f.write(f"Confusion Matrix:\n{conf_matrix}\n\n")
        f.write(f"Classification Report:\n{class_report}\n")
        f.write(f"ROC AUC: {roc_auc}\n")
        f.write(f"PR AUC: {pr_auc}\n")

    # Save predictions comparison to predictions_comparison.txt
    # (first label column next to the first predicted probability per row)
    predictions_path = os.path.join(output_dir, "predictions_comparison.txt")
    with open(predictions_path, "w") as f:
        for actual, predicted in zip(y_test_categorical, y_pred_prob):
            f.write(f"[{int(actual[0])}]  {predicted[0]:.2f}\n")

    # Confusion matrix plot. Title/labels and saving go through the display's
    # own axes/figure instead of pyplot's implicit "current" figure.
    disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=[0, 1])
    disp.plot(cmap=plt.cm.Blues)
    disp.ax_.set_title(conf_title)
    disp.ax_.set_xlabel(conf_xlabel)
    disp.ax_.set_ylabel(conf_ylabel)
    confusion_matrix_fig_path = os.path.join(output_dir, "confusion_matrix.png")
    disp.figure_.savefig(confusion_matrix_fig_path)
    plt.show()
    plt.close(disp.figure_)

    # ROC Curve Plot
    roc_display = RocCurveDisplay.from_predictions(y_test_categorical, y_pred_prob)
    roc_display.ax_.set_title("ROC Curve")
    roc_fig_path = os.path.join(output_dir, "roc_curve.png")
    roc_display.figure_.savefig(roc_fig_path)
    plt.show()
    plt.close(roc_display.figure_)

    # Precision-Recall Curve Plot
    pr_display = PrecisionRecallDisplay.from_predictions(y_test_categorical, y_pred_prob)
    pr_display.ax_.set_title("Precision-Recall Curve")
    pr_fig_path = os.path.join(output_dir, "precision_recall_curve.png")
    pr_display.figure_.savefig(pr_fig_path)
    plt.show()
    plt.close(pr_display.figure_)

    print(f"Performance metrics saved to: {performance_metrics_path}")
    print(f"Predictions comparison saved to: {predictions_path}")


In [None]:
""" Main function to build and train the model with hyperparameter optimization. """

if __name__ == "__main__":
    # Fail fast: without 'split' the test CSVs cannot be located, so check it
    # before starting the (potentially long) training run rather than after.
    if split is None:
        raise ValueError(
            "Environment variable 'split' is not set; cannot locate X_Test.csv / Y_Test.csv."
        )

    # Test data is read from the directory given by the 'split' variable.
    x_test_path = os.path.join(split, 'X_Test.csv')
    y_test_path = os.path.join(split, 'Y_Test.csv')

    # Build and train the model; the return value is used below as the path of
    # the model to load (presumably the best model found — verify against
    # ModelBuildAndTrain).
    best_model_path = ModelBuildAndTrain()
    model_path = best_model_path

    Test_Model(x_test_path, y_test_path, model_path)