## 1. Importación de Librerías

In [18]:
import numpy as np
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier

import os

import joblib

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score
)

## 2. Definición de Datos, Modelos y Directorios

In [19]:
def _require_directory(path, message):
    """Raise FileNotFoundError carrying `message` when `path` does not exist."""
    if os.path.exists(path):
        return
    raise FileNotFoundError(message)


# Dataset variant names. Each one is used as a sub-directory name under both
# DATA_DIR (test folds) and MODELS_DIR (trained models).
# Expands, in order, to: estandarizado, estandarizado_PCA80, estandarizado_PCA95,
# normalizado, normalizado_PCA80, normalizado_PCA95, original, original_PCA80,
# original_PCA95.
DATASETS_DIRS = [
    f"{scaling}{pca_suffix}"
    for scaling in ("estandarizado", "normalizado", "original")
    for pca_suffix in ("", "_PCA80", "_PCA95")
]

# Model names; each one is a sub-directory of MODELS_DIR/<dataset variant>.
MODELS = ["KNN", "SVM", "NaiveBayes", "RandomForest"]

# Number of folds evaluated; fold files are numbered 1..K_FOLDS.
K_FOLDS = 5

# Both directories must already exist: fail early with an actionable message
# rather than part-way through the evaluation loop.
DATA_DIR = os.path.join("..", "data")
_require_directory(
    DATA_DIR,
    f"Data directory '{DATA_DIR}' does not exist. Please ensure the data is available.",
)

MODELS_DIR = os.path.join("..", "models")
_require_directory(
    MODELS_DIR,
    f"Models directory '{MODELS_DIR}' does not exist. Please ensure you execute the training script first.",
)

## 3. Carga de Modelos y Evaluación

In [20]:
def load_model(data_type, model_name, model_iteration, model_dir):
    """Load the persisted model for one dataset variant, model name and fold.

    The file is expected at
    ``<model_dir>/<data_type>/<model_name>/model_fold_<model_iteration>.pkl``.

    Args:
        data_type: Dataset variant name (sub-directory of ``model_dir``).
        model_name: Model name (sub-directory of the variant directory).
        model_iteration: Fold identifier embedded in the file name.
        model_dir: Root directory that holds the saved models.

    Returns:
        Whatever object ``joblib.load`` deserialises from that file.
    """
    path_parts = (
        f"{data_type}",
        f"{model_name}",
        f"model_fold_{model_iteration}.pkl",
    )
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only point this at model files you produced yourself.
    return joblib.load(os.path.join(model_dir, *path_parts))

def test_model_and_save_results(X_test, y_test, model):
    """Score a fitted classifier on one test fold.

    Despite the name, nothing is written to disk here: the two frames are
    returned and the caller decides where to store them.

    Args:
        X_test: Feature matrix passed to ``model.predict`` / ``predict_proba``.
        y_test: True labels, one per row of ``X_test``.
        model: Fitted estimator exposing ``predict`` and ``predict_proba``.

    Returns:
        A ``(metrics_df, predicted_probability_df)`` tuple:

        * ``metrics_df`` - single-row DataFrame with columns accuracy,
          precision, recall, f1_score, roc_auc (all macro-averaged, ROC AUC
          one-vs-rest) followed by the keys returned by
          ``calculate_multiclass_metrics``.
        * ``predicted_probability_df`` - one row per sample with columns
          True_Label, Predicted and one ``Prob_Class_<i>`` per column of the
          ``predict_proba`` output.
    """
    predictions = model.predict(X_test)
    class_probabilities = model.predict_proba(X_test)

    macro = {'average': 'macro'}

    # Key order fixes the column order of the metrics CSV written by the caller.
    scores = {
        'accuracy': accuracy_score(y_test, predictions),
        'precision': precision_score(y_test, predictions, **macro),
        'recall': recall_score(y_test, predictions, **macro),
        'f1_score': f1_score(y_test, predictions, **macro),
        'roc_auc': roc_auc_score(y_test, class_probabilities, multi_class='ovr', **macro),
    }
    scores.update(calculate_multiclass_metrics(y_test, predictions))

    # Probability columns are named after their position in the predict_proba
    # output. NOTE(review): position i presumably corresponds to
    # model.classes_[i] - confirm before mapping columns back to labels.
    n_probability_columns = class_probabilities.shape[1]
    probability_columns = [f'Prob_Class_{i}' for i in range(n_probability_columns)]

    per_sample_frame = pd.DataFrame(class_probabilities, columns=probability_columns)
    per_sample_frame.insert(0, 'True_Label', y_test)
    per_sample_frame.insert(1, 'Predicted', predictions)

    return pd.DataFrame([scores]), per_sample_frame
    
    

def _safe_rate(numerator, denominator):
    """Return numerator / denominator, or 0 when the denominator is zero."""
    return numerator / denominator if denominator > 0 else 0


def calculate_multiclass_metrics(y_true, y_pred):
    """Compute macro-averaged one-vs-rest confusion-matrix rates.

    Each class present in ``y_true`` is treated in turn as the positive class
    and all other classes as negative. Sensitivity, specificity, false-positive
    rate and false-negative rate are computed per class and then averaged with
    equal weight per class.

    Args:
        y_true: True labels. Any array-like (list, ndarray, pandas Series).
        y_pred: Predicted labels, same shape as ``y_true``.

    Returns:
        dict with keys ``'sensitivity'``, ``'specificity'``, ``'fpr'`` and
        ``'fnr'`` (in that order). A per-class rate whose denominator is zero
        counts as 0. Empty input yields NaN for every key, because the mean of
        an empty list is NaN.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` do not have the same shape.
    """
    # Coerce to ndarrays so the comparisons below are element-wise and
    # positional: a plain list would compare as a whole object, and two pandas
    # Series would be aligned on their index instead of their position.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Without this check a (n,) vs (n, 1) pair would silently broadcast to an
    # (n, n) grid and produce meaningless counts.
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {y_true.shape} and {y_pred.shape}."
        )

    sensitivity_list = []
    specificity_list = []
    fpr_list = []
    fnr_list = []

    # Only classes that occur in y_true are scored; a class that appears only
    # in y_pred contributes false positives to the others but has no own entry.
    for cls in np.unique(y_true):
        is_actual = y_true == cls
        is_predicted = y_pred == cls

        TP = np.count_nonzero(is_actual & is_predicted)
        TN = np.count_nonzero(~is_actual & ~is_predicted)
        FP = np.count_nonzero(~is_actual & is_predicted)
        FN = np.count_nonzero(is_actual & ~is_predicted)

        sensitivity_list.append(_safe_rate(TP, TP + FN))
        specificity_list.append(_safe_rate(TN, TN + FP))
        fpr_list.append(_safe_rate(FP, FP + TN))
        fnr_list.append(_safe_rate(FN, FN + TP))

    return {
        'sensitivity': np.mean(sensitivity_list),
        'specificity': np.mean(specificity_list),
        'fpr': np.mean(fpr_list),
        'fnr': np.mean(fnr_list),
    }
    

# Evaluate every stored model on its matching test fold, for every dataset
# variant, and write two CSVs per (variant, model, fold) under ../results:
# per-sample predictions/probabilities and a one-row metrics summary.
for data_type in DATASETS_DIRS:

    data_path = os.path.join(DATA_DIR, data_type)

    # A missing variant directory is skipped rather than treated as an error.
    if not os.path.exists(data_path):
        print(f"Data path '{data_path}' does not exist. Skipping...")
        continue

    for fold in range(K_FOLDS):

        # Fold files and model files are numbered from 1, hence fold + 1.
        fold_path = os.path.join(data_path, f"test_{fold + 1}_{data_type}.csv")

        # Parse the CSV once per fold and slice it: the last column is taken
        # as the label, every other column as a feature.
        fold_values = pd.read_csv(fold_path).values
        X_test = fold_values[:, :-1]
        y_test = fold_values[:, -1]

        for model_name in MODELS:

            model = load_model(data_type, model_name, fold + 1, MODELS_DIR)

            print(f"Evaluating Model: {model_name}, Data Type: {data_type}, Fold: {fold + 1}")

            metrics_df, predicted_probability_df = test_model_and_save_results(X_test, y_test, model)

            results_dir = os.path.join("..", "results", data_type, model_name)

            # exist_ok avoids the separate existence check and makes re-runs safe.
            os.makedirs(results_dir, exist_ok=True)

            results_path = os.path.join(results_dir, f"results_predictions_fold_{fold + 1}.csv")
            predicted_probability_df.to_csv(results_path, index=False)

            print(f"Results saved at: {results_path}")

            metrics_path = os.path.join(results_dir, f"results_metrics_fold_{fold + 1}.csv")
            metrics_df.to_csv(metrics_path, index=False)

            print(f"Metrics saved at: {metrics_path}")


Evaluating Model: KNN, Data Type: estandarizado, Fold: 1
Results saved at: ../results/estandarizado/KNN/results_predictions_fold_1.csv
Metrics saved at: ../results/estandarizado/KNN/results_metrics_fold_1.csv
Evaluating Model: SVM, Data Type: estandarizado, Fold: 1
Results saved at: ../results/estandarizado/SVM/results_predictions_fold_1.csv
Metrics saved at: ../results/estandarizado/SVM/results_metrics_fold_1.csv
Evaluating Model: NaiveBayes, Data Type: estandarizado, Fold: 1
Results saved at: ../results/estandarizado/NaiveBayes/results_predictions_fold_1.csv
Metrics saved at: ../results/estandarizado/NaiveBayes/results_metrics_fold_1.csv
Evaluating Model: RandomForest, Data Type: estandarizado, Fold: 1
Results saved at: ../results/estandarizado/RandomForest/results_predictions_fold_1.csv
Metrics saved at: ../results/estandarizado/RandomForest/results_metrics_fold_1.csv
Evaluating Model: KNN, Data Type: estandarizado, Fold: 2
Results saved at: ../results/estandarizado/KNN/results_pred