# Librerias

In [1]:
import argparse
import pandas as pd
import numpy as np
import os

from scipy.stats import shapiro, mannwhitneyu, ttest_ind
from statsmodels.stats.multitest import multipletests
from sklearn import metrics

from sklearn.model_selection import StratifiedGroupKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import make_pipeline

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import (roc_auc_score, accuracy_score, f1_score, precision_score,
                             recall_score, balanced_accuracy_score, cohen_kappa_score,
                             matthews_corrcoef, confusion_matrix)
from sklearn.feature_selection import VarianceThreshold

import matplotlib.pyplot as plt
import seaborn as sns

import matplotlib as mpl
from sklearn.preprocessing import label_binarize
mpl.use('Agg')
import scienceplots

plt.style.use(['science', 'grid'])
dpi = 300
from scipy.stats import kruskal, f_oneway
plt.rcParams["text.usetex"] = False


In [2]:
def get_models(random_state=42):
    """
    Define los pipelines para cada clasificador, incluyendo preprocesamiento estándar.
    
    Args:
        random_state (int): Semilla para reproducibilidad
    
    Returns:
        list: Lista de tuplas (nombre_modelo, pipeline_scikit)
    """

    # Pipeline para Support Vector Machine
    pipe_svc = make_pipeline(
        StandardScaler(), # Normalización de características
        VarianceThreshold(),  # Eliminación de características con varianza nula
        SVC(random_state=random_state, class_weight="balanced", probability=True)
    )
    
    # Pipeline para Regresión Logística
    pipe_lr = make_pipeline(
        StandardScaler(),
        VarianceThreshold(),
        LogisticRegression(
            penalty='elasticnet',       # Regularización combinada L1 y L2
            l1_ratio=0.5,               # Ratio para elasticnet (0.5 = igual peso L1 y L2)
            class_weight="balanced",
            random_state=random_state,
            solver='saga',              # Optimizador para elasticnet
            max_iter=10000              # Iteraciones máximas
        )
    )
    
    # Pipeline para Random Forest
    pipe_rf = make_pipeline(
        StandardScaler(),
        VarianceThreshold(),
        RandomForestClassifier(n_jobs=-1, class_weight="balanced_subsample", random_state=random_state)
    )
    
    # Pipeline para Naive Bayes Gaussiano
    pipe_nb = make_pipeline(
        StandardScaler(),
        VarianceThreshold(),
        GaussianNB() # No necesita parámetros adicionales
    )
    
    # Pipeline para K-Nearest Neighbors
    pipe_knn = make_pipeline(
        StandardScaler(),
        VarianceThreshold(),
        KNeighborsClassifier(n_jobs=-1)
    )
    
    # Pipeline para Gradient Boosting
    pipe_gb = make_pipeline(
        StandardScaler(),
        VarianceThreshold(),
        GradientBoostingClassifier(random_state=random_state)
    )

    # Lista con todos los modelos
    models = [
        ("SVM", pipe_svc),
        ("Logistic Regression", pipe_lr),
        ("Random Forest", pipe_rf),
        ("Naive Bayes", pipe_nb),
        ("KNN", pipe_knn),
        ("Gradient Boosting", pipe_gb),
    ]
    return models

# Multiclass

In [3]:
def evaluate_model_multiclass(model, X, y, groups, n_splits=5, n_repeats=1, base_random_state=42):
    """
    Realiza validación cruzada repetida estratificada por grupos (multiclase).
    
    Args:
        model: Modelo a evaluar (pipeline de scikit-learn)
        X (pd.DataFrame): Características
        y (np.array): Etiquetas multiclase
        groups (np.array): Identificadores de grupos (pacientes) para CV
        n_splits (int): Número de particiones por repetición
        n_repeats (int): Número de repeticiones de la validación cruzada
        base_random_state (int): Semilla base para reproducibilidad
    
    Returns:
        tuple: (fold_results, pred_vals)
            - fold_results: Lista de diccionarios con métricas por fold
            - pred_vals: Dict con datos de predicciones para cada fold
    """
    fold_results = []
    folds_data = []
    global_fold_index = 0
    classes = np.unique(y)
    for rep in range(n_repeats):
        current_random_state = base_random_state + rep
        splitter = StratifiedGroupKFold(
            n_splits=n_splits, shuffle=True, random_state=current_random_state
        )
        for train_idx, val_idx in splitter.split(X, y, groups=groups):
            global_fold_index += 1
            X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
            y_train, y_val = y[train_idx], y[val_idx]
            model.fit(X_train, y_train)
            y_train_pred = model.predict(X_train)

            # Probabilidades o scores
            if hasattr(model, "predict_proba"):
                y_train_prob = model.predict_proba(X_train)
            elif hasattr(model, "decision_function"):
                y_train_prob = model.decision_function(X_train)
            else:
                y_train_prob = None

            # AUC multiclase en entrenamiento
            try:
                y_train_bin = label_binarize(y_train, classes=classes)
                if y_train_prob is not None and len(np.unique(y_train)) > 1:
                    train_auc = roc_auc_score(y_train_bin, y_train_prob, multi_class="ovr", average="macro")
                else:
                    train_auc = np.nan
            except:
                train_auc = np.nan

            train_f1_macro = f1_score(y_train, y_train_pred, average="macro")

            # Validación
            y_val_pred = model.predict(X_val)
            if hasattr(model, "predict_proba"):
                y_val_prob = model.predict_proba(X_val)
            elif hasattr(model, "decision_function"):
                y_val_prob = model.decision_function(X_val)
            else:
                y_val_prob = None

            # AUC multiclase en validación
            try:
                y_val_bin = label_binarize(y_val, classes=classes)
                if y_val_prob is not None and len(np.unique(y_val)) > 1:
                    val_auc = roc_auc_score(y_val_bin, y_val_prob, multi_class="ovr", average="macro")
                else:
                    val_auc = np.nan
            except:
                val_auc = np.nan

            val_mcc = matthews_corrcoef(y_val, y_val_pred)
            val_kappa = cohen_kappa_score(y_val, y_val_pred)
            val_f1_macro = f1_score(y_val, y_val_pred, average="macro")
            val_accuracy = accuracy_score(y_val, y_val_pred)
            val_balanced_accuracy = balanced_accuracy_score(y_val, y_val_pred)

            # Métricas por clase
            per_class_precision = precision_score(y_val, y_val_pred, average=None, labels=classes)
            per_class_recall = recall_score(y_val, y_val_pred, average=None, labels=classes)
            per_class_f1 = f1_score(y_val, y_val_pred, average=None, labels=classes)

            # Matriz de confusión y exactitud por clase
            cm = confusion_matrix(y_val, y_val_pred, labels=classes)
            per_class_accuracy = []
            for i in range(len(cm)):
                row_sum = np.sum(cm[i, :])
                if row_sum > 0:
                    per_class_accuracy.append(cm[i, i] / row_sum)
                else:
                    per_class_accuracy.append(np.nan)

            fold_metrics = {
                "Fold": global_fold_index,
                "Repeat": rep + 1,
                "train_auc": train_auc,
                "train_f1_macro": train_f1_macro,
                "val_auc": val_auc,
                "val_mcc": val_mcc,
                "val_kappa": val_kappa,
                "val_f1_macro": val_f1_macro,
                "val_accuracy": val_accuracy,
                "val_balanced_accuracy": val_balanced_accuracy,
                "per_class_precision": per_class_precision.tolist(),
                "per_class_recall": per_class_recall.tolist(),
                "per_class_f1": per_class_f1.tolist(),
                "per_class_accuracy": per_class_accuracy
            }
            fold_results.append(fold_metrics)

            folds_data.append({
                "fold_index": global_fold_index,
                "Repeat": rep + 1,
                "y_val": y_val,
                "y_val_pred": y_val_pred,
                "y_val_prob": y_val_prob
            })

    pred_vals = {
        "folds": folds_data
    }
    return fold_results, pred_vals

# Código principal

In [4]:
"""
Función principal que coordina el proceso completo de entrenamiento y evaluación:
1. Procesa argumentos de línea de comandos
2. Carga y preprocesa los datos
3. Realiza selección de características (opcional)
4. Entrena y evalúa modelos
5. Genera curvas ROC y resultados
6. Ejecuta scripts complementarios (opcional)
"""
# --- Configuración de argumentos de línea de comandos ---    
parser = argparse.ArgumentParser(
    description="Evaluación de modelos con validación cruzada repetida"
)
parser.add_argument(
    "--csv", type=str,
    choices=["features_all_gland.csv", "features_all_full.csv"],
    default="features_all_gland.csv",
    help="Nombre del CSV con las características."
)
parser.add_argument(
    "--data_pre", type=str,
    default="../../../artifacts/radiomics",
    help="Directorio raíz donde se encuentran los datos radiomics."
)
parser.add_argument(
    "--results_base", type=str, default="../../../results/radiomics",
    help="Directorio base donde se crearán los resultados."
)
parser.add_argument(
    "--n_splits", type=int, default=5,
    help="Número de particiones para StratifiedGroupKFold (por repetición)."
)
parser.add_argument(
    "--n_repeats", type=int, default=10,
    help="Número de repeticiones de la validación cruzada."
)
parser.add_argument(
    "--feature_strategy", type=str,
    choices=["all", "most_discriminant"],
    default="most_discriminant",
    help="Estrategia de selección de features: 'all' o 'most_discriminant'."
)
parser.add_argument(
    "--calculate_differences", action="store_true", default=True,
    help="Si se habilita, ejecuta model_differences.py."
)
parser.add_argument(
    "--fine_tune_best_model", action="store_true", default=False,
    help="Si se habilita, realiza fine-tuning del mejor modelo."
)

args = parser.parse_args(args=[])



# Cargar datos

In [5]:
# --- Carga de datos y preprocesamiento ---
label_csv= "label1" 
num_label = label_csv[-1]
df = pd.read_csv(f'/mnt/datalake/openmind/MedP-Midas/data/features_t2w/features_t2w_{label_csv}_with_pfirrmann.csv')
y = df[f"{num_label}"]
groups = df["patient_id"]
X = df.drop([ 'patient_id','1', '2', '3', '4', '5','study_id', 'label', 'mask_type',
                              'diagnostics_Versions_PyRadiomics', 'diagnostics_Versions_Numpy', 
                              'diagnostics_Versions_SimpleITK', 'diagnostics_Versions_PyWavelet', 
                              'diagnostics_Versions_Python', 'diagnostics_Configuration_Settings', 
                              'diagnostics_Configuration_EnabledImageTypes', 'diagnostics_Image-original_Hash', 
                              'diagnostics_Image-original_Dimensionality', 'diagnostics_Image-original_Spacing', 
                              'diagnostics_Image-original_Size', 'diagnostics_Image-original_Mean', 
                              'diagnostics_Image-original_Minimum', 'diagnostics_Image-original_Maximum', 
                              'diagnostics_Mask-original_Hash', 'diagnostics_Mask-original_Spacing', 
                              'diagnostics_Mask-original_Size', 'diagnostics_Mask-original_BoundingBox', 
                              'diagnostics_Mask-original_VoxelNum', 'diagnostics_Mask-original_VolumeNum', 
                              'diagnostics_Mask-original_CenterOfMassIndex', 'diagnostics_Mask-original_CenterOfMass', 
                              'diagnostics_Image-interpolated_Spacing', 'diagnostics_Image-interpolated_Size', 
                              'diagnostics_Image-interpolated_Mean', 'diagnostics_Image-interpolated_Minimum', 
                              'diagnostics_Image-interpolated_Maximum', 'diagnostics_Mask-interpolated_Spacing', 
                              'diagnostics_Mask-interpolated_Size', 'diagnostics_Mask-interpolated_BoundingBox', 
                              'diagnostics_Mask-interpolated_VoxelNum', 'diagnostics_Mask-interpolated_VolumeNum', 
                              'diagnostics_Mask-interpolated_CenterOfMassIndex', 'diagnostics_Mask-interpolated_CenterOfMass', 
                              'diagnostics_Mask-interpolated_Mean', 'diagnostics_Mask-interpolated_Minimum', 
                              'diagnostics_Mask-interpolated_Maximum',], axis=1)

experiment_dir = "/mnt/datalake/openmind/MedP-Midas/data/features_t2w"
os.makedirs(experiment_dir, exist_ok=True)
print(f"Creada carpeta de resultados: {experiment_dir}")

Creada carpeta de resultados: /mnt/datalake/openmind/MedP-Midas/data/features_t2w


In [6]:
# --- Selección de características ---
selected_features = X.columns

if args.feature_strategy == "most_discriminant":
    print(">> Realizando selección de características...")

    # Directorios para resultados de selección de características
    fs_dir = os.path.join(experiment_dir, "feature_selection")
    os.makedirs(fs_dir, exist_ok=True)
    images_dir = os.path.join(fs_dir, f"images{label_csv}")
    os.makedirs(images_dir, exist_ok=True)

    # Inicializar listas para almacenar estadísticas por característica
    feature_names, test_type_list, pvalue_list = ([] for _ in range(3))

    # Evaluar cada característica individualmente
    for column in X.columns:
        stat, p = shapiro(X[column])
        grupos = [X[column][y == clase] for clase in np.unique(y)]
        feature_names.append(column)
        alpha = 0.05
        if p > alpha:
            test_type_list.append('ANOVA')
            stats, pval = f_oneway(*grupos)
        else:
            test_type_list.append('Kruskal-Wallis')
            stats, pval = kruskal(*grupos)
        pvalue_list.append(pval)

    # Crear DataFrame con todas las estadísticas por característica
    train_auc_pvals_df = pd.DataFrame(
        list(zip(test_type_list, pvalue_list)),
        index=feature_names,
        columns=['Test', 'p-value']
    ).sort_values(by='p-value', ascending=True)

    # Seleccionar características: máximo 1 característica por cada 15 muestras
    num_features_model = round(X.shape[0] / 15)
    train_df = train_auc_pvals_df.sort_values(by='p-value', ascending=True)

    # Seleccionar las N características más significativas
    selected_features = train_df.index[0:num_features_model]
    print(f"  --> Seleccionadas {len(selected_features)} características más relevantes.")

    # Filtrar DataFrame para usar solo las características seleccionadas
    X = X[selected_features]
    # Guardar DataFrame con estadísticas completas
    df_path_1 = os.path.join(fs_dir, f"train_auc_pvals_df{label_csv}.csv")
    train_auc_pvals_df.loc[selected_features].to_csv(df_path_1)
    print(f"  --> Guardado CSV: {df_path_1}\n")


    # --- Generar visualizaciones para las TOP 20 características ---
    top_20 = train_auc_pvals_df.index[:20]

    for rank, feature_name in enumerate(top_20, start=1):
        # Crear nombre de archivo
        safe_feat_name = feature_name.replace("/", "_")
        feat_folder_name = f"{rank}_{safe_feat_name}"
        feat_folder_path = os.path.join(images_dir, feat_folder_name)
        os.mkdir(feat_folder_path)
        
        # 1. Gráfico de violín para visualizar distribuciones por clase
        plt.figure(figsize=(9, 9))
        sns.violinplot(x=y, y=df[feature_name], color='grey')
        plt.title(f"Distribución de {feature_name} por clase Pfirrmann", fontsize=14)
        plt.xlabel("Clase de Pfirrmann")
        plt.ylabel(feature_name)
        violin_plot_path = os.path.join(feat_folder_path, f"{safe_feat_name}_violinplot.png")
        plt.savefig(violin_plot_path, dpi=dpi)
        plt.close()
else:
    print(">> Usando TODAS las características (sin selección).")



>> Realizando selección de características...
  --> Seleccionadas 48 características más relevantes.
  --> Guardado CSV: /mnt/datalake/openmind/MedP-Midas/data/features_t2w/feature_selection/train_auc_pvals_dflabel1.csv



# Entrenamiento y evaluación modelos

In [7]:
#--- Entrenamiento y evaluación de modelos ---
models = get_models(random_state=42)

# Colectores para resultados
all_results = []
preds_data = []

# Evaluar cada modelo
for model_name, model in models:
    print(f"Evaluando {model_name}...")
    fold_metrics_list, pred_vals = evaluate_model_multiclass(
        model, X, y, groups,
        n_splits=args.n_splits,
        n_repeats=args.n_repeats,
        base_random_state=42
    )

    # Añadir nombre de clasificador a cada resultado
    for fold_metrics in fold_metrics_list:
        fold_metrics["Classifier"] = model_name
        all_results.append(fold_metrics)

    # Almacenar predicciones
    preds_data.append({
        "Classifier": model_name,
        "folds": pred_vals["folds"]
    })

# Crear DataFrame con todos los resultados
df_resultados = pd.DataFrame(all_results)

# Ordenar columnas para mejor legibilidad
fixed_cols = ["Classifier", "Fold", "Repeat"]
other_cols = [c for c in df_resultados.columns if c not in fixed_cols]
df_resultados = df_resultados[fixed_cols + other_cols]
df_resultados.sort_values(by=["Classifier", "Fold"], inplace=True)

# Generar nombre de archivo para resultados
resultados_filename = f"resultados_discoslumbar_label{num_label}.csv"

#Guardar resultados
resultados_filepath = os.path.join(experiment_dir, resultados_filename)
df_resultados.to_csv(resultados_filepath, index=False)
print(f"\nResultados guardados en '{resultados_filepath}'")


# --- Estructurar datos de predicciones para guardar ---
records_for_csv = []
for item in preds_data:
    clf_name = item["Classifier"]
    folds_info = item["folds"]
    for fold_info in folds_info:
        fold_idx = fold_info["fold_index"]
        rep_idx = fold_info["Repeat"]
        
        y_val_list = fold_info["y_val"].tolist()
        y_pred_list = fold_info["y_val_pred"].tolist()
        if fold_info["y_val_prob"] is not None:
            y_prob_list = fold_info["y_val_prob"].tolist()
        else:
            y_prob_list = []
        
        records_for_csv.append({
            "Classifier": clf_name,
            "Fold": fold_idx,
            "Repeat": rep_idx,
            "y_val": y_val_list,
            "y_pred": y_pred_list,
            "y_prob": y_prob_list
        })

# Guardar predicciones en CSV
df_preds = pd.DataFrame(records_for_csv)
preds_filename = f"preds_discoslumbar_label{num_label}.csv"
preds_filepath = os.path.join(experiment_dir, preds_filename)
df_preds.to_csv(preds_filepath, index=False)
print(f"Predicciones guardadas en '{preds_filepath}'")

# --- Guardar lista de variables utilizadas ---
variables_txt_path = os.path.join(experiment_dir, f"variables_usadas_{label_csv}.txt")
with open(variables_txt_path, "w") as f:
    for feat in selected_features:
        f.write(str(feat) + "\n")
print(f"Archivo con variables usadas: {variables_txt_path}")





Evaluando SVM...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Evaluando Logistic Regression...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Evaluando Random Forest...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

Evaluando Naive Bayes...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Evaluando KNN...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

Evaluando Gradient Boosting...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize


Resultados guardados en '/mnt/datalake/openmind/MedP-Midas/data/features_t2w/resultados_discoslumbar_label1.csv'
Predicciones guardadas en '/mnt/datalake/openmind/MedP-Midas/data/features_t2w/preds_discoslumbar_label1.csv'
Archivo con variables usadas: /mnt/datalake/openmind/MedP-Midas/data/features_t2w/variables_usadas_label1.txt
