In [1]:
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Modelos a comparar
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

# Métricas de evaluación
from sklearn.metrics import confusion_matrix, balanced_accuracy_score, f1_score

# Para mostrar la tabla final
from IPython.display import display

In [2]:
models_to_test = {
    'Logistic Regression': LogisticRegression(random_state=42, max_iter=1000),
    'Random Forest': RandomForestClassifier(random_state=42),
    'SVC': SVC(random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42)
}

In [3]:
def evaluate_models(X, y, models, dataset_name):
    """
    Entrena y evalúa una lista de modelos en un dataset.
    Devuelve una lista de diccionarios con los resultados de cada modelo.
    """
    print(f"--- Evaluando Dataset: {dataset_name} ---")
    
    results_list = []
    
    # Preprocesamiento
    categorical_features = X.select_dtypes(include=['category', 'object']).columns
    numerical_features = X.select_dtypes(include=['int64', 'float64']).columns
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numerical_features),
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
        ],
        remainder='passthrough'
    )
    
    # División de datos
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
    
    # Bucle para probar cada modelo
    for model_name, model in models.items():
        print(f"\nProbando modelo: {model_name}...")
        
        # Crear y entrenar el pipeline
        pipeline = Pipeline(steps=[
            ('preprocessor', preprocessor),
            ('classifier', model)
        ])
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)
        
        # Calcular y mostrar métricas
        print("Matriz de Confusión:")
        print(confusion_matrix(y_test, y_pred))
        
        bal_acc = balanced_accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, pos_label=1, zero_division=0)
        
        print(f"Balanced Accuracy: {bal_acc:.3f}")
        print(f"F1-Score (clase 'mala'): {f1:.3f}")
        
        # Guardar resultados
        results_list.append({
            'dataset': dataset_name,
            'model': model_name,
            'balanced_accuracy': bal_acc,
            'f1_score_bad_class': f1
        })
        
    return results_list

In [4]:
datasets_to_test = {
    'German Credit': 31,
    'Taiwan Credit Default': 42477,
    'Credit Card Fraud': 1597
}

all_results = []

for name, data_id in datasets_to_test.items():
    try:
        data = fetch_openml(data_id=data_id, as_frame=True, parser='auto')
        X = data.data
        y = data.target if 'target' in data else data['class']
        y_bin = pd.Series(pd.factorize(y)[0], index=y.index)
        
        # Llamar a la función que evalúa TODOS los modelos
        results = evaluate_models(X, y_bin, models_to_test, name)
        all_results.extend(results)
        print("\n" + "="*60 + "\n")
        
    except Exception as e:
        print(f"No se pudo procesar el dataset {name}. Error: {e}")
        print("\n" + "="*60 + "\n")

--- Evaluando Dataset: German Credit ---

Probando modelo: Logistic Regression...
Matriz de Confusión:
[[187  23]
 [ 42  48]]
Balanced Accuracy: 0.712
F1-Score (clase 'mala'): 0.596

Probando modelo: Random Forest...
Matriz de Confusión:
[[196  14]
 [ 55  35]]
Balanced Accuracy: 0.661
F1-Score (clase 'mala'): 0.504

Probando modelo: SVC...
Matriz de Confusión:
[[196  14]
 [ 53  37]]
Balanced Accuracy: 0.672
F1-Score (clase 'mala'): 0.525

Probando modelo: Decision Tree...
Matriz de Confusión:
[[158  52]
 [ 51  39]]
Balanced Accuracy: 0.593
F1-Score (clase 'mala'): 0.431


--- Evaluando Dataset: Taiwan Credit Default ---

Probando modelo: Logistic Regression...
Matriz de Confusión:
[[ 482 1509]
 [ 183 6826]]
Balanced Accuracy: 0.608
F1-Score (clase 'mala'): 0.890

Probando modelo: Random Forest...
Matriz de Confusión:
[[ 771 1220]
 [ 399 6610]]
Balanced Accuracy: 0.665
F1-Score (clase 'mala'): 0.891

Probando modelo: SVC...
Matriz de Confusión:
[[ 676 1315]
 [ 284 6725]]
Balanced Accura

In [5]:
final_results_df = pd.DataFrame(all_results)

print("✅ --- TABLA COMPARATIVA FINAL DE MODELOS --- ✅")
display(final_results_df.sort_values(by=['dataset', 'balanced_accuracy'], ascending=[True, False]))

✅ --- TABLA COMPARATIVA FINAL DE MODELOS --- ✅


Unnamed: 0,dataset,model,balanced_accuracy,f1_score_bad_class
9,Credit Card Fraud,Random Forest,0.881727,0.849624
11,Credit Card Fraud,Decision Tree,0.868091,0.770318
8,Credit Card Fraud,Logistic Regression,0.807345,0.716535
10,Credit Card Fraud,SVC,0.800658,0.741667
0,German Credit,Logistic Regression,0.711905,0.596273
2,German Credit,SVC,0.672222,0.524823
1,German Credit,Random Forest,0.661111,0.503597
3,German Credit,Decision Tree,0.592857,0.430939
5,Taiwan Credit Default,Random Forest,0.665158,0.890896
6,Taiwan Credit Default,SVC,0.649504,0.893747


In [6]:
# Celda 6: Calcular y mostrar el rendimiento medio de cada modelo
print("📊 --- RENDIMIENTO MEDIO POR MODELO (TODOS LOS DATASETS) --- 📊")

# Agrupamos por 'model' y calculamos la media de las métricas
average_performance = final_results_df.groupby('model')[['balanced_accuracy', 'f1_score_bad_class']].mean()

# Ordenamos por la métrica que consideremos más importante para ver el ranking
average_performance_sorted = average_performance.sort_values(by='balanced_accuracy', ascending=False)

display(average_performance_sorted)

📊 --- RENDIMIENTO MEDIO POR MODELO (TODOS LOS DATASETS) --- 📊


Unnamed: 0_level_0,balanced_accuracy,f1_score_bad_class
model,Unnamed: 1_level_1,Unnamed: 2_level_1
Random Forest,0.735999,0.748039
Logistic Regression,0.70908,0.734179
SVC,0.707462,0.720079
Decision Tree,0.693551,0.675752
