In [22]:
# Librerias necesarias
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn import linear_model, metrics, model_selection
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Ridge
import seaborn as sns
from sklearn.linear_model import Lasso
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingRegressor,  GradientBoostingClassifier
from sklearn.metrics import (classification_report, confusion_matrix, roc_auc_score, accuracy_score, 
                classification_report, precision_score, recall_score, f1_score,mean_squared_error, r2_score)
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier

import xgboost
from xgboost import XGBClassifier

In [2]:
df = pd.read_csv('../data/raw/data.csv')

In [3]:
X = df.drop(columns=["Bankrupt?"])
y= df["Bankrupt?"]

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

In [5]:
# Dataset sin balancear
rf = RandomForestClassifier(
    n_estimators=100,
    max_depth=None,
    random_state=42,
    
    n_jobs=-1  # para aprovechar todos los núcleos del procesador
)

rf.fit(X_train, y_train)

# Evaluamos en el conjunto de test original
y_pred = rf.predict(X_test)
y_proba = rf.predict_proba(X_test)[:, 1]

print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, y_proba))

Confusion Matrix:
 [[1312    1]
 [  42    9]]

Classification Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.98      1313
           1       0.90      0.18      0.30        51

    accuracy                           0.97      1364
   macro avg       0.93      0.59      0.64      1364
weighted avg       0.97      0.97      0.96      1364

ROC AUC: 0.9478741991846243


In [None]:
# Balanceo de dataset con SMOTE

In [7]:
smote = SMOTE(random_state=42)
X_train_bal, y_train_bal = smote.fit_resample(X_train, y_train)

In [8]:
# Training con dataset balanceado
rf = RandomForestClassifier(
    n_estimators=100,
    max_depth=None,
    random_state=42,
    
    n_jobs=-1  # para aprovechar todos los núcleos del procesador
)

rf.fit(X_train_bal, y_train_bal)

# Evaluamos en el conjunto de test original
y_pred = rf.predict(X_test)
y_proba = rf.predict_proba(X_test)[:, 1]

print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, y_proba))

Confusion Matrix:
 [[1280   33]
 [  18   33]]

Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.97      0.98      1313
           1       0.50      0.65      0.56        51

    accuracy                           0.96      1364
   macro avg       0.74      0.81      0.77      1364
weighted avg       0.97      0.96      0.96      1364

ROC AUC: 0.9531980347356003


In [15]:
# GridSearch de mejores parámetros para RandomForestClassifier

param_grid = {
    "max_depth": [3, 5, 10, None],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 5, 10, 20, 30, 40],
    "n_estimators": [50, 100, 150, 200]
}

grid = GridSearchCV(
    RandomForestClassifier(class_weight="balanced", random_state=42, n_jobs=-1),
    param_grid,
    scoring="roc_auc",
    cv=5,
    n_jobs=-1
)

grid.fit(X_train, y_train)

In [17]:
print("Mejores parámetros encontrados:")
print(grid.best_params_)

Mejores parámetros encontrados:
{'max_depth': 10, 'min_samples_leaf': 10, 'min_samples_split': 2, 'n_estimators': 200}


In [18]:
# Training con los mejores parámetros encontrados
rf = RandomForestClassifier(
    n_estimators=200,
    max_depth=10,
    random_state=42,
    min_samples_leaf=10,
    min_samples_split=2,
    
    n_jobs=-1  # para aprovechar todos los núcleos del procesador
)

rf.fit(X_train_bal, y_train_bal)

# Evaluamos en el conjunto de test original
y_pred = rf.predict(X_test)
y_proba = rf.predict_proba(X_test)[:, 1]

print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, y_proba))

Confusion Matrix:
 [[1235   78]
 [  13   38]]

Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.94      0.96      1313
           1       0.33      0.75      0.46        51

    accuracy                           0.93      1364
   macro avg       0.66      0.84      0.71      1364
weighted avg       0.96      0.93      0.95      1364

ROC AUC: 0.9573047802517809


In [19]:
# Comparación con diferentes Threshold

y_proba_test = rf.predict_proba(X_test)[:, 1]

# Probar varios thresholds
thresholds = np.linspace(0.1, 0.9, 17)

for t in thresholds:
    y_pred = (y_proba_test >= t).astype(int)
    print(f"\nThreshold: {t:.2f}")
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("Classification Report:")
    print(classification_report(y_test, y_pred, target_names=["No Quiebra", "Quiebra"]))


Threshold: 0.10
Confusion Matrix:
[[997 316]
 [  1  50]]
Classification Report:
              precision    recall  f1-score   support

  No Quiebra       1.00      0.76      0.86      1313
     Quiebra       0.14      0.98      0.24        51

    accuracy                           0.77      1364
   macro avg       0.57      0.87      0.55      1364
weighted avg       0.97      0.77      0.84      1364


Threshold: 0.15
Confusion Matrix:
[[1063  250]
 [   4   47]]
Classification Report:
              precision    recall  f1-score   support

  No Quiebra       1.00      0.81      0.89      1313
     Quiebra       0.16      0.92      0.27        51

    accuracy                           0.81      1364
   macro avg       0.58      0.87      0.58      1364
weighted avg       0.96      0.81      0.87      1364


Threshold: 0.20
Confusion Matrix:
[[1118  195]
 [   6   45]]
Classification Report:
              precision    recall  f1-score   support

  No Quiebra       0.99      0.85      0

In [20]:
umbral = 0.2
y_pred = (y_proba >= umbral).astype(int)
print("Threshold aplicado:", umbral)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, y_proba))


Threshold aplicado: 0.2
Confusion Matrix:
 [[1118  195]
 [   6   45]]

Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.85      0.92      1313
           1       0.19      0.88      0.31        51

    accuracy                           0.85      1364
   macro avg       0.59      0.87      0.61      1364
weighted avg       0.96      0.85      0.89      1364

ROC AUC: 0.9573047802517809


In [None]:
# GridSearch de otros modelos 

# Definir modelos y sus grids
grids = {
    
    "AdaBoost": {
        "model": AdaBoostClassifier(random_state=42),
        "params": {
            "n_estimators": [50, 100,200],
            "learning_rate": [0.5, 1.0,1.5]
        }
    },
    "GradientBoosting": {
        "model": GradientBoostingClassifier(random_state=42),
        "params": {
            "n_estimators": [50, 100,200],
            "learning_rate": [0.05, 0.1],
            "max_depth": [3, 5,10]
        }
    },
    "XGBoost": {
        "model": XGBClassifier(eval_metric='logloss', random_state=42),
        "params": {
            "n_estimators": [50, 100,200],
            "learning_rate": [0.05, 0.1],
            "max_depth": [3, 5,10]
        }
    }
}

# Resultados
resultados_grid = []

for nombre, objeto in grids.items():
    grid = GridSearchCV(objeto["model"], objeto["params"], cv=5, scoring="accuracy", n_jobs=-1)
    grid.fit(X_train_bal, y_train_bal)
    
    resultados_grid.append({
        "Modelo": nombre,
        "Mejor Accuracy": grid.best_score_,
        "Mejores Parámetros": grid.best_params_
    })

# DataFrame resumen
df_resultados_grid = pd.DataFrame(resultados_grid).sort_values(by="Mejor Accuracy", ascending=False)
df_resultados_grid


In [None]:
# Resultado GridSearch de modelos, ejecutado previamente en el otro Notebook.
'''
Modelo  Mejor Accuracy                                           Mejores Parámetros
         XGBoost        0.986001  {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 200}
GradientBoosting        0.984298 {'learning_rate': 0.1, 'max_depth': 10, 'n_estimators': 200}
        AdaBoost        0.958759                  {'learning_rate': 1.5, 'n_estimators': 200}
'''

In [31]:
# Entrenamiento de modelos con los mejores parámetros encontrados
# AdaBoost
ada = AdaBoostClassifier(n_estimators=200, learning_rate=1.5, random_state=42)
ada.fit(X_train_bal, y_train_bal)

# GradientBoosting
gb = GradientBoostingClassifier(n_estimators=200, learning_rate=0.1, max_depth=10, random_state=42)
gb.fit(X_train_bal, y_train_bal)

# XGBoost
xgb = XGBClassifier(n_estimators=200, learning_rate=0.1, max_depth=5, eval_metric='logloss', random_state=42)
xgb.fit(X_train_bal, y_train_bal)

In [33]:
modelos = {
    "AdaBoost": ada,
    "GradientBoosting": gb,
    "XGBoost": xgb
}

for nombre, modelo in modelos.items():
    y_pred = modelo.predict(X_test)

    print(f"\n Confusion Matrix - {nombre}")
    print(confusion_matrix(y_test, y_pred))
    
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=["No Quiebra", "Quiebra"]))


 Confusion Matrix - AdaBoost
[[1252   61]
 [  18   33]]

Classification Report:
              precision    recall  f1-score   support

  No Quiebra       0.99      0.95      0.97      1313
     Quiebra       0.35      0.65      0.46        51

    accuracy                           0.94      1364
   macro avg       0.67      0.80      0.71      1364
weighted avg       0.96      0.94      0.95      1364


 Confusion Matrix - GradientBoosting
[[1290   23]
 [  21   30]]

Classification Report:
              precision    recall  f1-score   support

  No Quiebra       0.98      0.98      0.98      1313
     Quiebra       0.57      0.59      0.58        51

    accuracy                           0.97      1364
   macro avg       0.78      0.79      0.78      1364
weighted avg       0.97      0.97      0.97      1364


 Confusion Matrix - XGBoost
[[1288   25]
 [  17   34]]

Classification Report:
              precision    recall  f1-score   support

  No Quiebra       0.99      0.98      0.

In [34]:

# Calcular las probabilidades en el test
y_proba_test = xgb.predict_proba(X_test)[:, 1]

# Probar varios thresholds
thresholds = np.linspace(0.1, 0.9, 17)

for t in thresholds:
    y_pred = (y_proba_test >= t).astype(int)
    print(f"\nThreshold: {t:.2f}")
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("Classification Report:")
    print(classification_report(y_test, y_pred, target_names=["No Quiebra", "Quiebra"]))


Threshold: 0.10
Confusion Matrix:
[[1254   59]
 [  13   38]]
Classification Report:
              precision    recall  f1-score   support

  No Quiebra       0.99      0.96      0.97      1313
     Quiebra       0.39      0.75      0.51        51

    accuracy                           0.95      1364
   macro avg       0.69      0.85      0.74      1364
weighted avg       0.97      0.95      0.95      1364


Threshold: 0.15
Confusion Matrix:
[[1262   51]
 [  15   36]]
Classification Report:
              precision    recall  f1-score   support

  No Quiebra       0.99      0.96      0.97      1313
     Quiebra       0.41      0.71      0.52        51

    accuracy                           0.95      1364
   macro avg       0.70      0.83      0.75      1364
weighted avg       0.97      0.95      0.96      1364


Threshold: 0.20
Confusion Matrix:
[[1269   44]
 [  15   36]]
Classification Report:
              precision    recall  f1-score   support

  No Quiebra       0.99      0.97   

## Quitando columnas con correlación alta

In [None]:
df2 = df.drop(columns=[' ROA(C) before interest and depreciation before interest',
' ROA(B) before interest and depreciation after tax',
' Pre-tax net Interest Rate',
' Realized Sales Gross Margin',
' Gross Profit to Sales',
" Net Income to Stockholder's Equity",
' Current Liabilities/Equity',
' Borrowing dependency',
' Net Value Per Share (C)',
' Net Value Per Share (B)',
' After-tax Net Profit Growth Rate',
' Net worth/Assets',
' Cash Flow to Equity',
' Current Liability to Equity',
' Current Liability to Liability',
' Realized Sales Gross Margin',
' After-tax Net Profit Growth Rate',
' Continuous interest rate (after tax)',
' Persistent EPS in the Last Four Seasons',
' Operating Profit Per Share (Yuan ¥)',
' Working capitcal Turnover Rate',' Operating Profit Rate',' Net Income to Total Assets',' Operating profit/Paid-in capital',
' Cash Flow to Liability'," Net Income Flag"," Per Share Net profit before tax (Yuan ¥)"])

In [41]:
X2 = df2.drop(columns=["Bankrupt?"])
y2 = df2["Bankrupt?"]

In [42]:
X_train2, X_test2, y_train2, y_test2 = train_test_split(X2, y2, test_size=0.2, random_state=10)

In [43]:
smote = SMOTE(random_state=42)
X_train_bal2, y_train_bal2= smote.fit_resample(X_train2, y_train2)

In [50]:
y_train_bal2.unique
y_train_bal2.value_counts()

Bankrupt?
0    5286
1    5286
Name: count, dtype: int64

In [44]:
model = LogisticRegression(class_weight="balanced", max_iter=5000)

model.fit(X_train_bal2, y_train_bal2)

predictions = model.predict(X_test2)

print("Accuracy:", accuracy_score(y_test2, predictions))
print("Confusion matrix:\n", confusion_matrix(y_test2, predictions))
print("Report:\n", classification_report(y_test2, predictions))

Accuracy: 0.7675953079178885
Confusion matrix:
 [[1025  288]
 [  29   22]]
Report:
               precision    recall  f1-score   support

           0       0.97      0.78      0.87      1313
           1       0.07      0.43      0.12        51

    accuracy                           0.77      1364
   macro avg       0.52      0.61      0.49      1364
weighted avg       0.94      0.77      0.84      1364



In [53]:
# Training con los mejores parámetros encontrados
rf = RandomForestClassifier(
    n_estimators=200,
    max_depth=10,
    random_state=42,
    min_samples_leaf=10,
    min_samples_split=2,
    
    n_jobs=-1  # para aprovechar todos los núcleos del procesador
)

rf.fit(X_train_bal2, y_train_bal2)

# Evaluamos en el conjunto de test original
y_pred = rf.predict(X_test2)
y_proba = rf.predict_proba(X_test2)[:, 1]

print("Confusion Matrix:\n", confusion_matrix(y_test2, y_pred))
print("\nClassification Report:\n", classification_report(y_test2, y_pred))
print("ROC AUC:", roc_auc_score(y_test2, y_proba))

Confusion Matrix:
 [[1241   72]
 [  14   37]]

Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.95      0.97      1313
           1       0.34      0.73      0.46        51

    accuracy                           0.94      1364
   macro avg       0.66      0.84      0.71      1364
weighted avg       0.96      0.94      0.95      1364

ROC AUC: 0.9581559965951346


In [54]:
xgb2 = XGBClassifier(n_estimators=200, learning_rate=0.1, max_depth=5, eval_metric='logloss', random_state=42)
xgb2.fit(X_train_bal2, y_train_bal2)

In [55]:
y_pred = xgb2.predict(X_test2)
y_proba = xgb2.predict_proba(X_test2)[:, 1]

print("Confusion Matrix:\n", confusion_matrix(y_test2, y_pred))
print("\nClassification Report:\n", classification_report(y_test2, y_pred))
print("ROC AUC:", roc_auc_score(y_test2, y_proba))

Confusion Matrix:
 [[1291   22]
 [  18   33]]

Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.98      0.98      1313
           1       0.60      0.65      0.62        51

    accuracy                           0.97      1364
   macro avg       0.79      0.82      0.80      1364
weighted avg       0.97      0.97      0.97      1364

ROC AUC: 0.9580813284948405


In [56]:
# Calcular las probabilidades en el test
y_proba_test = xgb2.predict_proba(X_test2)[:, 1]

# Probar varios thresholds
thresholds = np.linspace(0.1, 0.9, 17)

for t in thresholds:
    y_pred = (y_proba_test >= t).astype(int)
    print(f"\nThreshold: {t:.2f}")
    print("Confusion Matrix:")
    print(confusion_matrix(y_test2, y_pred))
    print("Classification Report:")
    print(classification_report(y_test2, y_pred, target_names=["No Quiebra", "Quiebra"]))


Threshold: 0.10
Confusion Matrix:
[[1250   63]
 [  14   37]]
Classification Report:
              precision    recall  f1-score   support

  No Quiebra       0.99      0.95      0.97      1313
     Quiebra       0.37      0.73      0.49        51

    accuracy                           0.94      1364
   macro avg       0.68      0.84      0.73      1364
weighted avg       0.97      0.94      0.95      1364


Threshold: 0.15
Confusion Matrix:
[[1259   54]
 [  15   36]]
Classification Report:
              precision    recall  f1-score   support

  No Quiebra       0.99      0.96      0.97      1313
     Quiebra       0.40      0.71      0.51        51

    accuracy                           0.95      1364
   macro avg       0.69      0.83      0.74      1364
weighted avg       0.97      0.95      0.96      1364


Threshold: 0.20
Confusion Matrix:
[[1272   41]
 [  16   35]]
Classification Report:
              precision    recall  f1-score   support

  No Quiebra       0.99      0.97   

In [57]:

# Definir modelos y sus grids
grids = {
    "RandomForest": {
        "model": RandomForestClassifier(random_state=42),
        "params": {
            "n_estimators": [50, 100],
            "max_depth": [3, 5, None]
        }
    },
    "AdaBoost": {
        "model": AdaBoostClassifier(random_state=42),
        "params": {
            "n_estimators": [50, 100],
            "learning_rate": [0.5, 1.0]
        }
    },
    "GradientBoosting": {
        "model": GradientBoostingClassifier(random_state=42),
        "params": {
            "n_estimators": [50, 100],
            "learning_rate": [0.05, 0.1],
            "max_depth": [3, 5]
        }
    },
    "XGBoost": {
        "model": XGBClassifier(eval_metric='logloss', random_state=42),
        "params": {
            "n_estimators": [50, 100],
            "learning_rate": [0.05, 0.1],
            "max_depth": [3, 5]
        }
    }
}

# Resultados
resultados_grid2 = []

for nombre, objeto in grids.items():
    grid = GridSearchCV(objeto["model"], objeto["params"], cv=5, scoring="accuracy", n_jobs=-1)
    grid.fit(X_train_bal2, y_train_bal2)
    
    resultados_grid.append({
        "Modelo": nombre,
        "Mejor Accuracy": grid.best_score_,
        "Mejores Parámetros": grid.best_params_
    })

# DataFrame resumen
df_resultados_grid2 = pd.DataFrame(resultados_grid).sort_values(by="Mejor Accuracy", ascending=False)
df_resultados_grid2


Unnamed: 0,Modelo,Mejor Accuracy,Mejores Parámetros
4,XGBoost,0.98165,"{'learning_rate': 0.1, 'max_depth': 5, 'n_esti..."
1,RandomForest,0.981366,"{'max_depth': None, 'n_estimators': 50}"
3,GradientBoosting,0.97815,"{'learning_rate': 0.1, 'max_depth': 5, 'n_esti..."
0,AdaBoost,0.958759,"{'learning_rate': 1.5, 'n_estimators': 200}"
2,AdaBoost,0.939557,"{'learning_rate': 1.0, 'n_estimators': 100}"


In [58]:
print(df_resultados_grid2.to_string(index=False))

          Modelo  Mejor Accuracy                                          Mejores Parámetros
         XGBoost        0.981650 {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 100}
    RandomForest        0.981366                     {'max_depth': None, 'n_estimators': 50}
GradientBoosting        0.978150 {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 100}
        AdaBoost        0.958759                 {'learning_rate': 1.5, 'n_estimators': 200}
        AdaBoost        0.939557                 {'learning_rate': 1.0, 'n_estimators': 100}


In [59]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

param_grid = {
    "max_depth": [3, 5, 10, None],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 5, 10, 20],
    "n_estimators": [50, 100, 150]
}

grid = GridSearchCV(
    RandomForestClassifier(class_weight="balanced", random_state=42, n_jobs=-1),
    param_grid,
    scoring="roc_auc",
    cv=5,
    n_jobs=-1
)

grid.fit(X_train_bal2, y_train_bal2)




In [60]:
print("✅ Mejores parámetros encontrados:")
print(grid.best_params_)

✅ Mejores parámetros encontrados:
{'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 150}


In [66]:
# Training con los mejores parámetros encontrados
rf = RandomForestClassifier(class_weight="balanced",
    n_estimators=150,
    max_depth=None,
    random_state=42,
    min_samples_leaf=1,
    min_samples_split=2,
    
    n_jobs=-1  # para aprovechar todos los núcleos del procesador
)

rf.fit(X_train_bal2, y_train_bal2)

# Evaluamos en el conjunto de test original
y_pred = rf.predict(X_test2)
y_proba = rf.predict_proba(X_test2)[:, 1]

print("Confusion Matrix:\n", confusion_matrix(y_test2, y_pred))
print("\nClassification Report:\n", classification_report(y_test2, y_pred))
print("ROC AUC:", roc_auc_score(y_test2, y_proba))

Confusion Matrix:
 [[1283   30]
 [  18   33]]

Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.98      0.98      1313
           1       0.52      0.65      0.58        51

    accuracy                           0.96      1364
   macro avg       0.75      0.81      0.78      1364
weighted avg       0.97      0.96      0.97      1364

ROC AUC: 0.954191120469513


In [68]:
# Comparación con diferentes Threshold

y_proba_test = rf.predict_proba(X_test2)[:, 1]

# Probar varios thresholds
thresholds = np.linspace(0.1, 0.9, 17)

for t in thresholds:
    y_pred = (y_proba_test >= t).astype(int)
    print(f"\nThreshold: {t:.2f}")
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("Classification Report:")
    print(classification_report(y_test, y_pred, target_names=["No Quiebra", "Quiebra"]))


Threshold: 0.10
Confusion Matrix:
[[1069  244]
 [   3   48]]
Classification Report:
              precision    recall  f1-score   support

  No Quiebra       1.00      0.81      0.90      1313
     Quiebra       0.16      0.94      0.28        51

    accuracy                           0.82      1364
   macro avg       0.58      0.88      0.59      1364
weighted avg       0.97      0.82      0.87      1364


Threshold: 0.15
Confusion Matrix:
[[1125  188]
 [   5   46]]
Classification Report:
              precision    recall  f1-score   support

  No Quiebra       1.00      0.86      0.92      1313
     Quiebra       0.20      0.90      0.32        51

    accuracy                           0.86      1364
   macro avg       0.60      0.88      0.62      1364
weighted avg       0.97      0.86      0.90      1364


Threshold: 0.20
Confusion Matrix:
[[1178  135]
 [   8   43]]
Classification Report:
              precision    recall  f1-score   support

  No Quiebra       0.99      0.90   

In [62]:
# AdaBoost
ada2 = AdaBoostClassifier(n_estimators=200, learning_rate=1.5, random_state=42)
ada2.fit(X_train_bal2, y_train_bal2)

# GradientBoosting
gb2 = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42)
gb2.fit(X_train_bal2, y_train_bal2)

# XGBoost
xgb2 = XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, eval_metric='logloss', random_state=42)
xgb2.fit(X_train_bal2, y_train_bal2)


In [69]:
from sklearn.metrics import confusion_matrix, classification_report

modelos = {
    "AdaBoost": ada2,
    "GradientBoosting": gb2,
    "XGBoost": xgb2
}

for nombre, modelo in modelos.items():
    y_pred = modelo.predict(X_test2)

    print(f"\n🔍 Confusion Matrix - {nombre} (Threshold=0.5 por defecto)")
    print(confusion_matrix(y_test2, y_pred))
    
    print("\nClassification Report:")
    print(classification_report(y_test2, y_pred, target_names=["No Quiebra", "Quiebra"]))


🔍 Confusion Matrix - AdaBoost (Threshold=0.5 por defecto)
[[1253   60]
 [  18   33]]

Classification Report:
              precision    recall  f1-score   support

  No Quiebra       0.99      0.95      0.97      1313
     Quiebra       0.35      0.65      0.46        51

    accuracy                           0.94      1364
   macro avg       0.67      0.80      0.71      1364
weighted avg       0.96      0.94      0.95      1364


🔍 Confusion Matrix - GradientBoosting (Threshold=0.5 por defecto)
[[1276   37]
 [  18   33]]

Classification Report:
              precision    recall  f1-score   support

  No Quiebra       0.99      0.97      0.98      1313
     Quiebra       0.47      0.65      0.55        51

    accuracy                           0.96      1364
   macro avg       0.73      0.81      0.76      1364
weighted avg       0.97      0.96      0.96      1364


🔍 Confusion Matrix - XGBoost (Threshold=0.5 por defecto)
[[1283   30]
 [  16   35]]

Classification Report:
         