In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold
# %pip install xgboost
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, recall_score, f1_score, roc_auc_score
import matplotlib.pyplot as plt

import wandb
wandb.login()

# Cargar los datos
data = pd.read_csv("./data.csv")

In [None]:
data = data.drop(columns=['Initial_EDSS', 'Final_EDSS', "Unnamed: 0"])
data = data.dropna()
data

In [None]:
X = data.drop("group", axis=1)
y = data["group"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Definir KFold para la validación cruzada
kfold = KFold(n_splits=6, shuffle=True, random_state=42)

# Crear el modelo base
model = XGBClassifier()


In [None]:
# Transformar las etiquetas de y_train y y_test
y_train = y_train - 1
y_test = y_test - 1

In [None]:
from sklearn.model_selection import RandomizedSearchCV

In [None]:
#  Definir la grilla de hiperparámetros
param_grid = {
    'max_depth': np.arange(3, 20),  # Aumentamos el rango máximo
    'learning_rate': np.linspace(0.001, 0.5, 20),  # Ampliamos el rango y aumentamos la resolución
    'n_estimators': np.arange(50, 501, 50),  # Aumentamos el número máximo de estimadores
    'min_child_weight': np.arange(1, 11),  # Aumentamos el rango
    'gamma': np.linspace(0, 1, 20),  # Ampliamos el rango
    'subsample': np.linspace(0.5, 1, 10),  # Ampliamos el rango inferior
    'colsample_bytree': np.linspace(0.5, 1, 10),  # Ampliamos el rango inferior
    'reg_alpha': np.logspace(-4, 2, 10),  # Ampliamos el rango y la resolución
    'reg_lambda': np.logspace(-4, 2, 10),  # Ampliamos el rango y la resolución
    'scale_pos_weight': np.logspace(-1, 1, 5)  # Agrega un hiperparámetro para manejar desequilibrios de clases
}



In [292]:
# Realizar búsqueda aleatoria de hiperparámetros con KFold CV
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_grid, cv=kfold, scoring='roc_auc', n_iter=2500)
random_search.fit(X_train, y_train)
best_params = random_search.best_params_
print("Mejores parámetros encontrados con RandomizedSearchCV:", best_params)

Mejores parámetros encontrados con RandomizedSearchCV: {'subsample': np.float64(0.6666666666666666), 'scale_pos_weight': np.float64(1.0), 'reg_lambda': np.float64(0.21544346900318823), 'reg_alpha': np.float64(1.0), 'n_estimators': np.int64(300), 'min_child_weight': np.int64(1), 'max_depth': np.int64(12), 'learning_rate': np.float64(0.4212105263157895), 'gamma': np.float64(0.5263157894736842), 'colsample_bytree': np.float64(0.7222222222222222)}


  _data = np.array(data, dtype=dtype, copy=copy,


In [293]:
# Entrenar el modelo final con los mejores hiperparámetros y registrar en Wandb
with wandb.init(project="xgboost_project", entity="tu_entidad", config=best_params):
    model.set_params(**best_params)
    model.fit(X_train, y_train, eval_set=[(X_test, y_test)], eval_metric='auc', verbose=True)

    # Evaluar el modelo y registrar métricas
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred) 

    auc = roc_auc_score(y_test, y_pred)

    wandb.log({
        "accuracy": accuracy,
        "recall": recall,
        "f1": f1,
        "auc": auc
    })

    # Registrar la curva ROC
    y_prob = model.predict_proba(X_test)[:, 1]
    fpr, tpr, thresholds = roc_auc_score(y_test, y_prob)
    plt.plot(fpr, tpr)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    wandb.log({"roc_curve": plt})

    # Registrar la importancia de las características
    importance_df = pd.DataFrame({'feature': X.columns, 'importance': model.feature_importances_})
    importance_df = importance_df.sort_values('importance', ascending=False) 
    wandb.log({"feature_importances": wandb.Table(dataframe=importance_df)})

[34m[1mwandb[0m: Currently logged in as: [33m48242293[0m. Use [1m`wandb login --relogin`[0m to force relogin
wandb: ERROR Error while calling W&B API: entity tu_entidad not found during upsertBucket (<Response [404]>)


CommError: It appears that you do not have permission to access the requested resource. Please reach out to the project owner to grant you access. If you have the correct permissions, verify that there are no issues with your networking setup.(Error 404: Not Found)

In [None]:
print("Accuracy:", accuracy)
print("Recall:", recall)
print("F1-score:", f1)
print("AUC:", auc)