In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, recall_score, f1_score, roc_auc_score
import seaborn as sns
import matplotlib.pyplot as plt
import xgboost as xgb


# Load the raw dataset from the CSV file that sits in the models folder.
DATA_PATH = "./models/data.csv"
data = pd.read_csv(DATA_PATH)

In [None]:
import wandb

# Open a Weights & Biases run; the metrics logged further down are sent to
# this project.
WANDB_PROJECT = "model-comparison"
wandb.init(project=WANDB_PROJECT)

In [None]:
# Remove the two EDSS columns and the "Unnamed: 0" column, then discard every
# row that still has a missing value. The cleaned frame is shown as cell output.
unused_columns = ['Initial_EDSS', 'Final_EDSS', "Unnamed: 0"]
data = data.drop(columns=unused_columns).dropna()
data

In [None]:
# Split the frame into the target label (y) and the feature matrix (X).
y = data['group']
X = data.drop(columns='group')

# Two-stage split: hold out 40 % of the rows first, then cut that hold-out in
# half to obtain validation and test sets (60 / 20 / 20 overall).
seed = 42
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, random_state=seed
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=seed
)

In [None]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()

# Learn the label -> integer mapping from the training labels only; the same
# fitted mapping is then reused for every split.
encoder.fit(y_train)

# NOTE: despite the "X_" prefix, these arrays hold encoded *labels*, not
# features. They are kept under their original names because later cells
# reference them.
X_train_encoded = encoder.transform(y_train)
X_val_encoded = encoder.transform(y_val)
# Fixed: this previously encoded y_train, so the "test" array silently had the
# training set's length and content.
X_test_encoded = encoder.transform(y_test)

# Encoded targets for each split. The encoder is already fitted above, so a
# plain transform is enough (no second fit needed).
y_train_encoded = encoder.transform(y_train)
y_val_encoded = encoder.transform(y_val)
y_test_encoded = encoder.transform(y_test)


In [None]:
# X_test_reshaped is only created in a later cell, so referencing it here fails
# on a fresh kernel. Derive the same (n, 1) shape directly from X_test_encoded
# so the notebook survives Restart & Run All.
print(X_test_encoded.reshape(-1, 1).shape, y_test_encoded.shape)

In [None]:
# Reshape each encoded array into a single-column 2-D array via reshape(-1, 1).
X_train_encoded_reshaped, X_val_reshaped, X_test_reshaped, y_test_reshaped = (
    np.reshape(encoded, (-1, 1))
    for encoded in (X_train_encoded, X_val_encoded, X_test_encoded, y_test_encoded)
)




In [None]:
# Sanity check after the reshape: show the 2-D shape of X_test_reshaped next to
# the shape of the encoded test labels.
reshaped_shape, encoded_shape = X_test_reshaped.shape, y_test_encoded.shape
print(reshaped_shape, encoded_shape)

In [None]:
print(f"y_test shape: {y_test.shape}")
# y_pred_test_xgb is produced only by the XGBoost code, which is currently
# commented out. Guard the lookup so Restart & Run All does not stop here with
# a NameError; the original message is still printed when the variable exists.
if "y_pred_test_xgb" in globals():
    print(f"y_pred_test_xgb shape: {y_pred_test_xgb.shape}")
else:
    print("y_pred_test_xgb is not defined (XGBoost cell is disabled)")

In [None]:


# Feature scaling: the scaler is fitted on the training split only and the
# fitted transform is then applied unchanged to validation and test, so no
# statistics from held-out data are used.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Candidate models, keyed by the display name used in the logs and the plot.
# The stochastic estimators receive the same `seed` as the data split so that
# repeated runs of the notebook produce the same metrics.
models = {
    "Regresion logistica": LogisticRegression(),
    # probability=True enables predict_proba (needed for AUC); its internal
    # shuffling is random, hence the random_state.
    "SVC": SVC(probability=True, random_state=seed),
    "RandomForest": RandomForestClassifier(random_state=seed),
    # "XGBoost": xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss')
}

# # Entrenar el modelo XGBoost por separado (sin escalar)
# xgb_model = xgb.XGBClassifier(use_label_encoder=True, eval_metric='logloss')
# xgb_model.fit(X_train_encoded_reshaped, y_train_encoded)

# y_pred_train_xgb = xgb_model.predict(X_train_encoded_reshaped)
# y_pred_val_xgb = xgb_model.predict(X_val_reshaped) 
# y_pred_test_xgb = xgb_model.predict(X_test_reshaped)



In [None]:
# # Calcular métricas para XGBoost
# auc_train_xgb = roc_auc_score(y_train, xgb_model.predict_proba(X_train_encoded_reshaped)[:, 1], multi_class='ovr')
# recall_train_xgb = recall_score(y_train, y_pred_train_xgb, average='weighted')
# f1_train_xgb = f1_score(y_train, y_pred_train_xgb, average='weighted')
# accuracy_train_xgb = accuracy_score(y_train, y_pred_train_xgb)

# auc_val_xgb = roc_auc_score(y_val, xgb_model.predict_proba(X_val_reshaped)[:, 1], multi_class='ovr')
# recall_val_xgb = recall_score(y_val, y_pred_val_xgb, average='weighted')
# f1_val_xgb = f1_score(y_val, y_pred_val_xgb, average='weighted')
# accuracy_val_xgb = accuracy_score(y_val, y_pred_val_xgb)

# auc_test_xgb = roc_auc_score(y_test, xgb_model.predict_proba(y_test_encoded)[:, 1], multi_class='ovr')
# recall_test_xgb = recall_score(y_test, y_pred_test_xgb, average='weighted')
# f1_test_xgb = f1_score(y_test, y_pred_test_xgb, average='weighted')
# accuracy_test_xgb = accuracy_score(y_test, y_pred_test_xgb)

# # Registrar en Weights & Biases para XGBoost
# wandb.log({
#     "model": "XGBoost",
#     "accuracy_train": accuracy_train_xgb,
#     "recall_train": recall_train_xgb,
#     "f1_train": f1_train_xgb,
#     "auc_train": auc_train_xgb,
#     "accuracy_val": accuracy_val_xgb,
#     "recall_val": recall_val_xgb,
#     "f1_val": f1_val_xgb,
#     "auc_val": auc_val_xgb,
#     # "accuracy_test": accuracy_test_xgb,
#     # "recall_test": recall_test_xgb,
#     # "f1_test": f1_test_xgb,
#     "auc_test": auc_test_xgb,
# })
# # Entrenar y evaluar cada modelo
# results = []

In [None]:
def _auc_score(fitted_model, features, labels):
    """Return the ROC AUC of ``fitted_model`` on ``features`` against ``labels``.

    For a two-class problem the score is computed from the probability of the
    second class (``classes_[1]``), exactly as before. With more than two
    classes ``roc_auc_score`` requires the full probability matrix, so the
    whole matrix is passed together with ``multi_class='ovr'`` and the model's
    class order.
    """
    proba = fitted_model.predict_proba(features)
    if len(fitted_model.classes_) == 2:
        return roc_auc_score(labels, proba[:, 1])
    return roc_auc_score(
        labels, proba, multi_class='ovr', labels=fitted_model.classes_
    )


# Start from an empty list on every run of this cell. Previously `results` was
# never initialised (its only definition was commented out), which raised a
# NameError on a fresh kernel; resetting it also keeps re-runs from
# accumulating duplicate rows.
results = []

# Evaluation splits, in the order their metrics are logged.
splits = {
    "train": (X_train_scaled, y_train),
    "val": (X_val_scaled, y_val),
    "test": (X_test_scaled, y_test),
}

for model_name, model in models.items():
    # XGBoost is meant to be trained separately (see the disabled XGBoost
    # cells); skip it if it is ever re-enabled in `models`.
    if model_name == 'XGBoost':
        continue

    model.fit(X_train_scaled, y_train)

    # Build the metrics record: accuracy / recall / f1 / auc for each split.
    metrics = {"model": model_name}
    for split_name, (features, labels) in splits.items():
        predictions = model.predict(features)
        metrics[f"accuracy_{split_name}"] = accuracy_score(labels, predictions)
        metrics[f"recall_{split_name}"] = recall_score(labels, predictions, average='weighted')
        metrics[f"f1_{split_name}"] = f1_score(labels, predictions, average='weighted')
        metrics[f"auc_{split_name}"] = _auc_score(model, features, labels)

    # Log to Weights & Biases
    wandb.log(metrics)

    # Keep the test-set metrics in long format (one row per model and metric)
    # for the comparison plot.
    for metric_label, metric_key in (
        ("accuracy", "accuracy_test"),
        ("recall", "recall_test"),
        ("f1-score", "f1_test"),
        ("auc", "auc_test"),
    ):
        results.append({
            "model": model_name,
            "metric": metric_label,
            "value": metrics[metric_key],
        })

# Turn the collected result records into a DataFrame (one row per model/metric).
results_df = pd.DataFrame(results)

# Grouped bar chart with Seaborn: one group per metric, one bar per model.
fig, ax = plt.subplots()
sns.barplot(data=results_df, x='metric', y='value', hue='model', ax=ax)
ax.set_title('Comparación de Métricas de los modelos')
plt.show()

In [None]:
# Display the results table (columns: model, metric, value) built from the
# evaluation loop's `results` records.
results_df

In [None]:
# wandb.finish()