- Utiliza el dataset `Brain Tumor`, modela con SVC y haz Cross-Validation con kernel 'linear'
- Modela con Optimización Bayesiana ().
- El método de Cross-Validation es K-Folds con $k=10$.
- Utiliza el AUC como métrico de Cross-Validation.
- Compara resultados.

In [17]:
# Paso 1: Preprocesamiento y modelado con SVC

import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import KFold, cross_val_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import make_scorer, roc_auc_score
from bayes_opt import BayesianOptimization

df = pd.read_csv("brain_tumor_dataset.csv")

# Tomamos solo las primeras 20 filas del dataset original
df_small = df.iloc[:20, :].copy()

# --- Paso 1: Codificar la variable objetivo ---
le = LabelEncoder()
df_small["Tumor_Type"] = le.fit_transform(df_small["Tumor_Type"])

# --- Paso 2: Convertir variables categóricas a numéricas ---
# get_dummies convierte texto o categorías en variables binarias (0/1)
df_encoded = pd.get_dummies(df_small, drop_first=True)

# --- Paso 3: Separar X e y ---
y_encoded = df_encoded["Tumor_Type"]
X_encoded = df_encoded.drop(columns=["Tumor_Type"])

# --- Paso 4: Definir el modelo ---
svc_model = SVC(kernel="linear", probability=True, random_state=42)

# --- Paso 5: Pipeline con estandarización ---
pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("svc", svc_model)
])

# --- Paso 6: Cross-Validation (K=10) con AUC ---
kf = KFold(n_splits=10, shuffle=True, random_state=42)
auc_scorer = make_scorer(roc_auc_score, needs_proba=True)

auc_scores = cross_val_score(pipeline, X_encoded, y_encoded, cv=kf, scoring=auc_scorer)

# --- Resultados ---
print("AUC por fold:", auc_scores)
print("AUC promedio:", auc_scores.mean())

AUC por fold: [nan  1.  1. nan nan nan nan nan  1. nan]
AUC promedio: nan


Traceback (most recent call last):
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 139, in __call__
    score = scorer._score(
            ^^^^^^^^^^^^^^
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 376, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py", line 213, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\metrics\_ranking.py", line 640, in roc_auc_score
    return _average_binary_score(
           ^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\metrics\_base.py", line 76, in _average_binary_score
    return binary_metric(y_true, y_score, sample_weight=sample_weight)
           ^^^^^^^^^^^^^^

In [18]:
# Paso 2: Optimización Bayesiana para SVC (AUC)

# --- Definir la función objetivo a maximizar ---
def objective_function(C):
    # Definir el modelo dentro de un pipeline
    svc = Pipeline([
        ('scaler', StandardScaler()),
        ('svc', SVC(kernel='linear', C=C, probability=True, random_state=42))
    ])
    
    # Validación cruzada
    kf = KFold(n_splits=10, shuffle=True, random_state=42)
    auc_scorer = make_scorer(roc_auc_score, needs_proba=True)
    scores = cross_val_score(svc, X_encoded, y_encoded, cv=kf, scoring=auc_scorer)
    
    # Regresamos el promedio del AUC
    return scores.mean()

# --- Definir los rangos de búsqueda ---
pbounds = {'C': (0.01, 10)}  # C es la penalización del margen

# --- Crear el optimizador bayesiano ---
optimizer = BayesianOptimization(
    f=objective_function,
    pbounds=pbounds,
    random_state=42,
    verbose=2
)

# --- Ejecutar la optimización ---
optimizer.maximize(
    init_points=5,  # exploraciones aleatorias iniciales
    n_iter=15,      # número de iteraciones de optimización
)

# --- Mostrar los mejores resultados ---
print("\nMejor resultado encontrado:")
print(optimizer.max)

|   iter    |  target   |     C     |
-------------------------------------
| [39m1        [39m | [39mnan      [39m | [39m3.7516557[39m |


Traceback (most recent call last):
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 139, in __call__
    score = scorer._score(
            ^^^^^^^^^^^^^^
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 376, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py", line 213, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\metrics\_ranking.py", line 640, in roc_auc_score
    return _average_binary_score(
           ^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\metrics\_base.py", line 76, in _average_binary_score
    return binary_metric(y_true, y_score, sample_weight=sample_weight)
           ^^^^^^^^^^^^^^

| [39m2        [39m | [39mnan      [39m | [39m9.5076359[39m |
| [39m3        [39m | [39mnan      [39m | [39m7.3226194[39m |


Traceback (most recent call last):
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 139, in __call__
    score = scorer._score(
            ^^^^^^^^^^^^^^
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 376, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py", line 213, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\metrics\_ranking.py", line 640, in roc_auc_score
    return _average_binary_score(
           ^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\metrics\_base.py", line 76, in _average_binary_score
    return binary_metric(y_true, y_score, sample_weight=sample_weight)
           ^^^^^^^^^^^^^^

| [39m4        [39m | [39mnan      [39m | [39m5.9905982[39m |
| [39m5        [39m | [39mnan      [39m | [39m1.5686262[39m |


Traceback (most recent call last):
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 139, in __call__
    score = scorer._score(
            ^^^^^^^^^^^^^^
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\metrics\_scorer.py", line 376, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py", line 213, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\metrics\_ranking.py", line 640, in roc_auc_score
    return _average_binary_score(
           ^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\chino\anaconda3\Lib\site-packages\sklearn\metrics\_base.py", line 76, in _average_binary_score
    return binary_metric(y_true, y_score, sample_weight=sample_weight)
           ^^^^^^^^^^^^^^

ValueError: Input y contains NaN.