In [4]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.linear_model import Lasso, LinearRegression, LogisticRegression, Ridge
from sklearn.metrics import make_scorer, r2_score, roc_auc_score
from sklearn.model_selection import KFold, StratifiedKFold, cross_val_score, train_test_split
from sklearn.preprocessing import LabelEncoder, PolynomialFeatures, StandardScaler
from sklearn.svm import SVC

In [5]:
# Load the raw brain tumor dataset and preview its first ten rows.
df = pd.read_csv(filepath_or_buffer='brain_tumor_dataset.csv')
df.head(n=10)

Unnamed: 0,Patient_ID,Age,Gender,Tumor_Type,Tumor_Size,Location,Histology,Stage,Symptom_1,Symptom_2,Symptom_3,Radiation_Treatment,Surgery_Performed,Chemotherapy,Survival_Rate,Tumor_Growth_Rate,Family_History,MRI_Result,Follow_Up_Required
0,1,73,Male,Malignant,5.375612,Temporal,Astrocytoma,III,Vision Issues,Seizures,Seizures,No,No,No,51.312579,0.111876,No,Positive,Yes
1,2,26,Male,Benign,4.847098,Parietal,Glioblastoma,II,Headache,Headache,Nausea,Yes,Yes,Yes,46.373273,2.165736,Yes,Positive,Yes
2,3,31,Male,Benign,5.588391,Parietal,Meningioma,I,Vision Issues,Headache,Seizures,No,No,No,47.072221,1.884228,No,Negative,No
3,4,29,Male,Malignant,1.4366,Temporal,Medulloblastoma,IV,Vision Issues,Seizures,Headache,Yes,No,Yes,51.853634,1.283342,Yes,Negative,No
4,5,54,Female,Benign,2.417506,Parietal,Glioblastoma,I,Headache,Headache,Seizures,No,No,Yes,54.708987,2.069477,No,Positive,Yes
5,6,27,Male,Malignant,3.483837,Frontal,Astrocytoma,I,Vision Issues,Vision Issues,Headache,Yes,Yes,No,83.572069,2.26138,No,Negative,Yes
6,7,72,Male,Malignant,8.026672,Occipital,Meningioma,IV,Vision Issues,Headache,Nausea,Yes,No,Yes,69.825016,2.965008,No,Negative,No
7,8,61,Male,Benign,6.319362,Occipital,Astrocytoma,I,Headache,Seizures,Nausea,Yes,Yes,No,62.775921,2.269111,No,Negative,No
8,9,50,Male,Benign,8.075229,Temporal,Medulloblastoma,IV,Vision Issues,Vision Issues,Nausea,No,Yes,Yes,44.91309,1.208336,No,Negative,Yes
9,10,54,Male,Benign,8.549382,Frontal,Meningioma,I,Seizures,Vision Issues,Vision Issues,Yes,No,No,85.087322,2.764735,Yes,Positive,Yes


In [6]:
# Split the already-loaded frame into features (X) and target (y); the CSV is
# not read a second time.
# Patient_ID is an identifier, and an 'Unnamed: N' column is typically a row
# index written out by an earlier to_csv(); neither should be used as a
# predictor. The 'Unnamed' filter is a no-op when no such column exists.
index_like_cols = [col for col in df.columns if str(col).startswith('Unnamed')]
X = df.drop(columns=['Patient_ID', 'Tumor_Type', *index_like_cols])
y = df['Tumor_Type']

In [7]:
# Encoding of categorical variables
# Features: one-hot encode every non-numeric column. Integer label codes would
# impose an arbitrary order and distance on nominal categories (e.g. Location,
# Histology), which the scaler, the polynomial terms and the linear SVM would
# all treat as a real magnitude. drop_first=True keeps each two-level column as
# a single 0/1 indicator and avoids perfectly collinear dummy columns.
# get_dummies also picks up string/category dtypes, which a plain
# `dtype == 'object'` check would skip.
X = pd.get_dummies(X, drop_first=True, dtype=float)

# Target: LabelEncoder is the intended tool for y. The fitted encoder is kept
# so the integer codes can be mapped back to class names via `classes_`.
target_encoder = LabelEncoder()
y = target_encoder.fit_transform(y)

In [8]:
# Scaling: standardize every feature column with StandardScaler.
# NOTE(review): the scaler is fitted on all rows before cross-validation, so
# the held-out folds contribute to the scaling statistics; consider fitting it
# per training fold inside a Pipeline.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

In [9]:
# FEATURE ENGINEERING
# Expand the scaled features with degree-2 polynomial and pairwise interaction
# terms; the constant bias column is left out.
poly = PolynomialFeatures(include_bias=False, degree=2).fit(X)
X_poly = poly.transform(X)

In [10]:
# EVALUATION FUNCTION (cross-validated one-vs-rest ROC AUC)
def evaluate_model(C_value, features=None, target=None, n_splits=10, random_state=42):
    """Return the mean cross-validated one-vs-rest ROC AUC of a linear SVC.

    Parameters
    ----------
    C_value : float
        Regularization parameter forwarded to ``SVC(C=...)``.
    features : array-like, optional
        Feature matrix to evaluate on. Defaults to the notebook-level
        ``X_poly``.
    target : array-like, optional
        Class labels. Defaults to the notebook-level ``y``.
    n_splits : int, default 10
        Number of cross-validation folds.
    random_state : int, default 42
        Seed used for the fold shuffling and for the SVC probability
        estimates, so repeated calls give the same score.

    Returns
    -------
    float
        Mean of the per-fold ``roc_auc_ovr`` scores.

    Notes
    -----
    NOTE(review): the default ``X_poly`` is derived from features that were
    standardized on the full dataset, so fold statistics are not fully
    independent; moving the preprocessing into a Pipeline would remove that.
    """
    if features is None:
        features = X_poly
    if target is None:
        target = y

    # probability=True is required because the 'roc_auc_ovr' scorer relies on
    # predict_proba; seeding it keeps the internal calibration reproducible.
    model = SVC(kernel='linear', C=C_value, probability=True, random_state=random_state)

    # Stratified folds keep the class proportions in every split, so each fold
    # contains all classes and the AUC is always defined.
    cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    auc_scores = cross_val_score(
        model, features, target, cv=cv, scoring='roc_auc_ovr'
    )

    return auc_scores.mean()

In [11]:
# MANUAL GRID SEARCH OVER C
# Every log-spaced candidate is evaluated exhaustively; no Bayesian surrogate
# model or acquisition function is involved.
C_values = np.logspace(start=-3, stop=3, num=15)  # 15 values from 0.001 (1e-3) to 1000 (1e3)
results = []

for c in C_values:
    auc = evaluate_model(c)
    print(f"C={c:.4f} -> AUC promedio={auc:.4f}")
    results.append((c, auc))

C=0.0010 -> AUC promedio=1.0000
C=0.0027 -> AUC promedio=1.0000
C=0.0072 -> AUC promedio=1.0000
C=0.0193 -> AUC promedio=1.0000
C=0.0518 -> AUC promedio=1.0000
C=0.1389 -> AUC promedio=1.0000
C=0.3728 -> AUC promedio=1.0000
C=1.0000 -> AUC promedio=1.0000
C=2.6827 -> AUC promedio=1.0000
C=7.1969 -> AUC promedio=1.0000
C=19.3070 -> AUC promedio=1.0000
C=51.7947 -> AUC promedio=1.0000
C=138.9495 -> AUC promedio=1.0000
C=372.7594 -> AUC promedio=1.0000
C=1000.0000 -> AUC promedio=1.0000


In [16]:
# Select the C value with the highest mean AUC.
# cross_val_score reports NaN for folds that fail to fit or score, and NaN
# compares False against everything, so max() over raw scores would give a
# position-dependent answer. Such entries are filtered out first.
valid_results = [(c_value, score) for c_value, score in results if not np.isnan(score)]
if not valid_results:
    raise ValueError("No hay puntajes AUC válidos; revise la validación cruzada.")

# max() returns the first maximum, so ties resolve to the earliest entry in
# `results` (the smallest C when the candidates are evaluated in ascending order).
# NOTE(review): the recorded run shows AUC = 1.0000 for every C, and the
# execution counter jumps from 11 to 16. That pattern usually points to target
# leakage or stale kernel state rather than a perfect model; verify after
# Restart Kernel -> Run All.
best_C, best_auc = max(valid_results, key=lambda pair: pair[1])
print(f"Mejor valor de C: {best_C:.4f}")
print(f"Mejor AUC promedio (10-fold): {best_auc:.4f}")

Mejor valor de C: 0.0010
Mejor AUC promedio (10-fold): 1.0000
