In [1]:
from sklearn.gaussian_process import GaussianProcessRegressor as GPR
from sklearn.svm import SVC
import warnings
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score, KFold
import optuna
warnings.filterwarnings("ignore")

In [2]:
# Brain tumor detection using SVM
#
# Load the first N_ROWS patients, one-hot encode the categorical columns and
# turn the two-valued text columns into booleans. This cell produces:
#   data - encoded frame (still contains the textual Tumor_Type label)
#   X    - feature frame (data without Tumor_Type)
#   y    - integer label, 0 = Benign, 1 = Malignant
N_ROWS = 5000
CATEGORICAL_COLUMNS = ["Gender", "Location", "Histology", "Stage", "Symptom_1", "Symptom_2", "Symptom_3"]
YES_NO_COLUMNS = ["Radiation_Treatment", "Surgery_Performed", "Chemotherapy", "Family_History", "Follow_Up_Required"]


def encode_binary(column, mapping):
    """Translate every value of ``column`` through ``mapping``.

    Parameters
    ----------
    column : pd.Series
        Column whose values are all expected to be keys of ``mapping``.
    mapping : dict
        Original value -> encoded value.

    Returns
    -------
    pd.Series
        The mapped column (same index as ``column``).

    Raises
    ------
    ValueError
        If ``column`` holds a value (including a missing one) that is not a
        key of ``mapping``; failing here is clearer than a later dtype error
        inside the model.
    """
    encoded = column.map(mapping)
    unexpected = column[encoded.isna()].unique()
    if len(unexpected) > 0:
        raise ValueError(
            f"Column {column.name!r} has values outside {list(mapping)}: {list(unexpected)}"
        )
    return encoded


data = pd.read_csv('brain_tumor_dataset.csv')
data = data.drop(columns=["Patient_ID"]).head(N_ROWS)
data = pd.get_dummies(data, columns=CATEGORICAL_COLUMNS)

# `.map` + an explicit dtype is used instead of `.replace`: replacing strings
# with booleans/ints depends on pandas' deprecated silent downcasting of object
# results (a FutureWarning that the notebook-wide warnings filter would hide).
for column in YES_NO_COLUMNS:
    data[column] = encode_binary(data[column], {"Yes": True, "No": False}).astype(bool)
data["MRI_Result"] = encode_binary(data["MRI_Result"], {"Positive": True, "Negative": False}).astype(bool)

X = data.drop(columns=["Tumor_Type"])
y = encode_binary(data["Tumor_Type"], {"Benign": 0, "Malignant": 1}).astype(int)
data

Unnamed: 0,Age,Tumor_Type,Tumor_Size,Radiation_Treatment,Surgery_Performed,Chemotherapy,Survival_Rate,Tumor_Growth_Rate,Family_History,MRI_Result,...,Symptom_1_Seizures,Symptom_1_Vision Issues,Symptom_2_Headache,Symptom_2_Nausea,Symptom_2_Seizures,Symptom_2_Vision Issues,Symptom_3_Headache,Symptom_3_Nausea,Symptom_3_Seizures,Symptom_3_Vision Issues
0,73,Malignant,5.375612,False,False,False,51.312579,0.111876,False,True,...,False,True,False,False,True,False,False,False,True,False
1,26,Benign,4.847098,True,True,True,46.373273,2.165736,True,True,...,False,False,True,False,False,False,False,True,False,False
2,31,Benign,5.588391,False,False,False,47.072221,1.884228,False,False,...,False,True,True,False,False,False,False,False,True,False
3,29,Malignant,1.436600,True,False,True,51.853634,1.283342,True,False,...,False,True,False,False,True,False,True,False,False,False
4,54,Benign,2.417506,False,False,True,54.708987,2.069477,False,True,...,False,False,True,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,73,Malignant,3.849000,True,True,True,62.976494,0.857235,True,False,...,True,False,False,True,False,False,False,False,True,False
4996,36,Benign,7.378581,False,False,False,66.567587,1.749408,True,False,...,False,False,False,False,True,False,False,True,False,False
4997,63,Benign,6.477871,False,False,True,82.684034,1.799311,True,True,...,False,True,True,False,False,False,False,False,True,False
4998,78,Benign,1.678021,True,True,True,96.928472,1.155946,True,True,...,True,False,False,True,False,False,False,False,False,True


In [3]:
# Standardise the continuous columns; every other column of X is left as is.
# NOTE(review): the scaler is fit on every row of X, so any cross-validation
# run on X afterwards sees statistics computed from its own held-out rows.
# Fit the scaler inside the CV loop if a strictly leak-free estimate is needed.
features_to_scale = ["Age", "Tumor_Size", "Survival_Rate", "Tumor_Growth_Rate"]
scaler = StandardScaler()
scaler.fit(X[features_to_scale])
X[features_to_scale] = scaler.transform(X[features_to_scale])
X

Unnamed: 0,Age,Tumor_Size,Radiation_Treatment,Surgery_Performed,Chemotherapy,Survival_Rate,Tumor_Growth_Rate,Family_History,MRI_Result,Follow_Up_Required,...,Symptom_1_Seizures,Symptom_1_Vision Issues,Symptom_2_Headache,Symptom_2_Nausea,Symptom_2_Seizures,Symptom_2_Vision Issues,Symptom_3_Headache,Symptom_3_Nausea,Symptom_3_Seizures,Symptom_3_Vision Issues
0,1.355703,0.052555,False,False,False,-1.086358,-1.716086,False,True,True,...,False,True,False,False,True,False,False,False,True,False
1,-1.365592,-0.141288,True,True,True,-1.371508,0.749913,True,True,True,...,False,False,True,False,False,False,False,True,False,False
2,-1.076093,0.130597,False,False,False,-1.331157,0.411917,False,False,False,...,False,True,True,False,False,False,False,False,True,False
3,-1.191893,-1.392159,True,False,True,-1.055122,-0.309547,True,False,False,...,False,True,False,False,True,False,True,False,False,False
4,0.255605,-1.032391,False,False,True,-0.890280,0.634339,False,True,True,...,False,False,True,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,1.355703,-0.507361,True,True,True,-0.412989,-0.821159,True,False,False,...,True,False,False,True,False,False,False,False,True,False
4996,-0.786593,0.787186,False,False,False,-0.205672,0.250043,True,False,True,...,False,False,False,False,True,False,False,True,False,False
4997,0.776704,0.456832,False,False,True,0.724745,0.309960,True,True,True,...,False,True,True,False,False,False,False,False,True,False
4998,1.645202,-1.303613,True,True,True,1.547090,-0.462507,True,True,True,...,True,False,False,True,False,False,False,False,False,True


In [4]:
# Disabled baseline: 10-fold cross-validation of a linear-kernel SVC scored
# with 'roc_auc_ovr'. The code is wrapped in a string literal rather than
# commented out, so nothing is executed and the cell merely echoes the text.
'''
k_folds = 10
svm = SVC(kernel='linear', C=1.0, probability=True, random_state=42)
scores = cross_val_score(svm, X, y, scoring='roc_auc_ovr', cv=k_folds)
print(f"SVM Classification ROC AUC over {k_folds} folds: {np.mean(scores):.4f} ± {np.std(scores):.4f}")
'''

'\nk_folds = 10\nsvm = SVC(kernel=\'linear\', C=1.0, probability=True, random_state=42)\nscores = cross_val_score(svm, X, y, scoring=\'roc_auc_ovr\', cv=k_folds)\nprint(f"SVM Classification ROC AUC over {k_folds} folds: {np.mean(scores):.4f} ± {np.std(scores):.4f}")\n'

In [5]:
RANDOM_STATE = 42  # shared by the SVC, the fold shuffling and the Optuna sampler
N_TRIALS = 10


def objective(trial):
    """Score one candidate ``C`` for an RBF-kernel SVC.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``C`` log-uniformly from [0.01, 10].

    Returns
    -------
    float
        Mean of the per-fold 'roc_auc_ovr' scores from a shuffled 10-fold
        cross-validation on the notebook-level ``X`` and ``y``.
    """
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform
    # and samples from the same log-uniform range.
    C = trial.suggest_float('C', 0.01, 10, log=True)
    svm = SVC(kernel='rbf', C=C, probability=True, random_state=RANDOM_STATE)

    k_folds = 10
    cv = KFold(n_splits=k_folds, shuffle=True, random_state=RANDOM_STATE)
    scores = cross_val_score(svm, X, y, scoring='roc_auc_ovr', cv=cv, n_jobs=-1)

    return float(np.mean(scores))


# Seed the sampler so that Restart & Run All proposes the same sequence of C
# values; without a seed every run explores (and reports) different trials.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=RANDOM_STATE),
)

study.optimize(objective, n_trials=N_TRIALS)

# NOTE(review): the previously logged run peaked at roughly 0.51 ROC AUC, which
# is close to chance level; tuning C alone may not help - verify the features
# and labels carry signal before widening the search.
print("Best C:", study.best_params['C'])
print(f"Best CV ROC AUC: {study.best_value:.4f}")



[I 2025-10-27 17:48:24,269] A new study created in memory with name: no-name-c223ec85-3f15-44cf-849f-4e282fc02e7f
[I 2025-10-27 17:48:29,539] Trial 0 finished with value: 0.4953824507110749 and parameters: {'C': 0.23710694035992186}. Best is trial 0 with value: 0.4953824507110749.
[I 2025-10-27 17:48:33,771] Trial 1 finished with value: 0.5032704485543243 and parameters: {'C': 0.021962866418239142}. Best is trial 1 with value: 0.5032704485543243.
[I 2025-10-27 17:48:38,189] Trial 2 finished with value: 0.5027871128624167 and parameters: {'C': 0.02135145162960516}. Best is trial 1 with value: 0.5032704485543243.
[I 2025-10-27 17:48:42,644] Trial 3 finished with value: 0.5038862000003238 and parameters: {'C': 0.012218499849017888}. Best is trial 3 with value: 0.5038862000003238.
[I 2025-10-27 17:48:47,166] Trial 4 finished with value: 0.5085990172127376 and parameters: {'C': 0.09946011718676756}. Best is trial 4 with value: 0.5085990172127376.
[I 2025-10-27 17:48:51,677] Trial 5 finished

Best C: 0.11624487294648601
Best CV ROC AUC: 0.5087
