In [1]:
from sklearn.gaussian_process import GaussianProcessRegressor as GPR
from sklearn.svm import SVC
import warnings
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score, KFold
import optuna
warnings.filterwarnings("ignore")

In [2]:
# Brain tumor detection using SVM
data = pd.read_csv('brain_tumor_dataset.csv')
data = data.drop(columns=["Patient_ID"])
data = pd.get_dummies(data, columns=["Gender", "Location", "Histology", "Stage", "Symptom_1", "Symptom_2", "Symptom_3"])
data["Radiation_Treatment"] = data["Radiation_Treatment"].replace({"Yes": True, "No": False})
data["Surgery_Performed"] = data["Surgery_Performed"].replace({"Yes": True, "No": False})
data["Chemotherapy"] = data["Chemotherapy"].replace({"Yes": True, "No": False})
data["Family_History"] = data["Family_History"].replace({"Yes": True, "No": False})
data["Follow_Up_Required"] = data["Follow_Up_Required"].replace({"Yes": True, "No": False})
data["MRI_Result"] = data["MRI_Result"].replace({"Positive": True, "Negative": False})
X = data.drop(columns=["Tumor_Type"])
y = data["Tumor_Type"]
y = y.replace({"Benign": 0, "Malignant": 1})
data

Unnamed: 0,Age,Tumor_Type,Tumor_Size,Radiation_Treatment,Surgery_Performed,Chemotherapy,Survival_Rate,Tumor_Growth_Rate,Family_History,MRI_Result,...,Symptom_1_Seizures,Symptom_1_Vision Issues,Symptom_2_Headache,Symptom_2_Nausea,Symptom_2_Seizures,Symptom_2_Vision Issues,Symptom_3_Headache,Symptom_3_Nausea,Symptom_3_Seizures,Symptom_3_Vision Issues
0,73,Malignant,5.375612,False,False,False,51.312579,0.111876,False,True,...,False,True,False,False,True,False,False,False,True,False
1,26,Benign,4.847098,True,True,True,46.373273,2.165736,True,True,...,False,False,True,False,False,False,False,True,False,False
2,31,Benign,5.588391,False,False,False,47.072221,1.884228,False,False,...,False,True,True,False,False,False,False,False,True,False
3,29,Malignant,1.436600,True,False,True,51.853634,1.283342,True,False,...,False,True,False,False,True,False,True,False,False,False
4,54,Benign,2.417506,False,False,True,54.708987,2.069477,False,True,...,False,False,True,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,21,Malignant,9.612013,False,False,True,58.229662,0.353806,False,False,...,True,False,False,True,False,False,False,False,False,True
19996,32,Benign,1.543560,True,True,False,77.706856,2.341074,False,True,...,False,False,True,False,False,False,False,False,False,True
19997,57,Benign,3.618634,False,False,True,89.543803,2.332881,False,True,...,True,False,False,False,False,True,False,True,False,False
19998,68,Malignant,8.519086,True,True,True,83.306781,2.387202,False,True,...,True,False,True,False,False,False,False,False,False,True


In [3]:
scaler = StandardScaler()
features_to_scale = ["Age", "Tumor_Size", "Survival_Rate", "Tumor_Growth_Rate"]
X[features_to_scale] = scaler.fit_transform(X[features_to_scale])
X

Unnamed: 0,Age,Tumor_Size,Radiation_Treatment,Surgery_Performed,Chemotherapy,Survival_Rate,Tumor_Growth_Rate,Family_History,MRI_Result,Follow_Up_Required,...,Symptom_1_Seizures,Symptom_1_Vision Issues,Symptom_2_Headache,Symptom_2_Nausea,Symptom_2_Seizures,Symptom_2_Vision Issues,Symptom_3_Headache,Symptom_3_Nausea,Symptom_3_Seizures,Symptom_3_Vision Issues
0,1.355253,0.050488,False,False,False,-1.089675,-1.717548,False,True,True,...,False,True,False,False,True,False,False,False,True,False
1,-1.347627,-0.141399,True,True,True,-1.375673,0.739298,True,True,True,...,False,False,True,False,False,False,False,True,False,False
2,-1.060087,0.127742,False,False,False,-1.335202,0.402556,False,False,False,...,False,True,True,False,False,False,False,False,True,False
3,-1.175103,-1.379648,True,False,True,-1.058346,-0.316229,True,False,False,...,False,True,False,False,True,False,True,False,False,False
4,0.262599,-1.023511,False,False,True,-0.893014,0.624153,False,True,True,...,False,False,True,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,-1.635167,1.588598,False,False,True,-0.689158,-1.428149,False,False,True,...,True,False,False,True,False,False,False,False,False,True
19996,-1.002579,-1.340814,True,True,False,0.438621,0.949040,False,True,False,...,False,False,True,False,False,False,False,False,False,True
19997,0.435123,-0.587417,False,False,True,1.124010,0.939239,False,True,True,...,True,False,False,False,False,True,False,True,False,False
19998,1.067712,1.191789,True,True,True,0.762871,1.004218,False,True,False,...,True,False,True,False,False,False,False,False,False,True


In [4]:
'''
k_folds = 10
svm = SVC(kernel='linear', C=1.0, probability=True, random_state=42)
scores = cross_val_score(svm, X, y, scoring='roc_auc_ovr', cv=k_folds)
print(f"SVM Classification ROC AUC over {k_folds} folds: {np.mean(scores):.4f} ± {np.std(scores):.4f}")
'''

'\nk_folds = 10\nsvm = SVC(kernel=\'linear\', C=1.0, probability=True, random_state=42)\nscores = cross_val_score(svm, X, y, scoring=\'roc_auc_ovr\', cv=k_folds)\nprint(f"SVM Classification ROC AUC over {k_folds} folds: {np.mean(scores):.4f} ± {np.std(scores):.4f}")\n'

In [5]:
def objective(trial):
    C = trial.suggest_loguniform('C', 0.01, 10)
    svm = SVC(kernel='rbf', C=C, probability=True, random_state=42)
    
    k_folds = 10
    cv = KFold(n_splits=k_folds, shuffle=True, random_state=42)
    scores = cross_val_score(svm, X, y, scoring='roc_auc_ovr', cv=cv, n_jobs=-1)

    return np.mean(scores)

study = optuna.create_study(direction='maximize')

study.optimize(objective, n_trials=10)

print("Best C:", study.best_params['C'])
print(f"Best CV ROC AUC: {study.best_value:.4f}")



[I 2025-10-29 19:15:57,300] A new study created in memory with name: no-name-0ad89ffb-db77-44b8-b2a9-821050573de6
[I 2025-10-29 19:18:49,156] Trial 0 finished with value: 0.5020394553169031 and parameters: {'C': 3.2162329919243304}. Best is trial 0 with value: 0.5020394553169031.
[I 2025-10-29 19:20:58,012] Trial 1 finished with value: 0.4971374873921205 and parameters: {'C': 0.012666571658726306}. Best is trial 0 with value: 0.5020394553169031.
[I 2025-10-29 19:23:14,973] Trial 2 finished with value: 0.5045267356479693 and parameters: {'C': 0.17024744778823173}. Best is trial 2 with value: 0.5045267356479693.
[I 2025-10-29 19:25:38,856] Trial 3 finished with value: 0.5059330192091533 and parameters: {'C': 1.355893463298561}. Best is trial 3 with value: 0.5059330192091533.
[I 2025-10-29 19:28:06,442] Trial 4 finished with value: 0.4967366129865646 and parameters: {'C': 0.017988237080065243}. Best is trial 3 with value: 0.5059330192091533.
[I 2025-10-29 19:30:32,523] Trial 5 finished wi

Best C: 1.5221011903413537
Best CV ROC AUC: 0.5077
