In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Load the raw dataset; the relative path is resolved against the kernel's working directory.
df = pd.read_csv("data.csv")

In [7]:
# Remove the two columns that are not used as model inputs.
# `errors='ignore'` keeps this cell safe to re-run: `df` is overwritten here,
# so on a second execution the columns are already gone and a plain drop
# would raise KeyError.
# NOTE(review): 'Unnamed: 32' is presumably an empty artifact column from the
# CSV export, and 'id' a row identifier - confirm against the data.
df = df.drop(columns=['id', 'Unnamed: 32'], errors='ignore')

In [8]:
# Encode the target as integers: 'M' -> 1, 'B' -> 0.
# The identity entries (1 -> 1, 0 -> 0) make the cell idempotent: the column
# is overwritten in place, so re-running a plain {'M': 1, 'B': 0} map on the
# already-encoded values would turn every label into NaN.
# Any other label still maps to NaN, exactly as before.
df['diagnosis'] = df['diagnosis'].map({'M': 1, 'B': 0, 1: 1, 0: 0})

In [9]:
# Separate the target column from the remaining feature columns.
y = df['diagnosis']
X = df.drop(columns=['diagnosis'])

In [10]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions roughly equal in both partitions, and the fixed seed makes the
# split repeatable.
split_kwargs = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_kwargs)


In [3]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV


In [4]:
# Two-step estimator: standardise the features, then fit a support vector
# classifier. probability=True is what enables predict_proba on the fitted model.
scaling_step = ("scaler", StandardScaler())
classifier_step = ("svm", SVC(random_state=42, probability=True))

svm_pipeline = Pipeline(steps=[scaling_step, classifier_step])


In [5]:
# Hyper-parameter search space for the "svm" pipeline step (the "svm__" prefix
# routes each entry to that step).
#
# The space is split into one grid per kernel because `gamma` is only used by
# the RBF kernel here; a single Cartesian grid would fit every linear-kernel
# candidate twice (once per gamma value) with identical results.
# This yields 3 linear + 6 RBF = 9 distinct candidates instead of 12.
param_grid = [
    {
        "svm__kernel": ["linear"],
        "svm__C": [0.1, 1, 10],
    },
    {
        "svm__kernel": ["rbf"],
        "svm__C": [0.1, 1, 10],
        "svm__gamma": ["scale", "auto"],
    },
]


In [11]:
# Exhaustive search over param_grid using 5-fold cross-validation, ranking
# candidates by ROC AUC; n_jobs=-1 requests all available cores.
grid_svm = GridSearchCV(
    estimator=svm_pipeline,
    param_grid=param_grid,
    scoring="roc_auc",
    cv=5,
    n_jobs=-1,
)

# fit() stays the final expression so the notebook still renders the fitted search object.
grid_svm.fit(X_train, y_train)


0,1,2
,estimator,Pipeline(step...m_state=42))])
,param_grid,"{'svm__C': [0.1, 1, ...], 'svm__gamma': ['scale', 'auto'], 'svm__kernel': ['linear', 'rbf']}"
,scoring,'roc_auc'
,n_jobs,-1
,refit,True
,cv,5
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,C,10
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,True
,tol,0.001
,cache_size,200
,class_weight,


In [12]:
# Report the winning hyper-parameters and their mean cross-validated score.
for label, value in (
    ("Best Parameters:", grid_svm.best_params_),
    ("Best CV ROC AUC:", grid_svm.best_score_),
):
    print(label, value)


Best Parameters: {'svm__C': 10, 'svm__gamma': 'scale', 'svm__kernel': 'rbf'}
Best CV ROC AUC: 0.9944272445820435


In [13]:
# Pipeline refitted by the grid search with the best parameter combination.
best_svm = grid_svm.best_estimator_

# Scores for the held-out rows: column 1 of predict_proba is kept as the
# positive-class probability (presumably label 1 - confirm via best_svm.classes_).
y_proba = best_svm.predict_proba(X_test)[:, 1]
# Hard class predictions for the same rows.
y_pred = best_svm.predict(X_test)


In [14]:
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    roc_auc_score,
)

# Compute every held-out metric first, then print them in a fixed order.
test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
# ROC AUC is computed from the probability scores, not the hard predictions.
test_auc = roc_auc_score(y_test, y_proba)

print("Accuracy:", test_accuracy)
print("\nConfusion Matrix:\n", test_confusion)
print("\nClassification Report:\n", test_report)
print("\nROC AUC:", test_auc)


Accuracy: 0.9736842105263158

Confusion Matrix:
 [[72  0]
 [ 3 39]]

Classification Report:
               precision    recall  f1-score   support

           0       0.96      1.00      0.98        72
           1       1.00      0.93      0.96        42

    accuracy                           0.97       114
   macro avg       0.98      0.96      0.97       114
weighted avg       0.97      0.97      0.97       114


ROC AUC: 0.9927248677248677


In [15]:
import joblib

# Persist the fitted pipeline (scaler + SVM) to a file in the working directory.
model_path = "svm_breast_cancer_model.pkl"
joblib.dump(best_svm, model_path)

print("SVM model saved successfully!")


SVM model saved successfully!


In [16]:
# Read the persisted pipeline back from disk.
# The file is pickle-based, so only load model files from a trusted source.
loaded_svm = joblib.load(filename="svm_breast_cancer_model.pkl")

print("SVM model loaded!")


SVM model loaded!


In [17]:
# Hand-written example record with one value for each of the 30 feature
# columns, grouped by suffix (_mean, _se, _worst).
mean_features = {
    "radius_mean": 18.2,
    "texture_mean": 19.8,
    "perimeter_mean": 120.0,
    "area_mean": 1000.0,
    "smoothness_mean": 0.11,
    "compactness_mean": 0.2,
    "concavity_mean": 0.25,
    "concave points_mean": 0.15,
    "symmetry_mean": 0.2,
    "fractal_dimension_mean": 0.06,
}
se_features = {
    "radius_se": 0.6,
    "texture_se": 1.5,
    "perimeter_se": 4.5,
    "area_se": 70.0,
    "smoothness_se": 0.006,
    "compactness_se": 0.03,
    "concavity_se": 0.04,
    "concave points_se": 0.02,
    "symmetry_se": 0.025,
    "fractal_dimension_se": 0.004,
}
worst_features = {
    "radius_worst": 22.0,
    "texture_worst": 30.0,
    "perimeter_worst": 150.0,
    "area_worst": 1500.0,
    "smoothness_worst": 0.15,
    "compactness_worst": 0.35,
    "concavity_worst": 0.4,
    "concave points_worst": 0.2,
    "symmetry_worst": 0.35,
    "fractal_dimension_worst": 0.09,
}

# Merge the groups in order (mean, se, worst) into a single-row frame; dict
# insertion order fixes the column order.
X_new = pd.DataFrame([{**mean_features, **se_features, **worst_features}])


In [18]:
# Score the single example row with the reloaded pipeline.
prediction = loaded_svm.predict(X_new)
probability = loaded_svm.predict_proba(X_new)

# Translate the encoded class of the first (only) row into a readable label.
if prediction[0] == 1:
    predicted_label = "Malignant"
else:
    predicted_label = "Benign"

print("Prediction:", predicted_label)
print("Probability:", probability)


Prediction: Malignant
Probability: [[2.65002552e-08 9.99999973e-01]]


In [19]:
# Sanity check: the reloaded pipeline should score the held-out rows the same
# way as the in-memory model it was saved from.
y_pred_loaded = loaded_svm.predict(X_test)
loaded_accuracy = accuracy_score(y_test, y_pred_loaded)

print("Accuracy after loading:", loaded_accuracy)


Accuracy after loading: 0.9736842105263158
