Apply an SVM model for classification. Experiment with at least two different kernel
functions (e.g., linear, RBF) and analyze how the choice of kernel impacts the model's
accuracy and training time.

1. Imports and Data Preparation

In [1]:
import pandas as pd
import numpy as np
import time
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# 1. Load the dataset
cancer = load_breast_cancer()
X = cancer.data
y = cancer.target

# 2. Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 3. Scale the data
# This is CRITICAL for SVMs to perform correctly
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Training data shape: {X_train_scaled.shape}")
print(f"Testing data shape: {X_test_scaled.shape}")

Training data shape: (398, 30)
Testing data shape: (171, 30)


2. Experiment 1: Linear Kernel

In [2]:
# --- Model 1: Linear Kernel ---
print("\n--- Training SVM with Linear Kernel ---")

# Record start time
start_time_linear = time.time()

# Initialize and train the model
svm_linear = SVC(kernel='linear', random_state=42)
svm_linear.fit(X_train_scaled, y_train)

# Record end time and calculate duration
end_time_linear = time.time()
time_linear = end_time_linear - start_time_linear

# Make predictions
y_pred_linear = svm_linear.predict(X_test_scaled)

# Evaluate
acc_linear = accuracy_score(y_test, y_pred_linear)
print(f"Training Time: {time_linear:.6f} seconds")
print(f"Accuracy: {acc_linear:.4f}")
print("Classification Report:")
print(classification_report(y_test, y_pred_linear, target_names=cancer.target_names))


--- Training SVM with Linear Kernel ---
Training Time: 0.013000 seconds
Accuracy: 0.9766
Classification Report:
              precision    recall  f1-score   support

   malignant       0.97      0.97      0.97        63
      benign       0.98      0.98      0.98       108

    accuracy                           0.98       171
   macro avg       0.97      0.97      0.97       171
weighted avg       0.98      0.98      0.98       171



3. Experiment 2: RBF Kernel

In [3]:
# --- Model 2: RBF Kernel ---
print("\n--- Training SVM with RBF Kernel ---")

# Record start time
start_time_rbf = time.time()

# Initialize and train the model
svm_rbf = SVC(kernel='rbf', random_state=42)
svm_rbf.fit(X_train_scaled, y_train)

# Record end time and calculate duration
end_time_rbf = time.time()
time_rbf = end_time_rbf - start_time_rbf

# Make predictions
y_pred_rbf = svm_rbf.predict(X_test_scaled)

# Evaluate
acc_rbf = accuracy_score(y_test, y_pred_rbf)
print(f"Training Time: {time_rbf:.6f} seconds")
print(f"Accuracy: {acc_rbf:.4f}")
print("Classification Report:")
print(classification_report(y_test, y_pred_rbf, target_names=cancer.target_names))


--- Training SVM with RBF Kernel ---
Training Time: 0.003000 seconds
Accuracy: 0.9766
Classification Report:
              precision    recall  f1-score   support

   malignant       0.97      0.97      0.97        63
      benign       0.98      0.98      0.98       108

    accuracy                           0.98       171
   macro avg       0.97      0.97      0.97       171
weighted avg       0.98      0.98      0.98       171

