In [1]:
from sklearn import datasets
from sklearn.metrics import (accuracy_score, confusion_matrix, f1_score, precision_score, recall_score)
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# --- Data -------------------------------------------------------------------
# Load Iris and hold out a stratified 20% test split; the fixed seed keeps the
# split reproducible across runs.
iris = datasets.load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    stratify=iris.target,
    random_state=42,
)

# --- Models -----------------------------------------------------------------
# One SVC per kernel under comparison. Insertion order determines both the
# report order and which kernel wins a tie further down.
# NOTE(review): per the scikit-learn docs, random_state only matters for SVC
# when probability=True, so it is presumably a no-op here — confirm.
kernels = dict(
    Linear=SVC(kernel="linear", random_state=42),
    Polynomial=SVC(kernel="poly", degree=3, random_state=42),
    RBF=SVC(kernel="rbf", random_state=42),
)

# --- Fit and score ----------------------------------------------------------
# Scorers that are all called with average="macro"; accuracy is handled
# separately because it takes no averaging argument.
macro_scorers = {
    "precision": precision_score,
    "recall": recall_score,
    "f1": f1_score,
}

results = {}
for name, clf in kernels.items():
    # fit() returns the estimator itself, so training and prediction chain.
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    metrics = {"accuracy": accuracy_score(y_test, y_pred)}
    metrics.update(
        {key: scorer(y_test, y_pred, average="macro") for key, scorer in macro_scorers.items()}
    )
    results[name] = {
        "metrics": metrics,
        "confusion_matrix": confusion_matrix(y_test, y_pred),
    }

# --- Report -----------------------------------------------------------------
print("Iris SVM kernel comparison (80/20 split):")
for name, info in results.items():
    metrics = info["metrics"]
    print(f"\n{name} kernel")
    print(f"  Accuracy: {metrics['accuracy']:.3f}")
    print(f"  Precision: {metrics['precision']:.3f}")
    print(f"  Recall: {metrics['recall']:.3f}")
    print(f"  F1 score: {metrics['f1']:.3f}")
    print("  Confusion matrix:")
    for row in info["confusion_matrix"]:
        # Each row is converted to a plain list so it prints as [a, b, c].
        print("    ", row.tolist())

# --- Pick the winner --------------------------------------------------------
# max() returns the first maximal item it encounters, so on equal accuracy the
# kernel listed first in `kernels` is kept.
# NOTE(review): the winner is chosen on test-set accuracy, so the reported
# "best" score is an optimistic estimate.
best_kernel = max(results, key=lambda kernel_name: results[kernel_name]["metrics"]["accuracy"])
best_accuracy = results[best_kernel]["metrics"]["accuracy"]

print(f"\nBest kernel: {best_kernel} (highest test accuracy of {best_accuracy:.3f})")

# Canned explanation per winning kernel; any name not listed here falls back
# to the RBF explanation.
reasons = {
    "Linear": "Iris is mostly linearly separable, so linear kernel already separates the classes well.",
    "Polynomial": "Polynomial kernel captures mild curvature in the decision boundary that improves accuracy.",
}
rbf_reason = "RBF flexibly adapts to non-linear regions, giving better balance across classes when overlap exists."
reason = reasons.get(best_kernel, rbf_reason)
print("Reason: ", reason)



Iris SVM kernel comparison (80/20 split):

Linear kernel
  Accuracy: 1.000
  Precision: 1.000
  Recall: 1.000
  F1 score: 1.000
  Confusion matrix:
     [10, 0, 0]
     [0, 10, 0]
     [0, 0, 10]

Polynomial kernel
  Accuracy: 0.967
  Precision: 0.970
  Recall: 0.967
  F1 score: 0.967
  Confusion matrix:
     [10, 0, 0]
     [0, 9, 1]
     [0, 0, 10]

RBF kernel
  Accuracy: 0.967
  Precision: 0.970
  Recall: 0.967
  F1 score: 0.967
  Confusion matrix:
     [10, 0, 0]
     [0, 9, 1]
     [0, 0, 10]

Best kernel: Linear (highest test accuracy of 1.000)
Reason:  Iris is mostly linearly separable, so linear kernel already separates the classes well.


In [2]:
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# --- Data -------------------------------------------------------------------
# Load the Breast Cancer dataset and hold out a stratified 20% test split with
# a fixed seed.
cancer = datasets.load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.2,
    stratify=cancer.target,
    random_state=42,
)

# --- Scaling ----------------------------------------------------------------
# The scaler is fitted on the training split only and then applied to both
# splits, so no statistics from the test data are used when fitting it.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- Models -----------------------------------------------------------------
# Two RBF SVCs with identical settings; the only difference is whether the
# inputs were standardized. fit() returns the estimator, so it can be chained.
svc_unscaled = SVC(kernel="rbf", random_state=42, gamma="scale").fit(X_train, y_train)
svc_scaled = SVC(kernel="rbf", random_state=42, gamma="scale").fit(X_train_scaled, y_train)

# --- Scores -----------------------------------------------------------------
# Each model is scored on the same representation it was trained on.
unscaled_train_acc = accuracy_score(y_train, svc_unscaled.predict(X_train))
unscaled_test_acc = accuracy_score(y_test, svc_unscaled.predict(X_test))
scaled_train_acc = accuracy_score(y_train, svc_scaled.predict(X_train_scaled))
scaled_test_acc = accuracy_score(y_test, svc_scaled.predict(X_test_scaled))

# --- Report -----------------------------------------------------------------
print("Breast Cancer SVM without scaling:")
print("  Train accuracy:", unscaled_train_acc)
print("  Test accuracy:", unscaled_test_acc)
print("\nBreast Cancer SVM with StandardScaler:")
print("  Train accuracy:", scaled_train_acc)
print("  Test accuracy:", scaled_test_acc)

# Fixed takeaway text; it is printed regardless of the numbers above.
takeaway = (
    "\nFeature scaling regularizes distances before RBF acts, reducing dominance by large-magnitude features "
    "and yielding more stable accuracy on the unseen split."
)
print(takeaway)



Breast Cancer SVM without scaling:
  Train accuracy: 0.9186813186813186
  Test accuracy: 0.9298245614035088

Breast Cancer SVM with StandardScaler:
  Train accuracy: 0.9824175824175824
  Test accuracy: 0.9824561403508771

Feature scaling regularizes distances before RBF acts, reducing dominance by large-magnitude features and yielding more stable accuracy on the unseen split.
