In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer, make_blobs
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.inspection import DecisionBoundaryDisplay
import os


In [2]:
OUTPUT_DIR = "docs/assets/svm_figs"    # sug. para GitHub Pages (crie a pasta)
os.makedirs(OUTPUT_DIR, exist_ok=True)
RANDOM_STATE = 42

In [3]:
# Uso o mesmo que o professor (breast_cancer), mas com 2 primeiras features para visualização 2D.
cancer = load_breast_cancer()
X = cancer.data[:, :2]    # somente 2 features para plot 2D (ajuste se quiser outras)
y = cancer.target

In [4]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.3, stratify=y, random_state=RANDOM_STATE
)

In [6]:
def plot_svm_and_save(clf, X, y, title, filename, show_support=True):
   
    fig, ax = plt.subplots(figsize=(6, 5))
    DecisionBoundaryDisplay.from_estimator(
        clf, X, response_method="predict", alpha=0.6, cmap="Pastel1", ax=ax
    )
    # pontos
    ax.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolors="k")
    if show_support and hasattr(clf, "support_vectors_") and clf.support_vectors_.size:
        sv = clf.support_vectors_
        ax.scatter(sv[:, 0], sv[:, 1], facecolors="none", edgecolors="k", s=100, linewidths=1.2,
                   label=f"Support vectors ({len(sv)})")
        ax.legend(loc="upper right")
    ax.set_title(title)
    ax.set_xticks([])
    ax.set_yticks([])
    path = os.path.join(OUTPUT_DIR, filename)
    plt.savefig(path, format="svg", transparent=True)
    plt.close(fig)
    print(f"Saved: {path}")

In [7]:
kernels = ["linear", "sigmoid", "poly", "rbf"]
for k in kernels:
    clf = SVC(kernel=k, C=1.0, random_state=RANDOM_STATE)
    clf.fit(X_train, y_train)
    # avaliar no teste
    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"Kernel: {k:6s} | Test accuracy: {acc:.4f} | support vectors: {len(clf.support_)}")
    plot_svm_and_save(clf, X_scaled, y, f"SVM kernel={k} (acc={acc:.3f})", f"svm_{k}.svg")

Kernel: linear | Test accuracy: 0.8713 | support vectors: 114
Saved: docs/assets/svm_figs\svm_linear.svg
Kernel: sigmoid | Test accuracy: 0.8596 | support vectors: 86
Saved: docs/assets/svm_figs\svm_sigmoid.svg
Kernel: poly   | Test accuracy: 0.8246 | support vectors: 147
Saved: docs/assets/svm_figs\svm_poly.svg
Kernel: rbf    | Test accuracy: 0.8830 | support vectors: 118
Saved: docs/assets/svm_figs\svm_rbf.svg


In [8]:
clf_lin = SVC(kernel="linear", C=1.0, probability=True, random_state=RANDOM_STATE)
clf_lin.fit(X_train, y_train)
y_pred = clf_lin.predict(X_test)
print("\nLinear SVM - Classification report:")
print(classification_report(y_test, y_pred, target_names=cancer.target_names))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))
plot_svm_and_save(clf_lin, X_scaled, y, "SVM linear - final", "svm_linear_final.svg")


Linear SVM - Classification report:
              precision    recall  f1-score   support

   malignant       0.88      0.77      0.82        64
      benign       0.87      0.93      0.90       107

    accuracy                           0.87       171
   macro avg       0.87      0.85      0.86       171
weighted avg       0.87      0.87      0.87       171

Confusion matrix:
 [[ 49  15]
 [  7 100]]
Saved: docs/assets/svm_figs\svm_linear_final.svg


In [9]:
param_grid = [
    {"kernel": ["linear"], "C": [0.01, 0.1, 1, 10]},
    {"kernel": ["rbf"], "C": [0.1, 1, 10], "gamma": ["scale", 0.1, 1, 10]},
    {"kernel": ["poly"], "C": [0.1, 1], "degree": [2, 3], "gamma": ["scale", 0.1]},
]
grid = GridSearchCV(SVC(random_state=RANDOM_STATE), param_grid, cv=5, scoring="accuracy", n_jobs=-1)
grid.fit(X_train, y_train)
print("\nGridSearch Best params:", grid.best_params_)
best = grid.best_estimator_
y_pred_best = best.predict(X_test)
print("Best estimator test accuracy:", accuracy_score(y_test, y_pred_best))
print(classification_report(y_test, y_pred_best, target_names=cancer.target_names))
plot_svm_and_save(best, X_scaled, y, f"SVM best: {grid.best_params_}", "svm_best.svg")


GridSearch Best params: {'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}
Best estimator test accuracy: 0.8947368421052632
              precision    recall  f1-score   support

   malignant       0.87      0.84      0.86        64
      benign       0.91      0.93      0.92       107

    accuracy                           0.89       171
   macro avg       0.89      0.88      0.89       171
weighted avg       0.89      0.89      0.89       171

Saved: docs/assets/svm_figs\svm_best.svg
