# Classification using SVM
### Author: Prof. Sandro Camargo <github.com/sandrocamargo>
### Data Mining Course <https://moodle.unipampa.edu.br/moodle/course/view.php?id=5213>
#### This script uses the basic concepts of SVM (Support Vector Machines).
This script uses the Iris dataset: https://archive.ics.uci.edu/dataset/53/iris

Algorithm documentation: https://scikit-learn.org/stable/modules/svm.html

To open this notebook in your Google Colab environment, [click here](https://colab.research.google.com/github/Sandrocamargo/data-mining/blob/main/Python/md05_Classification_SVM.ipynb).

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.decomposition import PCA

In [None]:
# Load the Iris dataset and pull out the feature matrix, the class labels,
# and the class names
iris = datasets.load_iris()
X, y = iris.data, iris.target
target_names = iris.target_names

In [None]:
# Split the raw features first, so the test samples stay unseen by every
# fitted step, including the PCA projection (avoids data leakage).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Reduce dimensions for visualization (PCA: 2 components). The projection is
# fitted on the training samples only and then reused for the test samples.
pca = PCA(n_components=2)
X_train = pca.fit_transform(X_train_raw)
X_test = pca.transform(X_test_raw)

# Projection of the full dataset with the same fitted PCA, kept available for
# any code that needs all samples in the 2-D space.
X_pca = pca.transform(X)

In [None]:
# Fit a linear-kernel SVM on the training split, then predict labels for X_test
svm_model = SVC(C=1.0, kernel='linear').fit(X_train, y_train)
y_pred = svm_model.predict(X_test)

# Evaluation: accuracy, per-class report, and confusion matrix
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(
    "\nClassification Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred, target_names=target_names),
)
print("\nConfusion Matrix:\n", confusion_matrix(y_true=y_test, y_pred=y_pred))

In [None]:
# Plotting decision regions
def plot_decision_boundaries(X, y, model, title):
    """Plot the decision regions of a fitted classifier over two features.

    The classifier is evaluated on a 300 x 300 grid spanning the range of
    ``X`` padded by 0.5 on each side. The predicted class of every grid point
    is drawn as a filled contour, and the samples in ``X`` are scattered on
    top, colored by ``y``.

    Relies on two module-level objects: ``iris`` (class names shown in the
    legend) and ``pca`` (explained-variance percentages in the axis labels).

    Args:
        X: 2-D array; column 0 is plotted on the x axis, column 1 on the y axis.
        y: Integer class labels for the rows of ``X``; each label is used as
            an index into ``iris.target_names``.
        model: Fitted estimator whose ``predict`` accepts two-column input.
        title: Title of the figure.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 300),
                         np.linspace(y_min, y_max, 300))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # One color scale shared by the regions, the points, and the legend, so a
    # class keeps the same color even when `y` does not contain every class.
    n_classes = len(iris.target_names)
    color_max = max(n_classes - 1, 1)  # guard against division by zero
    # One contour band per class label: [-0.5, 0.5), [0.5, 1.5), ...
    class_levels = np.arange(n_classes + 1) - 0.5

    plt.figure(figsize=(8, 6))
    plt.contourf(xx, yy, Z, levels=class_levels, alpha=0.3, cmap='coolwarm',
                 vmin=0, vmax=color_max)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', edgecolor='k',
                vmin=0, vmax=color_max)

    # Build the legend entries from the unique classes actually present in `y`
    present_classes = np.unique(y)
    legend_labels = [iris.target_names[i] for i in present_classes]
    handles = [plt.Line2D([0], [0], marker='o', color='w', label=label,
                          markerfacecolor=plt.cm.coolwarm(i / color_max),
                          markersize=8, markeredgecolor='k')
               for i, label in zip(present_classes, legend_labels)]
    plt.legend(handles=handles, title="Classes")

    # Axis labels report the variance explained by each principal component
    plt.xlabel(f"PC 1 ({pca.explained_variance_ratio_[0]*100:.2f}%)")
    plt.ylabel(f"PC 2 ({pca.explained_variance_ratio_[1]*100:.2f}%)")
    plt.title(title)
    plt.grid(True)
    plt.show()


# Draw the decision regions of the current model over the PCA-projected test samples
plot_decision_boundaries(
    X=X_test,
    y=y_test,
    model=svm_model,
    title="SVM Decision Boundaries (Test Set, PCA Projection)",
)

# Confusion matrix as an annotated heatmap (rows: true class, columns: predicted class)
plt.figure(figsize=(6, 5))
sns.heatmap(
    confusion_matrix(y_test, y_pred),
    cmap="Blues",
    annot=True,
    fmt='d',
    xticklabels=target_names,
    yticklabels=target_names,
)
plt.title("Confusion Matrix")
plt.ylabel("True")
plt.xlabel("Predicted")
plt.tight_layout()
plt.show()

In [None]:
# Fit an RBF-kernel SVM on the training split, then predict labels for X_test
svm_model = SVC(kernel='rbf').fit(X_train, y_train)
y_pred = svm_model.predict(X_test)

# Evaluation: accuracy, per-class report, and confusion matrix
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(
    "\nClassification Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred, target_names=target_names),
)
print("\nConfusion Matrix:\n", confusion_matrix(y_true=y_test, y_pred=y_pred))

In [None]:
# Function to plot decision boundaries
def plot_decision_boundaries(X, y, model, title):
    """Plot the decision regions of a fitted classifier over two features.

    The classifier is evaluated on a 300 x 300 grid spanning the range of
    ``X`` padded by 0.5 on each side. The predicted class of every grid point
    is drawn as a filled contour, and the samples in ``X`` are scattered on
    top, colored by ``y``.

    Relies on two module-level objects: ``iris`` (class names shown in the
    legend) and ``pca`` (explained-variance percentages in the axis labels).

    Args:
        X: 2-D array; column 0 is plotted on the x axis, column 1 on the y axis.
        y: Integer class labels for the rows of ``X``; each label is used as
            an index into ``iris.target_names``.
        model: Fitted estimator whose ``predict`` accepts two-column input.
        title: Title of the figure.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 300),
                         np.linspace(y_min, y_max, 300))

    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # One color scale shared by the regions, the points, and the legend, so a
    # class keeps the same color even when `y` does not contain every class.
    n_classes = len(iris.target_names)
    color_max = max(n_classes - 1, 1)  # guard against division by zero
    # One contour band per class label: [-0.5, 0.5), [0.5, 1.5), ...
    class_levels = np.arange(n_classes + 1) - 0.5

    plt.figure(figsize=(8, 6))
    plt.contourf(xx, yy, Z, levels=class_levels, alpha=0.3, cmap='coolwarm',
                 vmin=0, vmax=color_max)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', edgecolor='k',
                vmin=0, vmax=color_max)

    # Legend: one entry per class actually present in `y`
    classes = np.unique(y)
    handles = [plt.Line2D([0], [0], marker='o', label=iris.target_names[i],
                          color='w', markerfacecolor=plt.cm.coolwarm(i / color_max),
                          markeredgecolor='k', markersize=8)
               for i in classes]
    plt.legend(handles=handles, title="Classes")

    # Axis labels with explained variance
    plt.xlabel(f"PC1 ({pca.explained_variance_ratio_[0]*100:.2f}%)")
    plt.ylabel(f"PC2 ({pca.explained_variance_ratio_[1]*100:.2f}%)")
    plt.title(title)
    plt.grid(True)
    plt.show()

# Draw the decision regions of the current model over the test samples
plot_decision_boundaries(
    X=X_test,
    y=y_test,
    model=svm_model,
    title="SVM Decision Boundaries (Test Set, PCA)",
)