Import the necessary libraries.

In [None]:
# Essential imports for SVM implementation
import warnings

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, SVR
# Suppress every warning category so library messages do not clutter cell output.
warnings.simplefilter('ignore')

# Seed NumPy's global random generator so reruns produce the same numbers.
np.random.seed(seed=42)

Fetch the Iris Dataset
Why the Iris dataset?

Classic dataset for classification

Well-balanced classes

Good for demonstrating SVM capabilities

Small enough for quick experimentation

In [None]:
# Load Iris and report its basic metadata
iris = datasets.load_iris()
print(f"Feature names: {iris.feature_names}")
print(f"Target names: {iris.target_names}")
print(f"Dataset shape: {iris.data.shape}")

# Short aliases used by later cells.
# X columns: sepal length, sepal width, petal length, petal width
# y labels:  0=setosa, 1=versicolor, 2=virginica
X, y = iris.data, iris.target

# Peek at the first five rows and their labels
print()
print("First 5 samples:")
print(X[:5])
print(f"Corresponding labels: {y[:5]}")

Linear SVM Classification
Why Linear SVM?

Finds optimal hyperplane that maximizes margin between classes

Works well when data is linearly separable

C parameter controls trade-off between margin width and classification error

In [None]:
# Binary task: setosa (label 0) against every other species (label 1).
# Note that the positive class contains both versicolor and virginica.
# Only the two sepal measurements are kept so the boundary can be drawn in 2-D.
X_binary = iris.data[:, 0:2]
y_binary = np.where(iris.target == 0, 0, 1)

# Fit a linear-kernel SVM on the two sepal features
linear_svm = SVC(C=1.0, kernel='linear').fit(X_binary, y_binary)

# Decision-boundary plot for a classifier fitted on two features
def plot_decision_boundary(clf, X, y, title, ax=None, step=0.02,
                           xlabel='Sepal length', ylabel='Sepal width'):
    """Draw a classifier's predicted regions with the data points on top.

    A regular grid is laid over the range of the first two columns of ``X``
    (padded by 1 on every side), ``clf.predict`` is evaluated on every grid
    point, and the predictions are shown as filled contours beneath a scatter
    of the samples coloured by ``y``.

    Parameters
    ----------
    clf : object
        Fitted estimator exposing ``predict`` for two-column input.
    X : ndarray
        Feature matrix; columns 0 and 1 are used as the x and y coordinates.
    y : array-like
        Values used to colour the scatter points.
    title : str
        Title placed on the axes.
    ax : matplotlib Axes, optional
        Axes to draw on. When omitted, a new figure is created and shown
        immediately; when supplied, the caller is responsible for showing it,
        which lets the function be reused for subplot panels.
    step : float, optional
        Grid spacing in feature units. Defaults to 0.02.
    xlabel, ylabel : str, optional
        Axis labels. Default to the sepal labels used throughout this notebook.

    Returns
    -------
    None
    """
    owns_figure = ax is None
    if owns_figure:
        _, ax = plt.subplots()

    # Pad the data range so points on the edge are not drawn on the frame.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))

    # Predict on the flattened grid, then restore the grid shape for contourf.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    ax.contourf(xx, yy, Z, alpha=0.3)
    ax.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)

    # Only show figures this function created itself.
    if owns_figure:
        plt.show()

plot_decision_boundary(linear_svm, X_binary, y_binary,
                       title="Linear SVM Classification")

Polynomial Kernels
Why Polynomial Kernels?

Handle non-linearly separable data

degree parameter controls complexity of decision boundary

Higher degrees = more complex boundaries (risk of overfitting)

In [None]:
# Cubic polynomial-kernel SVM (coef0=1 is passed as the kernel's independent term)
poly_svm = SVC(kernel='poly', degree=3, coef0=1, C=1.0).fit(X_binary, y_binary)

plot_decision_boundary(poly_svm, X_binary, y_binary,
                       title="Polynomial Kernel SVM (degree=3)")

# Compare different polynomial degrees
degrees = [2, 3, 4]
fig, axes = plt.subplots(1, len(degrees), figsize=(15, 4))

# The evaluation grid depends only on the data, not on the model, so it is
# built once here instead of once per degree.
x_min, x_max = X_binary[:, 0].min() - 1, X_binary[:, 0].max() + 1
y_min, y_max = X_binary[:, 1].min() - 1, X_binary[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))
grid_points = np.c_[xx.ravel(), yy.ravel()]

# np.ravel keeps the loop valid even if `degrees` is reduced to one entry,
# in which case plt.subplots returns a bare Axes rather than an array.
for ax, degree in zip(np.ravel(axes), degrees):
    # A loop-local name keeps `poly_svm` bound to the model plotted above.
    # NOTE(review): coef0 is left at SVC's default here, while the model
    # plotted above passes coef0=1, so the degree=3 panel is a different
    # model from that plot. Confirm this is intended.
    degree_svm = SVC(kernel='poly', degree=degree, C=1.0)
    degree_svm.fit(X_binary, y_binary)

    # Predict on the flattened grid, then restore the grid shape for contourf.
    Z = degree_svm.predict(grid_points).reshape(xx.shape)

    ax.contourf(xx, yy, Z, alpha=0.3)
    ax.scatter(X_binary[:, 0], X_binary[:, 1], c=y_binary, edgecolors='k')
    ax.set_title(f'Polynomial degree={degree}')
    ax.set_xlabel('Sepal length')
    ax.set_ylabel('Sepal width')

plt.tight_layout()
plt.show()

 Gaussian Kernel (RBF)
 Why Gaussian (RBF) Kernel?

Most commonly used kernel

gamma controls how far the influence of a single training example reaches (low gamma = far, high gamma = close)

Low gamma = smooth decision boundary

High gamma = complex, wiggly boundary (risk of overfitting)

In [None]:
# SVM with a Gaussian (RBF) kernel at a fixed gamma
rbf_svm = SVC(C=1.0, gamma=0.7, kernel='rbf').fit(X_binary, y_binary)

plot_decision_boundary(rbf_svm, X_binary, y_binary, title="RBF Kernel SVM")

# Compare different gamma values
gammas = [0.1, 1, 10]
fig, axes = plt.subplots(1, len(gammas), figsize=(15, 4))

# The evaluation grid depends only on the data, not on the model, so it is
# built once here instead of once per gamma.
x_min, x_max = X_binary[:, 0].min() - 1, X_binary[:, 0].max() + 1
y_min, y_max = X_binary[:, 1].min() - 1, X_binary[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))
grid_points = np.c_[xx.ravel(), yy.ravel()]

# np.ravel keeps the loop valid even if `gammas` is reduced to one entry,
# in which case plt.subplots returns a bare Axes rather than an array.
for ax, gamma in zip(np.ravel(axes), gammas):
    # A loop-local name keeps `rbf_svm` bound to the gamma=0.7 model plotted above.
    gamma_svm = SVC(kernel='rbf', gamma=gamma, C=1.0)
    gamma_svm.fit(X_binary, y_binary)

    # Predict on the flattened grid, then restore the grid shape for contourf.
    Z = gamma_svm.predict(grid_points).reshape(xx.shape)

    ax.contourf(xx, yy, Z, alpha=0.3)
    ax.scatter(X_binary[:, 0], X_binary[:, 1], c=y_binary, edgecolors='k')
    ax.set_title(f'RBF gamma={gamma}')
    ax.set_xlabel('Sepal length')
    ax.set_ylabel('Sepal width')

plt.tight_layout()
plt.show()

SVM Regression
Why SVM Regression?

Finds a function whose predictions stay within ε of the training targets wherever possible; deviations larger than ε are penalized (weighted by C)

Tries to fit as many points as possible within a margin

Useful when you want to ignore small errors

In [None]:
# Synthetic regression data: a quadratic trend in x plus Gaussian noise.
# The RNG is re-seeded so this cell gives the same data when rerun on its own.
np.random.seed(42)
m = 100
X_reg = np.random.rand(m, 1) * 2 - 1  # uniform draws rescaled from [0, 1) to [-1, 1)
quadratic_trend = 0.2 + 0.1 * X_reg + 0.5 * X_reg**2
gaussian_noise = np.random.randn(m, 1) / 10
y_reg = (quadratic_trend + gaussian_noise).ravel()  # flatten the (m, 1) targets to 1-D

# Build and fit one support-vector regressor per kernel on the synthetic data
svm_lin_reg = SVR(C=100, kernel="linear").fit(X_reg, y_reg)
svm_poly_reg = SVR(C=100, kernel="poly", degree=2, coef0=1).fit(X_reg, y_reg)
svm_rbf_reg = SVR(C=100, kernel="rbf", gamma=0.1).fit(X_reg, y_reg)

# Plot results: one panel per fitted regressor
models = [svm_lin_reg, svm_poly_reg, svm_rbf_reg]
titles = ['Linear SVR', 'Polynomial SVR (degree=2)', 'RBF SVR']

fig, axes = plt.subplots(1, len(models), figsize=(15, 4))

# The prediction grid is the same for every model, so build it once.
# reshape(-1, 1) turns it into a single-feature column without hard-coding
# the number of points a second time.
X_new = np.linspace(-1, 1, 100).reshape(-1, 1)

# np.ravel keeps the loop valid even if `models` is reduced to one entry,
# in which case plt.subplots returns a bare Axes rather than an array.
for ax, model, title in zip(np.ravel(axes), models, titles):
    y_pred = model.predict(X_new)

    ax.plot(X_reg, y_reg, "b.")
    ax.plot(X_new, y_pred, "r-", linewidth=2, label="Predictions")
    ax.set_xlabel("X")
    ax.set_ylabel("y")
    ax.set_title(title)
    ax.legend()

plt.tight_layout()
plt.show()

Decision Function and Predictions

In [None]:
# Multiclass setup: every feature column and all three species labels
X_full, y_full = iris.data, iris.target

# Standardize the features before fitting (important for SVM)
scaler = StandardScaler()
X_scaled = scaler.fit(X_full).transform(X_full)

# RBF-kernel SVM trained on the standardized features
svm_multiclass = SVC(gamma='scale', C=1.0, kernel='rbf').fit(X_scaled, y_full)

# Show predictions next to raw decision-function scores
print("=== Decision Function and Predictions ===")

# The "test" samples are simply the first five rows of the scaled training
# data, so this is an illustration rather than an out-of-sample check.
test_samples = X_scaled[:5]
decision_scores = svm_multiclass.decision_function(test_samples)
predictions = svm_multiclass.predict(test_samples)

print("Test samples (first 5):")
print(test_samples)
print(f"\nPredictions: {predictions}")
print(f"Actual labels: {y_full[:5]}")
print("\nDecision function scores (higher = more confident):")
print(decision_scores)

# Short guide to reading the scores printed above
print("\n".join([
    "\n=== Interpretation ===",
    "For each sample, the decision function returns scores for all 3 classes",
    "The highest score determines the predicted class",
    "Class 0: Setosa, Class 1: Versicolor, Class 2: Virginica",
]))

# Model evaluation
# Cross-validate on the RAW features with the scaler inside a Pipeline, so
# the scaler is re-fit on each fold's training split only. Scoring on the
# pre-scaled X_scaled would leak validation-fold means and variances into
# training, because that scaler was fitted on the full dataset.
# (cross_val_score is imported in the notebook's top import cell.)
cv_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    # Reuse the hyperparameters of the model fitted above so the two stay in sync.
    ('svc', SVC(**svm_multiclass.get_params())),
])

scores = cross_val_score(cv_pipeline, X_full, y_full, cv=5)
print(f"\nCross-validation accuracy: {scores.mean():.3f} (+/- {scores.std() * 2:.3f})")

In [None]:
# Run the complete analysis
def comprehensive_analysis(cv=5):
    """Print a summary of the Iris data and compare SVM kernels by cross-validation.

    Reads the module-level ``iris`` object loaded earlier in the notebook and
    prints four sections: dataset overview, per-feature min/max/mean, the mean
    cross-validated accuracy of linear, polynomial and RBF ``SVC`` models, and
    a list of takeaways.

    Each model is evaluated as a ``Pipeline`` of ``StandardScaler`` followed by
    ``SVC`` on the raw features, so the scaler is fitted on the training part
    of every fold only. Scaling the whole dataset before cross-validation would
    leak validation-fold statistics into training.

    Parameters
    ----------
    cv : int, optional
        Passed to ``cross_val_score`` as ``cv``. Defaults to 5.

    Returns
    -------
    None
    """
    print("=== COMPREHENSIVE SVM ANALYSIS ON IRIS DATASET ===\n")

    # 1. Data Overview
    n_samples, n_features = iris.data.shape
    print("1. DATASET OVERVIEW")
    print(f"   Samples: {n_samples}")
    print(f"   Features: {n_features}")
    print(f"   Classes: {len(np.unique(iris.target))}")
    print(f"   Class distribution: {np.bincount(iris.target)}")

    # 2. Feature Analysis
    print("\n2. FEATURE ANALYSIS")
    # iris.data.T yields one column per feature, aligned with feature_names.
    for name, column in zip(iris.feature_names, iris.data.T):
        print(f"   {name}: min={column.min():.1f}, max={column.max():.1f}, "
              f"mean={column.mean():.1f}")

    # 3. Model Comparison
    print("\n3. MODEL COMPARISON")
    kernels = ['linear', 'poly', 'rbf']

    for kernel in kernels:
        # degree only affects the 'poly' kernel, so a single constructor call
        # serves every kernel here.
        model = Pipeline([
            ('scaler', StandardScaler()),
            ('svc', SVC(kernel=kernel, degree=3)),
        ])

        scores = cross_val_score(model, iris.data, iris.target, cv=cv)
        print(f"   {kernel.upper()} kernel: {scores.mean():.3f} accuracy")

    # 4. Key Insights
    print("\n4. KEY INSIGHTS")
    print("   - SVMs work best with scaled features")
    print("   - RBF kernel often performs well for complex boundaries")
    print("   - Linear kernel is efficient for linearly separable data")
    print("   - Polynomial kernel can capture polynomial relationships")
    print("   - C parameter controls regularization (overfitting vs underfitting)")
    print("   - Gamma (RBF) controls influence range of training examples")

# Execute comprehensive analysis
comprehensive_analysis()