# Assignment 8 - SVM (Support Vector Machine) Solutions
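This notebook contains solutions for all questions in the SVM Lab Assignment: Question 1 compares linear, polynomial, and RBF kernels on the Iris dataset, and Question 2 measures the effect of feature scaling on an RBF-kernel SVM using the Breast Cancer dataset.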

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, confusion_matrix, classification_report)
import warnings
# Silence only deprecation-style noise from the libraries. A blanket
# filterwarnings('ignore') would also hide warnings that point at real problems
# in the analysis (e.g. sklearn's UndefinedMetricWarning or ConvergenceWarning).
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

# Use seaborn's white grid theme for every figure in the notebook
sns.set_style('whitegrid')
print("Libraries imported successfully!")

---
# Question 1: Iris Dataset - Comparing SVM Kernels

## Load and Prepare Iris Dataset

In [None]:
# Fetch the bundled Iris data and separate it into features and labels
iris = datasets.load_iris()
X_iris, y_iris = iris.data, iris.target

# Quick overview of what was loaded
print("Iris Dataset Shape: {}".format(X_iris.shape))
print("Number of classes: {}".format(np.unique(y_iris).size))
print("Class names: {}".format(iris.target_names))
print("Feature names: {}".format(iris.feature_names))

In [None]:
# Hold out 20% of the samples for testing; stratifying on the labels keeps
# the class proportions the same in both partitions
X_train_iris, X_test_iris, y_train_iris, y_test_iris = train_test_split(
    X_iris,
    y_iris,
    test_size=0.2,
    random_state=42,
    stratify=y_iris,
)

# Report how many samples ended up in each partition
for split_name, split_features in (("Training", X_train_iris), ("Testing", X_test_iris)):
    print(f"{split_name} set size: {len(split_features)}")

## Train SVM with Different Kernels

In [None]:
# Kernels under comparison; per-kernel results are collected for the later cells
kernels = ['linear', 'poly', 'rbf']
kernel_results = {}

# Kernel-specific constructor arguments (only the polynomial kernel needs one)
extra_svc_args = {'poly': {'degree': 3}}
banner = '=' * 60

for kernel in kernels:
    print(f"\n{banner}")
    print(f"Training SVM with {kernel.upper()} kernel")
    print(banner)

    # Build the classifier for this kernel and fit it on the training split
    svm_model = SVC(kernel=kernel, random_state=42, **extra_svc_args.get(kernel, {}))
    svm_model.fit(X_train_iris, y_train_iris)

    # Predict the held-out test samples
    y_pred = svm_model.predict(X_test_iris)

    # Evaluate; precision/recall/F1 use the 'weighted' multi-class average
    scores = {
        'accuracy': accuracy_score(y_test_iris, y_pred),
        'precision': precision_score(y_test_iris, y_pred, average='weighted'),
        'recall': recall_score(y_test_iris, y_pred, average='weighted'),
        'f1_score': f1_score(y_test_iris, y_pred, average='weighted'),
        'confusion_matrix': confusion_matrix(y_test_iris, y_pred),
    }
    kernel_results[kernel] = scores

    # Report the scalar metrics in aligned columns, then the confusion matrix
    print("\nPerformance Metrics:")
    for metric_name, metric_key in [('Accuracy', 'accuracy'), ('Precision', 'precision'),
                                    ('Recall', 'recall'), ('F1-Score', 'f1_score')]:
        label = metric_name + ':'
        print(f"  {label:<10} {scores[metric_key]:.4f}")
    print("\nConfusion Matrix:\n{}".format(scores['confusion_matrix']))

In [None]:
# Plot one confusion-matrix heatmap per kernel, side by side.
# The grid is sized from `kernels` rather than a hard-coded 3, so this cell
# keeps working when a kernel is added to or removed from the list.
n_kernels = len(kernels)
fig, axes = plt.subplots(1, n_kernels, figsize=(5 * n_kernels, 4), squeeze=False)
# squeeze=False always yields a 2-D array (even for one kernel); flatten it so
# the axes can be paired with the kernels by position.
axes = axes.ravel()
fig.suptitle('Confusion Matrices for Different SVM Kernels (Iris Dataset)', fontsize=14, fontweight='bold')

for ax, kernel in zip(axes, kernels):
    result = kernel_results[kernel]
    sns.heatmap(result['confusion_matrix'], annot=True, fmt='d', cmap='Blues', ax=ax,
                xticklabels=iris.target_names, yticklabels=iris.target_names)
    ax.set_title(f'{kernel.upper()} Kernel\nAccuracy: {result["accuracy"]:.4f}')
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')

plt.tight_layout()
# Persist the figure next to the notebook before displaying it
plt.savefig('iris_confusion_matrices.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Compare all kernels in a single table
print(f"{'='*60}")
print("COMPARISON OF ALL KERNELS")
print(f"{'='*60}")

results_df = pd.DataFrame({
    'Kernel': kernels,
    'Accuracy': [kernel_results[k]['accuracy'] for k in kernels],
    'Precision': [kernel_results[k]['precision'] for k in kernels],
    'Recall': [kernel_results[k]['recall'] for k in kernels],
    'F1-Score': [kernel_results[k]['f1_score'] for k in kernels]
})

print(results_df.to_string(index=False))

# Identify the best kernel by test accuracy. With a small test set several
# kernels often reach exactly the same accuracy, so every kernel that attains
# the top score is reported instead of presenting the first one as the sole winner.
best_accuracy = max(result['accuracy'] for result in kernel_results.values())
top_kernels = [name for name, result in kernel_results.items()
               if result['accuracy'] == best_accuracy]
# First top scorer in insertion order -- the same value max() selected before
best_kernel = top_kernels[0]

if len(top_kernels) == 1:
    print(f"\nBest Performing Kernel: {best_kernel.upper()}")
else:
    tied_names = ', '.join(name.upper() for name in top_kernels)
    print(f"\nBest Performing Kernels (tied at accuracy {best_accuracy:.4f}): {tied_names}")

In [None]:
# Performance comparison bar chart: one group per kernel, one bar per metric
fig, ax = plt.subplots(figsize=(10, 6))
x = np.arange(len(kernels))
width = 0.2

# Display label -> key in kernel_results. An explicit mapping avoids deriving
# the key from the label via string manipulation.
metric_keys = {
    'Accuracy': 'accuracy',
    'Precision': 'precision',
    'Recall': 'recall',
    'F1-Score': 'f1_score',
}
metrics = list(metric_keys)
colors = ['steelblue', 'coral', 'green', 'purple']

plotted_values = []
for i, (metric, color) in enumerate(zip(metrics, colors)):
    values = [kernel_results[k][metric_keys[metric]] for k in kernels]
    plotted_values.extend(values)
    ax.bar(x + i*width, values, width, label=metric, color=color)

ax.set_xlabel('Kernel', fontweight='bold')
ax.set_ylabel('Score', fontweight='bold')
ax.set_title('Performance Comparison of SVM Kernels on Iris Dataset', fontweight='bold')
# Centre each tick under its group of bars (offset 1.5 * width for four metrics)
ax.set_xticks(x + width * (len(metrics) - 1) / 2)
ax.set_xticklabels([k.upper() for k in kernels])
ax.legend()
# Zoom in on the top of the score range, but never above the lowest bar: a
# fixed 0.9 floor would clip any score at or below 0.9 out of the plot entirely.
y_floor = max(0.0, min(0.9, min(plotted_values) - 0.05))
ax.set_ylim([y_floor, 1.01])
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig('iris_performance_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

---
# Question 2: Effect of Feature Scaling on SVM Performance
**Dataset: Breast Cancer**

## Load Breast Cancer Dataset

In [None]:
# Fetch the bundled Breast Cancer data and separate it into features and labels
cancer = datasets.load_breast_cancer()
X_cancer, y_cancer = cancer.data, cancer.target

# Quick overview of what was loaded
print("Breast Cancer Dataset Shape: {}".format(X_cancer.shape))
print("Number of classes: {}".format(np.unique(y_cancer).size))
print("Class names: {}".format(cancer.target_names))
print("Number of features: {}".format(X_cancer.shape[1]))

In [None]:
# Show feature statistics to demonstrate different scales.
# Only the first few columns are tabulated; the number of columns left out is
# derived from the data rather than hard-coded in the message.
n_preview = 5
preview = X_cancer[:, :n_preview]

print(f"\nFeature Statistics (showing scale differences):")
feature_stats = pd.DataFrame({
    'Feature': cancer.feature_names[:n_preview],
    'Mean': preview.mean(axis=0),
    'Std': preview.std(axis=0),
    'Min': preview.min(axis=0),
    'Max': preview.max(axis=0)
})
print(feature_stats.to_string(index=False))

# preview.shape[1] can be smaller than n_preview if the matrix has fewer columns
n_remaining = X_cancer.shape[1] - preview.shape[1]
if n_remaining > 0:
    print(f"... (and {n_remaining} more features with varying scales)")

In [None]:
# Hold out 20% of the samples for testing; stratifying on the labels keeps
# the class proportions the same in both partitions
X_train_cancer, X_test_cancer, y_train_cancer, y_test_cancer = train_test_split(
    X_cancer,
    y_cancer,
    test_size=0.2,
    random_state=42,
    stratify=y_cancer,
)

# Report how many samples ended up in each partition
for split_name, split_features in (("Training", X_train_cancer), ("Testing", X_test_cancer)):
    print(f"{split_name} set size: {len(split_features)}")

## A) SVM WITHOUT Feature Scaling

In [None]:
rule = '=' * 60
print(rule)
print("A) SVM (RBF Kernel) WITHOUT Feature Scaling")
print(rule)

# Fit an RBF-kernel SVM directly on the raw, unscaled features
svm_no_scale = SVC(kernel='rbf', random_state=42).fit(X_train_cancer, y_train_cancer)

# Predict both partitions so training and testing accuracy can be compared
y_train_pred_no_scale = svm_no_scale.predict(X_train_cancer)
y_test_pred_no_scale = svm_no_scale.predict(X_test_cancer)

# Accuracy on each partition and the test-set confusion matrix
train_acc_no_scale = accuracy_score(y_train_cancer, y_train_pred_no_scale)
test_acc_no_scale = accuracy_score(y_test_cancer, y_test_pred_no_scale)
cm_no_scale = confusion_matrix(y_test_cancer, y_test_pred_no_scale)

print("\nPerformance WITHOUT Scaling:")
print("  Training Accuracy: {:.4f}".format(train_acc_no_scale))
print("  Testing Accuracy:  {:.4f}".format(test_acc_no_scale))

print("\nConfusion Matrix (Without Scaling):", cm_no_scale, sep="\n")

# Per-class precision / recall / F1 on the test set
report_no_scale = classification_report(
    y_test_cancer, y_test_pred_no_scale, target_names=cancer.target_names
)
print("\nClassification Report:", report_no_scale, sep="\n")

## B) SVM WITH Feature Scaling

In [None]:
rule = '=' * 60
print(rule)
print("B) SVM (RBF Kernel) WITH Feature Scaling (StandardScaler)")
print(rule)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both partitions
scaler = StandardScaler().fit(X_train_cancer)
X_train_scaled = scaler.transform(X_train_cancer)
X_test_scaled = scaler.transform(X_test_cancer)

# Spot-check the first five scaled training columns
scaled_preview = X_train_scaled[:, :5]
print("\nAfter scaling - Sample feature statistics:")
scaled_stats = pd.DataFrame({
    'Feature': cancer.feature_names[:5],
    'Mean': np.round(scaled_preview.mean(axis=0), 4),
    'Std': np.round(scaled_preview.std(axis=0), 4),
})
print(scaled_stats.to_string(index=False))
print("(All features now have mean = 0 and std = 1)")

In [None]:
# Fit the same RBF-kernel SVM, this time on the standardized features
svm_with_scale = SVC(kernel='rbf', random_state=42).fit(X_train_scaled, y_train_cancer)

# Predict both partitions so training and testing accuracy can be compared
y_train_pred_with_scale = svm_with_scale.predict(X_train_scaled)
y_test_pred_with_scale = svm_with_scale.predict(X_test_scaled)

# Accuracy on each partition and the test-set confusion matrix
train_acc_with_scale = accuracy_score(y_train_cancer, y_train_pred_with_scale)
test_acc_with_scale = accuracy_score(y_test_cancer, y_test_pred_with_scale)
cm_with_scale = confusion_matrix(y_test_cancer, y_test_pred_with_scale)

print("\nPerformance WITH Scaling:")
print("  Training Accuracy: {:.4f}".format(train_acc_with_scale))
print("  Testing Accuracy:  {:.4f}".format(test_acc_with_scale))

print("\nConfusion Matrix (With Scaling):", cm_with_scale, sep="\n")

# Per-class precision / recall / F1 on the test set
report_with_scale = classification_report(
    y_test_cancer, y_test_pred_with_scale, target_names=cancer.target_names
)
print("\nClassification Report:", report_with_scale, sep="\n")

## C) Comparison and Discussion

In [None]:
rule = '=' * 60
print(rule, "COMPARISON: WITH vs WITHOUT FEATURE SCALING", rule, sep="\n")

# Difference in test accuracy between the scaled and unscaled models
test_acc_gain = test_acc_with_scale - test_acc_no_scale

# One row per model; the unscaled model is the baseline, so its gain is 0
comparison_df = pd.DataFrame({
    'Model': ['Without Scaling', 'With Scaling'],
    'Training Accuracy': [train_acc_no_scale, train_acc_with_scale],
    'Testing Accuracy': [test_acc_no_scale, test_acc_with_scale],
    'Improvement': [0, test_acc_gain],
})

print(comparison_df.to_string(index=False))
print("\nAccuracy Improvement: {:.2f}%".format(test_acc_gain * 100))

In [None]:
# Visualize comparison: accuracy bars plus the two test-set confusion matrices
fig, axes = plt.subplots(1, 3, figsize=(16, 4))

# Plot 1: Accuracy Comparison
x_axis = np.arange(2)
width = 0.35
train_accs = [train_acc_no_scale, train_acc_with_scale]
test_accs = [test_acc_no_scale, test_acc_with_scale]

axes[0].bar(x_axis - width/2, train_accs, width, label='Training Accuracy', color='skyblue')
axes[0].bar(x_axis + width/2, test_accs, width, label='Testing Accuracy', color='coral')
axes[0].set_xlabel('Model', fontweight='bold')
axes[0].set_ylabel('Accuracy', fontweight='bold')
axes[0].set_title('Accuracy Comparison', fontweight='bold')
axes[0].set_xticks(x_axis)
axes[0].set_xticklabels(['Without Scaling', 'With Scaling'])
axes[0].legend()
# Keep the usual 0.85 floor when every accuracy is comfortably above it, but
# lower it otherwise: with a fixed floor, a bar at or below 0.85 (quite possible
# for the unscaled model) would be clipped out of view and the comparison lost.
acc_floor = max(0.0, min(0.85, min(train_accs + test_accs) - 0.05))
axes[0].set_ylim([acc_floor, 1.0])

# Plots 2 and 3: test-set confusion matrices, without and with scaling
heatmap_panels = [
    (axes[1], cm_no_scale, 'Blues', f'Without Scaling\nAccuracy: {test_acc_no_scale:.4f}'),
    (axes[2], cm_with_scale, 'Greens', f'With Scaling\nAccuracy: {test_acc_with_scale:.4f}'),
]
for panel_ax, panel_cm, panel_cmap, panel_title in heatmap_panels:
    sns.heatmap(panel_cm, annot=True, fmt='d', cmap=panel_cmap, ax=panel_ax,
                xticklabels=cancer.target_names, yticklabels=cancer.target_names)
    panel_ax.set_title(panel_title, fontweight='bold')
    panel_ax.set_ylabel('True Label')
    panel_ax.set_xlabel('Predicted Label')

plt.tight_layout()
# Persist the figure next to the notebook before displaying it
plt.savefig('breast_cancer_scaling_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

## Discussion: Effect of Feature Scaling on SVM

In [None]:
rule = '=' * 70
print(rule)
print("DISCUSSION: EFFECT OF FEATURE SCALING ON SVM PERFORMANCE")
print(rule)

# Change in test accuracy (scaled minus unscaled), multiplied by 100 for display
test_acc_gain_pct = (test_acc_with_scale - test_acc_no_scale) * 100

# The measured accuracies are interpolated into section 2 of the write-up
discussion_text = f"""
1. WHY SCALING MATTERS FOR SVM:
   - SVMs use distance-based computations to find the optimal hyperplane
   - Features with larger scales dominate the distance calculations
   - Without scaling, features with larger ranges have disproportionate influence
   - The RBF kernel computes: exp(-gamma * ||x - y||^2)
   - Unscaled features make this calculation unstable and biased

2. OBSERVED RESULTS:
   - Without scaling: Training Acc = {train_acc_no_scale:.4f}, Testing Acc = {test_acc_no_scale:.4f}
   - With scaling: Training Acc = {train_acc_with_scale:.4f}, Testing Acc = {test_acc_with_scale:.4f}
   - Improvement: {test_acc_gain_pct:.2f}%

3. IMPACT ON THE BREAST CANCER DATASET:
   - The dataset has features with vastly different scales
   - Example: 'mean radius' (6-28) vs 'worst concave points' (0-0.3)
   - Without scaling, high-magnitude features dominate
   - Scaling ensures all features contribute proportionally

4. STANDARDSCALER EFFECT:
   - Transforms each feature to have mean = 0 and std = 1
   - All features now contribute equally to distance calculations
   - Improves convergence and model performance
   - Essential for RBF and polynomial kernels

5. BEST PRACTICES:
   - ALWAYS use feature scaling for SVM (especially with RBF/polynomial kernels)
   - StandardScaler is most common for SVM
   - Fit scaler only on training data to avoid data leakage
   - Apply same transformation to test data

6. CONCLUSION:
   Feature scaling is CRITICAL for SVM performance. It ensures fair contribution
   from all features and significantly improves model accuracy and generalization.
"""
print(discussion_text)

---
# Summary

- **Q1**: Compared SVM performance with Linear, Polynomial, and RBF kernels on Iris dataset
- **Q2**: Demonstrated the critical importance of feature scaling for SVM on Breast Cancer dataset
- **Key Takeaways**: 
  - RBF kernel often performs best for non-linear problems
  - Feature scaling is essential for SVM, especially with RBF kernel
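  - StandardScaler standardizes each feature to zero mean and unit variance, so no feature dominates the kernel's distance computation merely because of its units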