# SVM Classification Tutorial
Generated: 2025-09-13T01:20:38

This notebook shows a complete, minimal workflow for **SVM** on a synthetic binary classification dataset with 12 features.

**What you'll do:**
1. Load the dataset (`svm_data.csv`) with 12 feature columns (`f1..f12`) and a `target` (0/1).
2. Explore shapes and basic stats.
3. Train/validation split.
4. Preprocess (scaling where appropriate).
5. Train an SVM classifier with reasonable defaults.
6. Evaluate with accuracy, precision, recall, F1, confusion matrix, ROC-AUC.
7. Save the fitted model (optional).


In [None]:
# Setup
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report, RocCurveDisplay

import matplotlib.pyplot as plt

# Load data
# The CSV location is given relative to the current working directory.
data_path = "../datasets/svm_data.csv"
df = pd.read_csv(filepath_or_buffer=data_path)

# Report (rows, columns), then show the first five rows as the cell's output.
print("Shape:", df.shape)
df.head(5)

In [None]:
# Basic EDA
# Summary statistics, transposed so every described column becomes one row.
summary_stats = df.describe().T
print(summary_stats)

# Relative frequency of each label in `target`, to spot class imbalance early.
class_share = df['target'].value_counts(normalize=True).rename('proportion')
print("\nClass distribution:\n", class_share)

In [None]:
# Train/Validation split
# Separate the label column from the feature columns.
y = df['target']
X = df.drop(columns='target')

# Hold out 30% of the rows for validation. Stratifying on y keeps the class
# proportions comparable in both parts; the fixed seed makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=123,
)

# Cell output: shapes of the training and validation feature frames.
(X_train.shape, X_val.shape)

In [None]:
# SVM tends to benefit from scaling
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

# Two-stage pipeline: standardize the features, then fit an RBF-kernel SVC.
# Keeping the scaler inside the pipeline means it is fitted on the training
# split only and re-applied automatically whenever the pipeline predicts.
# probability=True is needed so that predict_proba is available below.
pipe = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        (
            "clf",
            SVC(C=1.0, kernel="rbf", gamma="scale", probability=True, random_state=123),
        ),
    ]
)
pipe.fit(X_train, y_train)

# Hard class predictions for the thresholded metrics.
y_pred = pipe.predict(X_val)

# Second probability column, i.e. the score for the second entry of
# pipe.classes_ (label 1 when the target is coded 0/1).
y_proba = pipe.predict_proba(X_val)[:, 1]

In [None]:
# Evaluation
# Thresholded metrics are computed from the hard predictions (y_pred);
# ROC AUC is computed from the continuous positive-class scores (y_proba).
acc, prec, rec, f1 = (
    metric(y_val, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
auc = roc_auc_score(y_val, y_proba)

# One print call; the lines and formatting match five separate prints.
print(
    f"Accuracy : {acc:.3f}\n"
    f"Precision: {prec:.3f}\n"
    f"Recall   : {rec:.3f}\n"
    f"F1-score : {f1:.3f}\n"
    f"ROC AUC  : {auc:.3f}"
)

# Per-class precision / recall / F1 / support table.
print("\nClassification Report:\n", classification_report(y_val, y_pred))

# Confusion matrix: rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_val, y_pred)
print("\nConfusion Matrix:\n", cm)

# ROC curve with matplotlib defaults (no style or color specified).
# The title is set on the axes the display object drew on.
roc_display = RocCurveDisplay.from_predictions(y_val, y_proba)
roc_display.ax_.set_title("ROC Curve")
plt.show()

In [None]:
# (Optional) Save model with joblib for later reuse
# Uncomment to persist the fitted pipeline (scaler + SVC) as a single object
# import joblib
# joblib.dump(pipe, "model.joblib")
