In [6]:
# 🧪 SVM Example: Classifying Synthetic Data in 4D Feature Space

# ------------------------------------------------------------
# 📦 Import libraries
# ------------------------------------------------------------
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from svm import SVMModel  # 🧠 Our custom SVM wrapper (uses scikit-learn)

In [7]:
# ------------------------------------------------------------
# 🎲 Step 1: Generate synthetic classification data with 4 features
# ------------------------------------------------------------

# ❓ Why: we simulate a small health dataset in which every sample has
# 4 features. The column order below is the order used by every mean vector,
# covariance matrix and data array in this notebook.
FEATURE_NAMES = [
    "Age (years)",
    "Resting Heart Rate (bpm)",
    "Systolic Blood Pressure (mmHg)",
    "Cholesterol Level (mg/dL)",
]
#
# Classes: 0 = Healthy, 1 = At Risk
#
# 💡 Fun fact: synthetic health data lets researchers test algorithms before
# applying them to real patient data. All numbers here are made up for the demo.

# ------------------------------------------------------------
# ✅ Step A: Set the random seed for reproducibility
# ------------------------------------------------------------
SEED = 42
N_PER_CLASS = 50  # samples drawn per class; single source of truth for sizes/labels

# Seeds NumPy's global (legacy) generator so both draws below are identical on
# every run, which helps debugging and sharing results.
# NOTE: np.random.default_rng(SEED) is the recommended API for new code, but
# switching generators would change the sampled values, so it is kept as-is.
np.random.seed(SEED)

# ------------------------------------------------------------
# 🧊 Step B: Generate data points for Healthy patients (Class 0)
# ------------------------------------------------------------

# A multivariate normal distribution produces points clustered around a mean
# vector, with spread and orientation defined by a covariance matrix:
# - cov[i][i] (diagonal) is the variance of feature i on its own.
# - cov[i][j], i != j (off-diagonal) is the covariance of features i and j:
#     positive → the two features tend to rise and fall together,
#     negative → one tends to rise while the other falls,
#     zero     → no linear relationship between them.
# - The matrix is symmetric (cov[i][j] == cov[j][i]), so a 4x4 matrix has six
#   distinct feature pairs: (0,1), (0,2), (0,3), (1,2), (1,3), (2,3).

mean_healthy = [35, 65, 120, 180]  # Average healthy values: Age, HR, BP, Cholesterol
cov_healthy = [
    [30, 2, 5, 10],
    [2, 25, 3, 5],
    [5, 3, 40, 12],
    [10, 5, 12, 50],
]

# ------------------------------------------------------------
# 🔥 Step C: Generate data points for At Risk patients (Class 1)
# ------------------------------------------------------------

mean_risk = [55, 80, 140, 240]  # Older age, higher heart rate, BP and cholesterol on average
cov_risk = [
    [40, 5, 7, 15],
    [5, 30, 4, 8],
    [7, 4, 50, 18],
    [15, 8, 18, 60],
]

# Sanity-check both covariance matrices before sampling: a typo that breaks
# symmetry or positive-definiteness would otherwise only surface as a NumPy
# warning. These checks do not consume any random numbers.
for cov_matrix in (cov_healthy, cov_risk):
    cov_array = np.asarray(cov_matrix, dtype=float)
    assert cov_array.shape == (len(FEATURE_NAMES), len(FEATURE_NAMES)), "covariance must be 4x4"
    assert np.array_equal(cov_array, cov_array.T), "covariance must be symmetric"
    assert np.linalg.eigvalsh(cov_array).min() > 0, "covariance must be positive definite"

# Draw order matters for reproducibility: class 0 first, then class 1.
class0 = np.random.multivariate_normal(mean_healthy, cov_healthy, size=N_PER_CLASS)
class1 = np.random.multivariate_normal(mean_risk, cov_risk, size=N_PER_CLASS)

# ------------------------------------------------------------
# 🧩 Step D: Combine features and labels
# ------------------------------------------------------------
X = np.vstack((class0, class1))  # Stack vertically → (100, 4)

# Labels are derived from the actual block lengths so they can never drift out
# of sync with X: first len(class0) zeros, then len(class1) ones.
y = np.repeat([0, 1], [len(class0), len(class1)])

assert X.shape == (2 * N_PER_CLASS, len(FEATURE_NAMES))
assert y.shape == (X.shape[0],)

# ------------------------------------------------------------
# 📊 Step E: Print basic info about dataset shape
# ------------------------------------------------------------
print(f"Feature matrix shape: {X.shape} (samples, features)")
print(f"Label vector shape: {y.shape}")



Feature matrix shape: (100, 4) (samples, features)
Label vector shape: (100,)


In [8]:
# ------------------------------------------------------------
# ✂️ Step 2: Split dataset into training and testing sets
# ------------------------------------------------------------
# Hold out 20% of the samples for evaluation. The fixed random_state makes the
# split identical on every run.
# NOTE(review): the split is not stratified, so the two classes are not
# guaranteed to be equally represented in the test set — consider stratify=y.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts




In [9]:
# ------------------------------------------------------------
# 🧠 Step 3: Train SVM model with RBF kernel (good for complex data)
# ------------------------------------------------------------
# The RBF kernel implicitly maps samples into a higher-dimensional space, so
# the SVM can learn non-linear decision boundaries. That helps when the classes
# are not linearly separable in the original feature space.
# NOTE(review): RBF SVMs are sensitive to feature scale and the four features
# here live on different scales — confirm whether SVMModel standardizes inputs.
svm_options = {"kernel": "rbf"}
model = SVMModel(**svm_options)
model.train(X_train, y_train)



In [10]:
# ------------------------------------------------------------
# 🔍 Step 4: Make predictions and evaluate
# ------------------------------------------------------------
# Score the model only on the held-out test samples.
y_pred = model.predict(X_test)

# Compute the metrics first, then print them: overall accuracy, followed by
# the per-class precision / recall / F1-score table.
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)

print("✅ Accuracy:", test_accuracy)
print("📊 Classification Report:\n", report_text)

# ------------------------------------------------------------
# 🎉 Reminder: feature order is [Age, Resting Heart Rate, Systolic BP, Cholesterol]
# 🎉 Nothing is plotted here because 4D data is hard to visualize directly;
# a dimensionality reduction such as PCA can project it to 2D if needed!
# ------------------------------------------------------------

✅ Accuracy: 1.0
📊 Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      1.00      1.00         8

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20

