In [2]:
import numpy as np
import psutil  # For memory tracking
import pandas as pd
import joblib  # For model saving/loading
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import time
import os

class ClassicalSVM:
    def __init__(self, data, pca_components):
        print("Initializing Classical SVM...")
        self.data = data
        self.pca_components = pca_components
        self.apply_pca()

    def apply_pca(self):
        """Reduce feature dimensions using PCA"""
        print("Applying PCA...")
        pca = PCA(n_components=self.pca_components)
        self.data.train_features = pca.fit_transform(self.data.train_features)
        self.data.test_features = pca.transform(self.data.test_features)
        print(f"PCA Variance Ratio: {sum(pca.explained_variance_ratio_):.4f}")

    
    @staticmethod
    def svm_hyperparameter_tuning(train_features, train_labels):
        print("Starting hyperparameter tuning...")

        param_grid = {
        'C': [0.1, 1, 10, 100],
        'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
        'gamma': ['scale', 'auto', 0.01, 0.1, 1],
        'degree': [2, 3, 4]  # Only relevant for 'poly'
        }

        svc = SVC()
        grid_search = GridSearchCV(svc, param_grid, cv=4, scoring='accuracy', verbose=1, n_jobs=-1)
        grid_search.fit(train_features, train_labels)

        print(f"Best Parameters: {grid_search.best_params_}")
        print(f"Best Cross-Validation Score: {grid_search.best_score_:.4f}")

        return grid_search.best_estimator_

    def fit(self):
        """Train classical SVM with memory tracking"""
        print("Training Classical SVM...")

        process = psutil.Process()
        mem_before = process.memory_info().rss / (1024 * 1024)  # in MB

        start_time = time.time()
        self.svc = SVC(kernel='rbf', C=1.0)
        self.svc = self.svm_hyperparameter_tuning(self.data.train_features, self.data.train_labels)
        end_time = time.time()

        mem_after = process.memory_info().rss / (1024 * 1024)

        print(f"SVM training complete. Time taken: {(end_time - start_time) / 60:.2f} minutes.")
        print(f"Memory Usage: {mem_after - mem_before:.2f} MB (during training)")

    def evaluate(self):
        """Evaluate SVM using accuracy metrics and memory tracking"""
        print("Evaluating Classical SVM...")

        process = psutil.Process()
        mem_before = process.memory_info().rss / (1024 * 1024)

        start_time = time.time()
        predictions = self.svc.predict(self.data.test_features)
        end_time = time.time()

        mem_after = process.memory_info().rss / (1024 * 1024)

        print(f"SVM evaluation complete. Time taken: {(end_time - start_time) / 60:.2f} minutes.")
        print(f"Memory Usage: {mem_after - mem_before:.2f} MB (during evaluation)")

        acc = accuracy_score(self.data.test_labels, predictions)
        prec = precision_score(self.data.test_labels, predictions, average="weighted")
        rec = recall_score(self.data.test_labels, predictions, average="weighted")
        f1 = f1_score(self.data.test_labels, predictions, average="weighted")
        conf_matrix = confusion_matrix(self.data.test_labels, predictions)

        print(f"SVM Accuracy: {acc:.4f}")
        print(f"Precision: {prec:.4f}")
        print(f"Recall: {rec:.4f}")
        print(f"F1 Score: {f1:.4f}")
        print("Confusion Matrix:\n", conf_matrix)

        return {
            "accuracy": acc,
            "precision": prec,
            "recall": rec,
            "f1_score": f1,
            "confusion_matrix": conf_matrix
        }

    def save_model(self, filename="csvm_midmodel.pkl"):
        """Save the trained classical SVM model"""
        if not hasattr(self, "svc") or self.svc is None:
            print("Error: No trained model found. Train the SVM first.")
            return

        print("Saving SVM model...")
        joblib.dump(self.svc, filename)
        print(f"SVM model saved to {filename}")

    def load_model(self, filename="csvm_midmodel.pkl"):
        """Load a previously saved SVM model"""
        if not os.path.exists(filename):
            print(f"Error: Model file {filename} not found.")
            return

        print("Loading SVM model...")
        self.svc = joblib.load(filename)
        print(f"SVM model loaded from {filename}")

class Data:
    def __init__(self, filename):
        print("Initializing dataset...")
        self.filename = filename
        self.load_data()
        self.prepare_datasets()

    def load_data(self):
        """Load dataset and apply necessary preprocessing"""
        print("Loading dataset...")
        self.df = pd.read_csv(self.filename)

    def prepare_datasets(self):
        """Split dataset into train/test and scale features"""
        print("Preparing datasets...")

        features = self.df.drop(columns=["HAS ADHD"]).to_numpy()
        labels = self.df["HAS ADHD"].to_numpy()

        scaler = MinMaxScaler(feature_range=(0, 2 * np.pi))
        features = scaler.fit_transform(features)

        train_features, test_features, train_labels, test_labels = train_test_split(
            features, labels, test_size=0.2, stratify=labels, random_state=42
        )

        unique, counts = np.unique(train_labels, return_counts=True)
        class_distribution = dict(zip(unique, counts))
        print(f"Class Distribution in Train Dataset: {class_distribution}")

        self.train_features, self.train_labels = train_features, train_labels
        self.test_features, self.test_labels = test_features, test_labels

        print(f"Final Train set size: {len(self.train_features)}, Test set size: {len(self.test_features)}")


# Load dataset and train Classical SVM
dataset = Data('MLsheet - SRSno-avg.csv')
svm_model = ClassicalSVM(dataset, pca_components=6)
svm_model.fit()

# Evaluate SVM performance
metrics = svm_model.evaluate()

# Save the trained model for later use
svm_model.save_model()

Initializing dataset...
Loading dataset...
Preparing datasets...
Class Distribution in Train Dataset: {np.int64(0): np.int64(834), np.int64(1): np.int64(806)}
Final Train set size: 1640, Test set size: 410
Initializing Classical SVM...
Applying PCA...
PCA Variance Ratio: 1.0000
Training Classical SVM...
Starting hyperparameter tuning...
Fitting 4 folds for each of 240 candidates, totalling 960 fits
Best Parameters: {'C': 100, 'degree': 2, 'gamma': 1, 'kernel': 'rbf'}
Best Cross-Validation Score: 0.9409
SVM training complete. Time taken: 70.90 minutes.
Memory Usage: 2.98 MB (during training)
Evaluating Classical SVM...
SVM evaluation complete. Time taken: 0.00 minutes.
Memory Usage: 0.02 MB (during evaluation)
SVM Accuracy: 0.9463
Precision: 0.9479
Recall: 0.9463
F1 Score: 0.9463
Confusion Matrix:
 [[192  17]
 [  5 196]]
Saving SVM model...
SVM model saved to csvm_midmodel.pkl
