In [9]:
import numpy as np
import scipy.io
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import OneClassSVM
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import balanced_accuracy_score, make_scorer

## Load and split data

In [10]:
# Read the MATLAB file and pull out the feature matrix and the flattened
# label vector.
data = scipy.io.loadmat("cardio.mat")
X, y = data["X"], data["y"].ravel()

# The file follows the pyod convention (0 = inlier, 1 = outlier), whereas
# sklearn outlier detectors predict +1 for inliers and -1 for outliers.
# Remap the labels so they can be compared with predictions directly.
is_inlier = y == 0
y_sklearn = np.where(is_inlier, 1, -1)

# Hold out 60% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_sklearn,
    train_size=0.4,
    random_state=1,
)

# Quick sanity report: split sizes and class balance of the training part.
print(f"Training set shape: {X_train.shape}")
print(f"Test set shape: {X_test.shape}")
print(f"Train inliers: {np.sum(y_train == 1)}, Outliers: {np.sum(y_train == -1)}")

Training set shape: (732, 21)
Test set shape: (1099, 21)
Train inliers: 658, Outliers: 74


## Grid Search pipeline

In [11]:
# Two-step model: standardize the features, then fit a one-class SVM.
pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("ocsvm", OneClassSVM()),
    ]
)

# Hyper-parameter search space, addressed through the "ocsvm" step name.
# NOTE(review): gamma is only used by the rbf/poly/sigmoid kernels, so the
# linear-kernel candidates that differ only in gamma repeat the same fit.
param_grid = dict(
    ocsvm__kernel=["linear", "rbf", "poly", "sigmoid"],
    ocsvm__gamma=["scale", "auto", 0.001, 0.01, 0.1, 0.5, 1.0],
    ocsvm__nu=[0.01, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5],
)

# Candidates are ranked by balanced accuracy of the +1/-1 predictions
# against the labels passed to fit().
scorer = make_scorer(balanced_accuracy_score)

# Exhaustive 5-fold search over the grid, using every available core.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring=scorer,
    cv=5,
    n_jobs=-1,
    verbose=1,
)

print("Starting Grid Search...")
grid_search.fit(X_train, y_train)
print("Grid Search Complete.")

Starting Grid Search...
Fitting 5 folds for each of 224 candidates, totalling 1120 fits
Grid Search Complete.


## Evaluation and best parameters

In [12]:
# Report the winning hyper-parameter combination from the search.
print("Best Parameters found:")
print(grid_search.best_params_)

# Pipeline selected by the grid search (available as best_estimator_).
best_model = grid_search.best_estimator_

# Label the held-out samples (+1 / -1) and score them against the
# ground truth with the same metric that drove the search.
y_test_pred = best_model.predict(X_test)
ba_score = balanced_accuracy_score(y_test, y_test_pred)

print(f"\nBalanced Accuracy on Test Set: {ba_score:.4f}")

Best Parameters found:
{'ocsvm__gamma': 0.001, 'ocsvm__kernel': 'rbf', 'ocsvm__nu': 0.3}

Balanced Accuracy on Test Set: 0.8755
