In [None]:
import numpy as np
import scipy.io
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
from pyod.models.ocsvm import OCSVM
from pyod.models.deep_svdd import DeepSVDD

# Set random seed for reproducibility
random_state = 1

## Load and Split Data

In [None]:
data = scipy.io.loadmat("shuttle.mat")
X = data["X"]
y = data["y"].ravel()

X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.5, random_state=random_state
)

print(f"Training set shape: {X_train.shape}")
print(f"Test set shape: {X_test.shape}")
print(f"Train outliers: {np.sum(y_train == 1)}")

Training set shape: (24548, 9)
Test set shape: (24549, 9)
Train outliers: 1727


## Normalize Data

In [3]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## OCSVM

In [None]:
print("--- OCSVM ---")
ocsvm = OCSVM(contamination=0.07, kernel="rbf")  # Contamination approx 7% for shuttle
ocsvm.fit(X_train_scaled)

# Predict
y_test_pred_ocsvm = ocsvm.predict(X_test_scaled)
y_test_scores_ocsvm = ocsvm.decision_function(X_test_scaled)

# Evaluate
ba_ocsvm = balanced_accuracy_score(y_test, y_test_pred_ocsvm)
roc_ocsvm = roc_auc_score(y_test, y_test_scores_ocsvm)

print(f"Balanced Accuracy: {ba_ocsvm:.4f}")
print(f"ROC AUC: {roc_ocsvm:.4f}")

--- OCSVM ---
Balanced Accuracy: 0.9785
ROC AUC: 0.9914


## DeepSVDD (Diff architectures)

In [None]:
architectures = [[64, 32], [32, 16], [128, 64, 32], [64, 32, 16, 8]]

for arch in architectures:
    print(f"\n--- DeepSVDD Architecture: {arch} ---")
    # epochs might need adjustment
    deep_svdd = DeepSVDD(
        contamination=0.07,
        hidden_neurons=arch,
        epochs=50,
        verbose=0,
        random_state=random_state,
        n_features=X_train_scaled.shape[1],
    )
    deep_svdd.fit(X_train_scaled)

    # Predict
    y_test_pred_deep = deep_svdd.predict(X_test_scaled)
    y_test_scores_deep = deep_svdd.decision_function(X_test_scaled)

    # Evaluate
    ba_deep = balanced_accuracy_score(y_test, y_test_pred_deep)
    roc_deep = roc_auc_score(y_test, y_test_scores_deep)

    print(f"Balanced Accuracy: {ba_deep:.4f}")
    print(f"ROC AUC: {roc_deep:.4f}")


--- DeepSVDD Architecture: [64, 32] ---
Epoch 1/50, Loss: 502.1226440742612
Epoch 2/50, Loss: 502.18038289248943
Epoch 3/50, Loss: 502.0956272035837
Epoch 4/50, Loss: 502.2561963945627
Epoch 5/50, Loss: 502.3407655954361
Epoch 6/50, Loss: 502.9009421020746
Epoch 7/50, Loss: 503.0354852974415
Epoch 8/50, Loss: 502.3003862798214
Epoch 9/50, Loss: 502.1089000403881
Epoch 10/50, Loss: 502.2810709774494
Epoch 11/50, Loss: 502.1362334191799
Epoch 12/50, Loss: 502.2664827555418
Epoch 13/50, Loss: 502.8150181174278
Epoch 14/50, Loss: 502.3380914926529
Epoch 15/50, Loss: 502.22004432976246
Epoch 16/50, Loss: 502.2535367757082
Epoch 17/50, Loss: 502.12886422872543
Epoch 18/50, Loss: 502.1710855066776
Epoch 19/50, Loss: 502.9611404687166
Epoch 20/50, Loss: 502.2444275766611
Epoch 21/50, Loss: 502.1834599971771
Epoch 22/50, Loss: 502.90861885249615
Epoch 23/50, Loss: 502.2800289541483
Epoch 24/50, Loss: 502.31644719839096
Epoch 25/50, Loss: 502.1315144598484
Epoch 26/50, Loss: 502.2616513520479
E

In [None]:
architectures = [[16, 8], [8, 4], [4, 2]]

for arch in architectures:
    print(f"\n--- DeepSVDD Architecture: {arch} ---")
    # epochs might need adjustment
    deep_svdd = DeepSVDD(
        contamination=0.07,
        hidden_neurons=arch,
        epochs=50,
        verbose=0,
        random_state=random_state,
        n_features=X_train_scaled.shape[1],
    )
    deep_svdd.fit(X_train_scaled)

    # Predict
    y_test_pred_deep = deep_svdd.predict(X_test_scaled)
    y_test_scores_deep = deep_svdd.decision_function(X_test_scaled)

    # Evaluate
    ba_deep = balanced_accuracy_score(y_test, y_test_pred_deep)
    roc_deep = roc_auc_score(y_test, y_test_scores_deep)

    print(f"Balanced Accuracy: {ba_deep:.4f}")
    print(f"ROC AUC: {roc_deep:.4f}")


--- DeepSVDD Architecture: [16, 8] ---
Epoch 1/50, Loss: 273.11952931061387
Epoch 2/50, Loss: 273.1990171261132
Epoch 3/50, Loss: 273.1442191377282
Epoch 4/50, Loss: 273.107747502625
Epoch 5/50, Loss: 273.2295179255307
Epoch 6/50, Loss: 273.07587012276053
Epoch 7/50, Loss: 273.14539473876357
Epoch 8/50, Loss: 273.10222313925624
Epoch 9/50, Loss: 273.66661850363016
Epoch 10/50, Loss: 273.7794629149139
Epoch 11/50, Loss: 273.069736007601
Epoch 12/50, Loss: 273.5500958375633
Epoch 13/50, Loss: 273.1327325217426
Epoch 14/50, Loss: 273.1217921860516
Epoch 15/50, Loss: 273.09728037938476
Epoch 16/50, Loss: 273.72848378121853
Epoch 17/50, Loss: 273.5789009556174
Epoch 18/50, Loss: 273.6710529848933
Epoch 19/50, Loss: 273.61454048380256
Epoch 20/50, Loss: 273.19646836444736
Epoch 21/50, Loss: 273.79002027213573
Epoch 22/50, Loss: 273.56841300055385
Epoch 23/50, Loss: 273.08765330165625
Epoch 24/50, Loss: 273.0967226997018
Epoch 25/50, Loss: 273.1172134578228
Epoch 26/50, Loss: 273.14071385189

The [8,4] architecture seems to provide the best cost to accuracy ratio.