## Ex 1
### 1.1

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

from pyod.utils.data import generate_data
from pyod.models.ocsvm import OCSVM
from pyod.models.deep_svdd import DeepSVDD

from sklearn.metrics import balanced_accuracy_score, roc_auc_score

# Seed NumPy's global RNG once, up front. The data generator and the DeepSVDD
# models further down are additionally given their own random_state=42.
np.random.seed(724)

In [None]:
# Synthetic 3-feature dataset from PyOD: 300 training and 200 test points,
# generated with a 15% contamination setting and a fixed random_state.
X_train, X_test, y_train, y_test = generate_data(
    n_train=300, n_test=200, n_features=3,
    contamination=0.15, random_state=42,
)

# Sanity check: shapes of both splits, then the observed outlier fraction
# (labels are 0/1, so the mean is the share of 1s).
print(f"training data shape: {X_train.shape}")
print(f"test data shape: {X_test.shape}")
print(f"training contamination: {y_train.mean():.2%}")
print(f"test contamination: {y_test.mean():.2%}")

### 1.2

In [None]:
# One-Class SVM with a linear kernel, fitted on the training features only
# (the labels are never shown to the detector).
ocsvm_linear = OCSVM(contamination=0.15, kernel='linear')
ocsvm_linear.fit(X_train)

# Score the held-out split: continuous outlier scores and thresholded 0/1 labels.
y_scores_linear = ocsvm_linear.decision_function(X_test)
y_pred_linear = ocsvm_linear.predict(X_test)

# Balanced accuracy uses the hard labels; ROC AUC uses the raw scores.
ba_linear = balanced_accuracy_score(y_true=y_test, y_pred=y_pred_linear)
auc_linear = roc_auc_score(y_true=y_test, y_score=y_scores_linear)

print(
    "ocsvm with linear kernel:",
    f"balanced accuracy: {ba_linear:.4f}",
    f"roc auc: {auc_linear:.4f}",
    sep="\n",
)

### 1.3

In [None]:
# 2x2 grid of 3D scatter plots:
#   top row    -> training split (ground truth | linear OCSVM predictions)
#   bottom row -> test split     (ground truth | linear OCSVM predictions)
fig = plt.figure(figsize=(16, 12))
ax1, ax2, ax3, ax4 = [fig.add_subplot(2, 2, slot, projection='3d') for slot in range(1, 5)]

# Predictions on the training split are only needed for the top-right panel.
y_pred_train = ocsvm_linear.predict(X_train)

# (axes, points to draw, 0/1 labels that decide the colouring, panel title)
panels = (
    (ax1, X_train, y_train, 'Training Data - Ground Truth'),
    (ax2, X_train, y_pred_train, 'Training Data - OCSVM Linear Predictions'),
    (ax3, X_test, y_test, 'Test Data - Ground Truth'),
    (ax4, X_test, y_pred_linear,
     f'Test Data - OCSVM Linear (BA={ba_linear:.3f}, AUC={auc_linear:.3f})'),
)

# Label value -> scatter styling; inliers (0) are drawn first, outliers (1) on top.
class_styles = (
    (0, dict(c='blue', label='Inliers', alpha=0.6, s=20)),
    (1, dict(c='red', label='Outliers', alpha=0.8, s=40, marker='x')),
)

for panel_ax, panel_points, panel_labels, panel_title in panels:
    for class_value, scatter_kwargs in class_styles:
        selected = panel_points[panel_labels == class_value]
        panel_ax.scatter(selected[:, 0], selected[:, 1], selected[:, 2], **scatter_kwargs)
    panel_ax.set_title(panel_title, fontsize=12, fontweight='bold')
    panel_ax.set_xlabel('Feature 1')
    panel_ax.set_ylabel('Feature 2')
    panel_ax.set_zlabel('Feature 3')
    panel_ax.legend()

plt.tight_layout()
plt.show()

### 1.4

In [None]:
# Same experiment as the linear model, but with an RBF kernel.
ocsvm_rbf = OCSVM(contamination=0.15, kernel='rbf')
ocsvm_rbf.fit(X_train)

# Continuous scores and hard 0/1 labels for the test split.
y_scores_rbf = ocsvm_rbf.decision_function(X_test)
y_pred_rbf = ocsvm_rbf.predict(X_test)

ba_rbf = balanced_accuracy_score(y_true=y_test, y_pred=y_pred_rbf)
auc_rbf = roc_auc_score(y_true=y_test, y_score=y_scores_rbf)

print(
    "OCSVM with RBF Kernel:",
    f"Balanced Accuracy: {ba_rbf:.4f}",
    f"ROC AUC: {auc_rbf:.4f}",
    sep="\n",
)

# Side-by-side with the linear-kernel metrics; the deltas are RBF minus linear,
# so a positive number means the RBF kernel scored higher.
ba_delta = ba_rbf - ba_linear
auc_delta = auc_rbf - auc_linear
print("\nComparison:")
print(f"Linear Kernel - BA: {ba_linear:.4f}, AUC: {auc_linear:.4f}")
print(f"RBF Kernel    - BA: {ba_rbf:.4f}, AUC: {auc_rbf:.4f}")
print(f"\nImprovement: BA={ba_delta:+.4f}, AUC={auc_delta:+.4f}")

In [None]:
# 2x2 grid of 3D scatter plots:
#   top row    -> training split (ground truth | RBF OCSVM predictions)
#   bottom row -> test split     (ground truth | RBF OCSVM predictions)
fig = plt.figure(figsize=(16, 12))
ax1, ax2, ax3, ax4 = [fig.add_subplot(2, 2, slot, projection='3d') for slot in range(1, 5)]

# Predictions on the training split are only needed for the top-right panel.
y_pred_train_rbf = ocsvm_rbf.predict(X_train)

# (axes, points to draw, 0/1 labels that decide the colouring, panel title)
panels = (
    (ax1, X_train, y_train, 'Training Data - Ground Truth'),
    (ax2, X_train, y_pred_train_rbf, 'Training Data - OCSVM RBF Predictions'),
    (ax3, X_test, y_test, 'Test Data - Ground Truth'),
    (ax4, X_test, y_pred_rbf,
     f'Test Data - OCSVM RBF (BA={ba_rbf:.3f}, AUC={auc_rbf:.3f})'),
)

# Label value -> scatter styling; inliers (0) are drawn first, outliers (1) on top.
class_styles = (
    (0, dict(c='blue', label='Inliers', alpha=0.6, s=20)),
    (1, dict(c='red', label='Outliers', alpha=0.8, s=40, marker='x')),
)

for panel_ax, panel_points, panel_labels, panel_title in panels:
    for class_value, scatter_kwargs in class_styles:
        selected = panel_points[panel_labels == class_value]
        panel_ax.scatter(selected[:, 0], selected[:, 1], selected[:, 2], **scatter_kwargs)
    panel_ax.set_title(panel_title, fontsize=12, fontweight='bold')
    panel_ax.set_xlabel('Feature 1')
    panel_ax.set_ylabel('Feature 2')
    panel_ax.set_zlabel('Feature 3')
    panel_ax.legend()

plt.tight_layout()
plt.show()

### 1.5

In [None]:
# Train Deep SVDD on the synthetic training split.
# n_features is taken from the data rather than hard-coded, so the network's
# input size always matches X_train (same convention as the shuttle cells,
# which pass X_train_shuttle.shape[1]).
deep_svdd = DeepSVDD(
    n_features=X_train.shape[1],
    contamination=0.15,
    epochs=50,
    verbose=1,
    random_state=42,
)
deep_svdd.fit(X_train)

# Predict on test data: hard 0/1 labels and continuous outlier scores.
y_pred_deep = deep_svdd.predict(X_test)
y_scores_deep = deep_svdd.decision_function(X_test)

# Balanced accuracy is computed from the labels, ROC AUC from the scores.
ba_deep = balanced_accuracy_score(y_test, y_pred_deep)
auc_deep = roc_auc_score(y_test, y_scores_deep)

print("\nDeep SVDD:")
print(f"Balanced Accuracy: {ba_deep:.4f}")
print(f"ROC AUC: {auc_deep:.4f}")

In [None]:
# 2x2 grid of 3D scatter plots:
#   top row    -> training split (ground truth | Deep SVDD predictions)
#   bottom row -> test split     (ground truth | Deep SVDD predictions)
fig = plt.figure(figsize=(16, 12))
ax1, ax2, ax3, ax4 = [fig.add_subplot(2, 2, slot, projection='3d') for slot in range(1, 5)]

# Predictions on the training split are only needed for the top-right panel.
y_pred_train_deep = deep_svdd.predict(X_train)

# (axes, points to draw, 0/1 labels that decide the colouring, panel title)
panels = (
    (ax1, X_train, y_train, 'Training Data - Ground Truth'),
    (ax2, X_train, y_pred_train_deep, 'Training Data - Deep SVDD Predictions'),
    (ax3, X_test, y_test, 'Test Data - Ground Truth'),
    (ax4, X_test, y_pred_deep,
     f'Test Data - Deep SVDD (BA={ba_deep:.3f}, AUC={auc_deep:.3f})'),
)

# Label value -> scatter styling; inliers (0) are drawn first, outliers (1) on top.
class_styles = (
    (0, dict(c='blue', label='Inliers', alpha=0.6, s=20)),
    (1, dict(c='red', label='Outliers', alpha=0.8, s=40, marker='x')),
)

for panel_ax, panel_points, panel_labels, panel_title in panels:
    for class_value, scatter_kwargs in class_styles:
        selected = panel_points[panel_labels == class_value]
        panel_ax.scatter(selected[:, 0], selected[:, 1], selected[:, 2], **scatter_kwargs)
    panel_ax.set_title(panel_title, fontsize=12, fontweight='bold')
    panel_ax.set_xlabel('Feature 1')
    panel_ax.set_ylabel('Feature 2')
    panel_ax.set_zlabel('Feature 3')
    panel_ax.legend()

plt.tight_layout()
plt.show()

## Ex 2

### 2.1

In [None]:
from scipy.io import loadmat
from sklearn.model_selection import train_test_split

# Read the cardio dataset from its MATLAB file: 'X' holds the features and
# 'y' the labels, which are flattened to a 1-D vector.
mat = loadmat('../lab2/cardio.mat')
X, y = mat['X'], mat['y'].ravel()

# Stratified split with 40% of the samples used for training.
# NOTE: this rebinds X_train / X_test / y_train / y_test from Ex 1.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, train_size=0.4, random_state=42
)

print(f"Training set: {len(X_train)} samples")
print(f"Test set: {len(X_test)} samples")

### 2.2

In [None]:
# Convert labels from the PyOD convention (0 = inlier, 1 = outlier) to the
# scikit-learn outlier-detector convention (+1 = inlier, -1 = outlier).
#
# The mapping is done with np.where rather than the arithmetic `-2 * y + 1`
# because the arithmetic form depends on the dtype loadmat returned: an
# unsigned-integer label vector cannot represent the intermediate -2, and a
# float vector yields float labels. np.where always produces plain integer
# +1 / -1 labels regardless of the input dtype.
for _labels in (y_train, y_test):
    # Guard the assumption the mapping relies on: labels are strictly binary 0/1.
    assert set(np.unique(_labels)) <= {0, 1}, "expected binary 0/1 labels"

y_train_sklearn = np.where(y_train == 1, -1, 1)
y_test_sklearn = np.where(y_test == 1, -1, 1)

print("Label conversion:")
print("PyOD format - Inlier: 0, Outlier: 1")
print("sklearn format - Inlier: 1, Outlier: -1")
print(f"\nTrain labels sklearn format: {np.unique(y_train_sklearn, return_counts=True)}")
print(f"Test labels sklearn format: {np.unique(y_test_sklearn, return_counts=True)}")

### 2.3

In [None]:
# Outlier fraction of the training split (labels are 0/1, so the mean is the
# share of outliers); it is also offered to the search as one candidate nu.
contamination = np.mean(y_train)
print(f"Train contamination: {contamination:.4f}")

# Search space. The 'ocsvm__' prefix addresses parameters of a pipeline step
# named 'ocsvm'.
param_grid = dict(
    ocsvm__kernel=['linear', 'rbf', 'poly', 'sigmoid'],
    ocsvm__nu=[0.01, 0.05, 0.1, contamination, 0.2, 0.3],
    ocsvm__gamma=['scale', 'auto', 0.001, 0.01, 0.1, 1.0],
)

print("\nParameter grid:")
for key in param_grid:
    values = param_grid[key]
    print(f"{key}: {values}")

### 2.4

In [None]:
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

# Standardise the features, then fit a One-Class SVM; the step names are what
# the 'ocsvm__*' keys in param_grid refer to.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('ocsvm', OneClassSVM()),
])

print("start")

# 3-fold search over param_grid, ranked by balanced accuracy, using all cores.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='balanced_accuracy',
    cv=3,
    n_jobs=-1,
    verbose=1,
)

# The +1 / -1 labels are passed so the scorer can evaluate each fold.
grid_search.fit(X_train, y_train_sklearn)

print("gata")

### 2.5

In [None]:
# Winning configuration and its mean cross-validated score.
best_params = grid_search.best_params_
best_score_cv = grid_search.best_score_

rule = "=" * 60  # horizontal separator used in both report sections

print(rule, "BEST PARAMETERS FOUND BY GridSearchCV", rule, sep="\n")
for param, value in best_params.items():
    print(f"{param}: {value}")
print(f"\nBest CV Balanced Accuracy: {best_score_cv:.4f}")
print(rule)

# Evaluate on test set, comparing against the labels in the +1 / -1 convention.
y_pred_test = grid_search.predict(X_test)
ba_test = balanced_accuracy_score(y_true=y_test_sklearn, y_pred=y_pred_test)

print(f"\n{rule}")
print("TEST SET EVALUATION")
print(rule)
print(f"Balanced Accuracy on Test Set: {ba_test:.4f}")
print(rule)

## Ex 3

### 3.1

In [None]:
# Read the shuttle dataset from its MATLAB file: 'X' holds the features and
# 'y' the labels, which are flattened to a 1-D vector.
mat_shuttle = loadmat('../lab3/shuttle.mat')
X_shuttle, y_shuttle = mat_shuttle['X'], mat_shuttle['y'].ravel()

# Stratified 50/50 train/test split.
X_train_shuttle, X_test_shuttle, y_train_shuttle, y_test_shuttle = train_test_split(
    X_shuttle, y_shuttle, stratify=y_shuttle, test_size=0.5, random_state=42
)

print(f"\nTraining set: {len(X_train_shuttle)} samples")
print(f"Test set: {len(X_test_shuttle)} samples")

# Normalise both splits with PyOD's standardizer helper; the scaled arrays
# replace the raw ones under the same names.
from pyod.utils.utility import standardizer
X_train_shuttle, X_test_shuttle = standardizer(X_train_shuttle, X_test_shuttle)

### 3.2

In [None]:
# Outlier fraction of the shuttle training split; reused as the contamination
# setting for every detector trained on this dataset.
contamination_shuttle = np.mean(y_train_shuttle)
print(f"Train contamination: {contamination_shuttle:.4f}")

# RBF-kernel One-Class SVM fitted on the standardized training features.
ocsvm_shuttle = OCSVM(contamination=contamination_shuttle, kernel='rbf')
ocsvm_shuttle.fit(X_train_shuttle)

# Continuous scores and hard 0/1 labels for the test split.
y_scores_ocsvm = ocsvm_shuttle.decision_function(X_test_shuttle)
y_pred_ocsvm = ocsvm_shuttle.predict(X_test_shuttle)

ba_ocsvm = balanced_accuracy_score(y_true=y_test_shuttle, y_pred=y_pred_ocsvm)
auc_ocsvm = roc_auc_score(y_true=y_test_shuttle, y_score=y_scores_ocsvm)

print(
    "\nOCSVM Results:",
    f"Balanced Accuracy: {ba_ocsvm:.4f}",
    f"ROC AUC: {auc_ocsvm:.4f}",
    sep="\n",
)

In [None]:
# Quick check of (n_samples, n_features) before sizing the Deep SVDD input layer.
print(np.shape(X_train_shuttle))

In [None]:
# Deep SVDD on shuttle with the library's default hidden layers (no
# hidden_neurons passed); the input width is read from the data.
deep_svdd_shuttle = DeepSVDD(
    n_features=X_train_shuttle.shape[1],
    contamination=contamination_shuttle,
    random_state=42,
    epochs=50,
    verbose=1,
)
deep_svdd_shuttle.fit(X_train_shuttle)

# Continuous scores and hard 0/1 labels for the test split.
# NOTE: y_pred_deep / y_scores_deep / ba_deep / auc_deep are the same names the
# Ex 1.5 cell assigns, so from here on they hold the shuttle results.
y_scores_deep = deep_svdd_shuttle.decision_function(X_test_shuttle)
y_pred_deep = deep_svdd_shuttle.predict(X_test_shuttle)

ba_deep = balanced_accuracy_score(y_true=y_test_shuttle, y_pred=y_pred_deep)
auc_deep = roc_auc_score(y_true=y_test_shuttle, y_score=y_scores_deep)

print(
    "\nDeep SVDD Results (Default Architecture):",
    f"Balanced Accuracy: {ba_deep:.4f}",
    f"ROC AUC: {auc_deep:.4f}",
    sep="\n",
)

### 3.3

In [None]:
# Architecture 1: two hidden layers of 32 and 16 units.
deep_svdd_arch1 = DeepSVDD(
    n_features=X_train_shuttle.shape[1],
    hidden_neurons=[32, 16],
    contamination=contamination_shuttle,
    random_state=42,
    epochs=50,
    verbose=0,
)
deep_svdd_arch1.fit(X_train_shuttle)

# Continuous scores and hard 0/1 labels for the test split, then both metrics.
y_scores_arch1 = deep_svdd_arch1.decision_function(X_test_shuttle)
y_pred_arch1 = deep_svdd_arch1.predict(X_test_shuttle)
ba_arch1 = balanced_accuracy_score(y_true=y_test_shuttle, y_pred=y_pred_arch1)
auc_arch1 = roc_auc_score(y_true=y_test_shuttle, y_score=y_scores_arch1)

print(
    "Architecture 1 (Shallow: [32, 16]):",
    f"Balanced Accuracy: {ba_arch1:.4f}",
    f"ROC AUC: {auc_arch1:.4f}",
    sep="\n",
)

In [None]:
# Architecture 2: four hidden layers narrowing from 128 down to 16 units.
deep_svdd_arch2 = DeepSVDD(
    n_features=X_train_shuttle.shape[1],
    hidden_neurons=[128, 64, 32, 16],
    contamination=contamination_shuttle,
    random_state=42,
    epochs=50,
    verbose=0,
)
deep_svdd_arch2.fit(X_train_shuttle)

# Continuous scores and hard 0/1 labels for the test split, then both metrics.
y_scores_arch2 = deep_svdd_arch2.decision_function(X_test_shuttle)
y_pred_arch2 = deep_svdd_arch2.predict(X_test_shuttle)
ba_arch2 = balanced_accuracy_score(y_true=y_test_shuttle, y_pred=y_pred_arch2)
auc_arch2 = roc_auc_score(y_true=y_test_shuttle, y_score=y_scores_arch2)

print(
    "\nArchitecture 2 (Deep: [128, 64, 32, 16]):",
    f"Balanced Accuracy: {ba_arch2:.4f}",
    f"ROC AUC: {auc_arch2:.4f}",
    sep="\n",
)

In [None]:
# Architecture 3: three hidden layers of 64, 64 and 32 units.
deep_svdd_arch3 = DeepSVDD(
    n_features=X_train_shuttle.shape[1],
    hidden_neurons=[64, 64, 32],
    contamination=contamination_shuttle,
    random_state=42,
    epochs=50,
    verbose=0,
)
deep_svdd_arch3.fit(X_train_shuttle)

# Continuous scores and hard 0/1 labels for the test split, then both metrics.
y_scores_arch3 = deep_svdd_arch3.decision_function(X_test_shuttle)
y_pred_arch3 = deep_svdd_arch3.predict(X_test_shuttle)
ba_arch3 = balanced_accuracy_score(y_true=y_test_shuttle, y_pred=y_pred_arch3)
auc_arch3 = roc_auc_score(y_true=y_test_shuttle, y_score=y_scores_arch3)

print(
    "\nArchitecture 3 (Wide: [64, 64, 32]):",
    f"Balanced Accuracy: {ba_arch3:.4f}",
    f"ROC AUC: {auc_arch3:.4f}",
    sep="\n",
)