In [1]:
# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import os
# Tell loky (joblib's process backend) how many CPUs it may assume.
# NOTE(review): the value is "-1", not a positive core count -- confirm this is
# what was intended rather than "1" or an explicit number of cores.
os.environ.update({"LOKY_MAX_CPU_COUNT": "-1"})

In [2]:
# Download (or read from the local scikit-learn cache) the OpenML MNIST digits
mnist = fetch_openml('mnist_784', version=1)

# Features: cast to float32 and divide by 255 (raw pixels are presumably 0-255,
# which would put the result in [0, 1]). Labels: cast to integers.
X = mnist.data.astype('float32') / 255.0
y = mnist.target.astype(int)

print(f"Shape of dataset: {X.shape}")

Shape of dataset: (70000, 784)


In [3]:
# Learn the per-feature mean / standard deviation, then apply the scaling.
# The fitted scaler is kept so the same transform can be reused later.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


In [4]:
# Reduce the standardized features to 50 principal components.
# random_state is pinned so that the projection is reproducible across
# "Restart & Run All" whenever scikit-learn selects a randomized SVD solver
# for this problem size; it has no effect on the deterministic solvers.
pca = PCA(n_components=50, random_state=42)
X_pca = pca.fit_transform(X_scaled)

print(f"Shape after PCA: {X_pca.shape}")


Shape after PCA: (70000, 50)


In [None]:
# Initialize the One-Class SVM (RBF kernel; nu bounds the fraction of training
# points that may fall outside the learned boundary)
oc_svm = OneClassSVM(kernel='rbf', gamma='auto', nu=0.1)

# A one-class SVM models the *normal* class only. The evaluation in this
# notebook treats digit 0 as normal and every other digit as an anomaly, so the
# model has to be fitted on digit-0 samples alone; fitting on all ten digits
# would make it flag roughly nu of the data irrespective of the digit.
# np.asarray makes the mask work whether y is a pandas Series or an ndarray.
normal_mask = np.asarray(y) == 0
oc_svm.fit(X_pca[normal_mask])

In [None]:
# Score every sample: the model answers +1 for inliers and -1 for outliers
predictions = oc_svm.predict(X_pca)

# Re-encode as 0 = normal, 1 = anomaly (anything that is not +1 becomes 1)
predictions_binary = (predictions != 1).astype(int)


In [None]:
# Ground truth under the working assumption that digit 0 is the normal class:
# 0 = normal (digit 0), 1 = anomaly (any other digit)
y_true = (np.asarray(y) != 0).astype(int)

# Overall agreement between the model's flags and the ground truth
accuracy = accuracy_score(y_true, predictions_binary)
print(f"Accuracy of One-Class SVM with PCA: {accuracy:.2f}")

# Rows are the actual class, columns the predicted class (order: 0, 1)
conf_mat = confusion_matrix(y_true, predictions_binary)

# Draw the matrix as an annotated heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    conf_mat,
    annot=True,
    fmt='d',
    cmap='viridis',
    xticklabels=['Normal', 'Anomaly'],
    yticklabels=['True Normal', 'True Anomaly'],
    ax=ax,
)
ax.set_title('Confusion Matrix for One-Class SVM with PCA on MNIST')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()


In [None]:
# Positions of the samples the model flagged as anomalies (prediction == -1)
anomaly_indices = np.flatnonzero(predictions == -1)

# np.asarray accepts both a pandas DataFrame and a plain ndarray, so this cell
# works regardless of which container fetch_openml returned for X
# (X.to_numpy() would raise AttributeError on an ndarray).
X_np = np.asarray(X)

# Show the first 10 anomalies on a 2 x 5 grid of 28 x 28 images
fig, axes = plt.subplots(2, 5, figsize=(10, 5))
panels = axes.ravel()
for ax in panels:
    # Hide every frame up front so unused panels stay blank when fewer than
    # 10 anomalies were found
    ax.axis('off')
for i, (ax, idx) in enumerate(zip(panels, anomaly_indices), start=1):
    ax.imshow(X_np[idx].reshape(28, 28), cmap='gray')
    ax.set_title(f"Anomaly {i}")
fig.tight_layout()
plt.show()
