### Display the versions of the libraries used for reference purposes.

In [17]:
import sys
import numpy as np
import tensorflow as tf
import sklearn
import torch
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import notebook

# Report the interpreter, Jupyter Notebook and library versions for reference.
# A single print call with a newline separator emits one line per entry.
print(
    f'Python version: {sys.version}',
    f'Jupyter Notebook version: {notebook.__version__}',
    f'NumPy version: {np.__version__}',
    f'TensorFlow version: {tf.__version__}',
    f'Torch version: {torch.__version__}',
    f'Scikit-learn version: {sklearn.__version__}',
    sep='\n',
)

Python version: 3.12.9 | packaged by Anaconda, Inc. | (main, Feb  6 2025, 18:49:16) [MSC v.1929 64 bit (AMD64)]
Jupyter Notebook version: 7.3.2
NumPy version: 2.0.1
TensorFlow version: 2.19.0
Torch version: 2.6.0+cu126
Scikit-learn version: 1.6.1


### Loading MNIST dataset and Splitting data

In [19]:
from sklearn.datasets import fetch_openml

import joblib

# -------------------------
# Load and Preprocess the MNIST Data
# -------------------------
import os

# joblib.dump() opens its target path for writing and does not create missing
# parent folders, so make sure the output directory exists before the first dump
# (otherwise a fresh checkout fails with FileNotFoundError).
os.makedirs('Models and Data splits', exist_ok=True)

# Fetch MNIST from OpenML; `.values` turns the returned pandas objects into NumPy arrays
mnist = fetch_openml('mnist_784', version=1)
X = mnist.data.values
y = mnist.target.astype(int).values  # labels are converted to integers

# Split the data into training and testing sets (80/20, stratified by label, random_state=42)
X_train_not_scaled, X_test_not_scaled, y_train_not_scaled, y_test_not_scaled = train_test_split(X, y, test_size=0.2, random_state=42,stratify=y)

# Save the numpy arrays for the train and test splits BEFORE scaling
joblib.dump((X_train_not_scaled, X_test_not_scaled, y_train_not_scaled, y_test_not_scaled), 'Models and Data splits/[ORIGINAL] Train_Test_Splits.pkl')

# Scale the features by dividing by 255.0. The labels are NOT modified: the
# *_scaled label names only keep the saved tuple parallel to the unscaled one.
X_train_scaled  = X_train_not_scaled /255.0
X_test_scaled  = X_test_not_scaled  /255.0
y_train_scaled  = y_train_not_scaled
y_test_scaled  = y_test_not_scaled 

# Save the numpy arrays for the train and test splits AFTER scaling
# (as the cell's last expression, the returned list of written paths is displayed)
joblib.dump((X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled), 'Models and Data splits/[SCALED] Train_Test_Splits.pkl')


['Models and Data splits/[SCALED] Train_Test_Splits.pkl']

### Training the main model MLP with one hidden layer

In [21]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import random
import joblib
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler

# -------------------------
# Set seeds for reproducibility
# -------------------------
# Seed Python's RNG, NumPy's global RNG and PyTorch (in that order) with the same value
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(42)

if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    # Optional: for determinism (may impact performance)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# -------------------------
# Loading the scaled MNIST Data
# -------------------------
# The pickle holds the tuple (X_train, X_test, y_train, y_test) written by the data-split cell
scaled_splits = joblib.load('Models and Data splits/[SCALED] Train_Test_Splits.pkl')
X_train_np, X_test_np, y_train_np, y_test_np = scaled_splits

# Features become float32 tensors, labels become long tensors (as CrossEntropyLoss expects)
X_train, X_test = (torch.tensor(arr, dtype=torch.float32) for arr in (X_train_np, X_test_np))
y_train, y_test = (torch.tensor(arr, dtype=torch.long) for arr in (y_train_np, y_test_np))

# Mini-batch loaders: only the training loader shuffles
train_dataset, test_dataset = TensorDataset(X_train, y_train), TensorDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=64)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=64)

# -------------------------
# Define the MLP Model using PyTorch
# -------------------------
class MLP(nn.Module):
    """Fully connected classifier with a single ReLU hidden layer.

    Args:
        input_dim: number of input features per sample.
        hidden_units: width of the hidden layer.
        output_dim: number of output classes (logits).
    """

    def __init__(self, input_dim, hidden_units, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_units)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_units, out_features=output_dim)

    def forward(self, x):
        """Return raw class logits for a batch of inputs."""
        hidden = self.relu(self.fc1(x))
        # No softmax here: the logits are fed straight into CrossEntropyLoss
        return self.fc2(hidden)

# Pick the compute device (GPU when available) and place the network on it
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MLP(input_dim=X_train.shape[1], hidden_units=256, output_dim=10).to(device)

# Cross-entropy on raw logits, optimised with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# -------------------------
# Train the Model
# -------------------------
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_X, batch_y in train_loader:
        # Batches are created on the CPU; move each one to the training device
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        loss = criterion(model(batch_X), batch_y)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean of the per-batch mean losses for this epoch
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

# -------------------------
# Evaluate the Model on the Test Data
# -------------------------
model.eval()
pred_chunks, true_chunks = [], []
with torch.no_grad():
    for batch_X, batch_y in test_loader:
        logits = model(batch_X.to(device))
        # Predicted class = index of the largest logit in each row
        pred_chunks.append(torch.max(logits, dim=1).indices.cpu().numpy())
        true_chunks.append(batch_y.cpu().numpy())

y_pred = np.concatenate(pred_chunks)
y_true = np.concatenate(true_chunks)

# Accuracy plus macro-averaged precision / recall / F1
accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1 = (
    metric(y_true, y_pred, average='macro')
    for metric in (precision_score, recall_score, f1_score)
)

print(f"\nAccuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print("--------------")

# False alarms per class = column total of the confusion matrix minus its diagonal entry
cm = confusion_matrix(y_true, y_pred)
false_alarms = cm.sum(axis=0) - cm.diagonal()

print("Confusion Matrix:\n", cm)
print("False Alarms for each class:", false_alarms)
print("False Alarms for all classes:", false_alarms.sum())

# Convert to TorchScript and save; note that `model` now refers to the scripted module
model = torch.jit.script(model)
torch.jit.save(model, 'Models and Data splits/MainModel_MLP1L.pt')

Epoch [1/100], Loss: 0.3059
Epoch [2/100], Loss: 0.1286
Epoch [3/100], Loss: 0.0864
Epoch [4/100], Loss: 0.0638
Epoch [5/100], Loss: 0.0478
Epoch [6/100], Loss: 0.0360
Epoch [7/100], Loss: 0.0270
Epoch [8/100], Loss: 0.0210
Epoch [9/100], Loss: 0.0163
Epoch [10/100], Loss: 0.0139
Epoch [11/100], Loss: 0.0110
Epoch [12/100], Loss: 0.0088
Epoch [13/100], Loss: 0.0068
Epoch [14/100], Loss: 0.0076
Epoch [15/100], Loss: 0.0053
Epoch [16/100], Loss: 0.0056
Epoch [17/100], Loss: 0.0053
Epoch [18/100], Loss: 0.0045
Epoch [19/100], Loss: 0.0039
Epoch [20/100], Loss: 0.0033
Epoch [21/100], Loss: 0.0047
Epoch [22/100], Loss: 0.0025
Epoch [23/100], Loss: 0.0054
Epoch [24/100], Loss: 0.0027
Epoch [25/100], Loss: 0.0049
Epoch [26/100], Loss: 0.0034
Epoch [27/100], Loss: 0.0039
Epoch [28/100], Loss: 0.0027
Epoch [29/100], Loss: 0.0042
Epoch [30/100], Loss: 0.0023
Epoch [31/100], Loss: 0.0008
Epoch [32/100], Loss: 0.0002
Epoch [33/100], Loss: 0.0001
Epoch [34/100], Loss: 0.0000
Epoch [35/100], Loss: 0

## Loading and using the same train-test split for other methods

### MLP with 2 layers, each having 256 neurons

In [24]:
import joblib
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt

# -------------------------
# Set seeds for reproducibility
# -------------------------
# This cell's import list does not include `random`, so import it here: the
# seeding below must not depend on an earlier cell having brought the module
# into the kernel namespace (it would raise NameError otherwise).
import random

random.seed(42)        # Python's built-in RNG
np.random.seed(42)     # NumPy's global RNG
torch.manual_seed(42)  # PyTorch RNG
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    # Optional: for determinism (may impact performance)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    
# Select the compute device: CUDA when present, CPU otherwise
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# -------------------------
# Loading the scaled MNIST Data
# -------------------------
scaled_splits = joblib.load('Models and Data splits/[SCALED] Train_Test_Splits.pkl')
X_train_np, X_test_np, y_train_np, y_test_np = scaled_splits

# Whole arrays are converted once and placed on the device up front:
# features as float tensors, labels as long tensors for classification.
X_train, X_test = (torch.from_numpy(arr).float().to(device) for arr in (X_train_np, X_test_np))
y_train, y_test = (torch.from_numpy(arr).long().to(device) for arr in (y_train_np, y_test_np))

# Shuffled mini-batches over the training tensors
batch_size = 64
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

# Define the MLP model using PyTorch's nn.Module
class MLP(nn.Module):
    """MLP classifier with two hidden layers of equal width.

    Args:
        input_dim: number of input features per sample.
        hidden_units: width of each of the two hidden layers.
        output_dim: number of output classes (logits).
    """

    def __init__(self, input_dim, hidden_units, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_units)     # input    -> hidden 1
        self.fc2 = nn.Linear(hidden_units, hidden_units)  # hidden 1 -> hidden 2
        self.out = nn.Linear(hidden_units, output_dim)    # hidden 2 -> logits

    def forward(self, x):
        """Return raw class logits; ReLU follows each hidden layer."""
        first_hidden = torch.relu(self.fc1(x))
        second_hidden = torch.relu(self.fc2(first_hidden))
        # Softmax is intentionally omitted because CrossEntropyLoss works on logits
        return self.out(second_hidden)

# Model hyperparameters
input_dim = X_train.shape[1]
hidden_units = 256
output_dim = 10  # adjust based on your problem's number of classes

# Build the network on the selected device, then set up loss and optimizer
model = MLP(input_dim, hidden_units, output_dim).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop (the training tensors already live on `device`, so batches need no transfer)
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()

        loss = criterion(model(batch_X), batch_y)
        loss.backward()
        optimizer.step()

        # Weight each batch's mean loss by its size to get a per-sample average
        running_loss += loss.item() * batch_X.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    # Log the first epoch and every tenth epoch
    if epoch == 0 or (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

# Evaluation on the test data: one forward pass over the full test tensor
model.eval()
with torch.no_grad():
    test_logits = model(X_test)

# Predicted class = index of the largest logit; move results to NumPy for scikit-learn
y_pred = torch.max(test_logits, dim=1).indices.cpu().numpy()
y_test_np = y_test.cpu().numpy()

# Accuracy plus macro-averaged precision / recall / F1
accuracy = accuracy_score(y_test_np, y_pred)
precision, recall, f1 = (
    metric(y_test_np, y_pred, average='macro')
    for metric in (precision_score, recall_score, f1_score)
)

print(f"\nEvaluation Metrics:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print("------------------------")

# False alarms per class = column total of the confusion matrix minus its diagonal entry
cm = confusion_matrix(y_test_np, y_pred)
false_alarms = cm.sum(axis=0) - cm.diagonal()

print("Confusion Matrix:\n", cm)
print("False Alarms for each class:", false_alarms)
print("False Alarms for all classes:", false_alarms.sum())

# Convert to TorchScript and save; note that `model` now refers to the scripted module
model = torch.jit.script(model)
torch.jit.save(model, 'Models and Data splits/MLP2L.pt')


Using device: cuda
Epoch [1/100], Loss: 0.2693
Epoch [10/100], Loss: 0.0172
Epoch [20/100], Loss: 0.0123
Epoch [30/100], Loss: 0.0065
Epoch [40/100], Loss: 0.0054
Epoch [50/100], Loss: 0.0023
Epoch [60/100], Loss: 0.0034
Epoch [70/100], Loss: 0.0022
Epoch [80/100], Loss: 0.0049
Epoch [90/100], Loss: 0.0047
Epoch [100/100], Loss: 0.0000

Evaluation Metrics:
Accuracy: 0.9836
Precision: 0.9836
Recall: 0.9835
F1 Score: 0.9835
------------------------
Confusion Matrix:
 [[1377    1    1    0    0    0    1    0    1    0]
 [   1 1557    5    5    1    0    1    4    1    0]
 [   3    2 1377    2    0    1    1    7    4    1]
 [   0    0    5 1401    0   10    0    3    8    1]
 [   1    4    2    0 1334    0    5    0    0   19]
 [   2    0    3    3    1 1228   10    2    8    6]
 [   2    2    0    1    3    5 1361    0    1    0]
 [   2    3    6    1   10    0    1 1433    2    1]
 [   2    4    2    2    1    1    4    1 1341    7]
 [   2    2    0    5    9    2    1    5    3 1362]]

### CNN with 2 hidden dense layers (128 and 64 neurons)

In [26]:
import joblib
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

import random  # Make sure to import random

# -------------------------
# Set seeds for reproducibility
# -------------------------
# Seed Python's RNG, NumPy's global RNG and PyTorch (in that order) with the same value
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(42)

if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    # cuDNN: force deterministic kernels and disable auto-tuning
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# 1. Select the compute device: CUDA when present, CPU otherwise
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 2. Load the scaled split (X_train, X_test, y_train, y_test) from the pickle
scaled_splits = joblib.load('Models and Data splits/[SCALED] Train_Test_Splits.pkl')
X_train_np, X_test_np, y_train_np, y_test_np = scaled_splits

# 3. Reshape every sample into a single-channel 28x28 image: (N, 1, 28, 28).
#    The stored arrays come from `mnist.data.values`, so they are presumably flat
#    784-value rows rather than (N, 28, 28, 1) images.
N_train, N_test = X_train_np.shape[0], X_test_np.shape[0]
X_train_np = X_train_np.reshape(N_train, 1, 28, 28)
X_test_np = X_test_np.reshape(N_test, 1, 28, 28)

# 4. Convert to tensors on the device: float features, long labels
X_train, X_test = (torch.from_numpy(arr).float().to(device) for arr in (X_train_np, X_test_np))
y_train, y_test = (torch.from_numpy(arr).long().to(device) for arr in (y_train_np, y_test_np))

# 5. Shuffled mini-batches over the training tensors
train_dataset = TensorDataset(X_train, y_train)
batch_size = 64
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

# 6. Define the CNN model
class CNNModel(nn.Module):
    """Small CNN: one 3x3 conv (32 filters) + 2x2 max-pool, then 128 -> 64 -> 10 dense layers."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=0)
        self.pool = nn.MaxPool2d(2)
        # fc1 expects 32 feature maps of 13x13, i.e. a 28x28 input:
        # 26x26 after the unpadded 3x3 conv, halved by the 2x2 pooling.
        self.fc1 = nn.Linear(32 * 13 * 13, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Return raw class logits for a batch of single-channel images."""
        feature_maps = self.pool(torch.relu(self.conv1(x)))
        flat = feature_maps.view(feature_maps.size(0), -1)  # flatten all but the batch dimension
        hidden = torch.relu(self.fc2(torch.relu(self.fc1(flat))))
        return self.fc3(hidden)

model = CNNModel().to(device)

# 7. Cross-entropy on raw logits, optimised with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 8. Training loop (the training tensors already live on `device`)
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(batch_X), batch_y)
        loss.backward()
        optimizer.step()
        # Weight each batch's mean loss by its size to get a per-sample average
        running_loss += loss.item() * batch_X.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    # Log the first epoch and every tenth epoch
    if epoch == 0 or (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

# 9. Evaluation: a single forward pass over the full test tensor
model.eval()
with torch.no_grad():
    outputs_test = model(X_test)
    predicted = torch.max(outputs_test, dim=1).indices  # class with the largest logit

y_pred = predicted.cpu().numpy()
y_true = y_test.cpu().numpy()

# 10. Accuracy plus macro-averaged precision / recall / F1
accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1 = (
    metric(y_true, y_pred, average='macro')
    for metric in (precision_score, recall_score, f1_score)
)

print("\nEvaluation Metrics:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

# 11. Confusion matrix; false alarms per class = column total minus the diagonal entry
cm = confusion_matrix(y_true, y_pred)
print("\nConfusion Matrix:")
print(cm)

false_alarms = cm.sum(axis=0) - cm.diagonal()
print("\nFalse Alarms for each class:", false_alarms)
print("False Alarms for all classes:", false_alarms.sum())

# Convert to TorchScript and save; note that `model` now refers to the scripted module
model = torch.jit.script(model)
torch.jit.save(model, 'Models and Data splits/CNN.pt')


Using device: cuda
Epoch [1/100], Loss: 0.2815
Epoch [10/100], Loss: 0.0086
Epoch [20/100], Loss: 0.0042
Epoch [30/100], Loss: 0.0011
Epoch [40/100], Loss: 0.0000
Epoch [50/100], Loss: 0.0000
Epoch [60/100], Loss: 0.0000
Epoch [70/100], Loss: 0.0028
Epoch [80/100], Loss: 0.0029
Epoch [90/100], Loss: 0.0000
Epoch [100/100], Loss: 0.0000

Evaluation Metrics:
Accuracy: 0.9878
Precision: 0.9877
Recall: 0.9877
F1 Score: 0.9877

Confusion Matrix:
[[1376    1    0    0    0    1    1    0    2    0]
 [   0 1565    3    0    1    0    1    4    1    0]
 [   3    0 1376    4    1    0    2    8    4    0]
 [   1    0    3 1408    0    8    1    2    3    2]
 [   1    1    0    0 1343    0    5    1    0   14]
 [   1    0    0    6    0 1250    2    1    2    1]
 [   7    2    0    0    3    2 1359    0    2    0]
 [   1    2    3    1    2    0    0 1445    2    3]
 [   2    2    2    3    0    3    3    1 1345    4]
 [   0    3    1    1    8    6    1    7    2 1362]]

False Alarms for each c

### KNN

In [28]:
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import joblib
from sklearn.metrics import confusion_matrix


# Load the shared, pre-split (scaled) data so every method uses the same train/test split
scaled_splits = joblib.load('Models and Data splits/[SCALED] Train_Test_Splits.pkl')
X_train, X_test, y_train, y_test = scaled_splits

# Define and fit the kNN model (fit returns the estimator itself)
model = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Predict labels for the held-out test set
y_pred = model.predict(X_test)

# Accuracy plus macro-averaged precision / recall / F1 ('macro' = unweighted mean over classes)
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    metric(y_test, y_pred, average='macro')
    for metric in (precision_score, recall_score, f1_score)
)

# Print evaluation metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print("--------------------")

# Confusion matrix of true vs. predicted labels
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)

# False alarms per class = column total minus the diagonal entry
false_alarms = cm.sum(axis=0) - cm.diagonal()

print("False alarms for each class:", false_alarms)
print("False alarms for all classes:", false_alarms.sum())

# Persist the fitted kNN model (kept as the last expression so the written path is displayed)
joblib.dump(model, 'Models and Data splits/kNN.pkl')

Accuracy: 0.9696
Precision: 0.9701
Recall: 0.9692
F1 Score: 0.9695
--------------------
Confusion Matrix:
 [[1375    1    0    0    0    1    3    0    0    1]
 [   0 1564    5    1    1    0    1    3    0    0]
 [  12   14 1335    4    2    0    4   24    3    0]
 [   1    2    7 1386    0   12    2    9    6    3]
 [   2   15    1    0 1306    0    5    1    0   35]
 [   6    3    1    7    1 1217   17    2    3    6]
 [   7    3    0    0    1    8 1355    0    1    0]
 [   1   17    3    0    2    1    0 1423    0   12]
 [   8   12    1   22    4   29    4    4 1275    6]
 [   2    8    1   13   11    1    1   13    2 1339]]
False alarms for each class: [39 75 19 47 22 52 37 56 15 63]
False alarms for all classes: 425


['Models and Data splits/kNN.pkl']

## Ensemble Method: RF

In [30]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import joblib
from sklearn.metrics import confusion_matrix

 
# Load the shared, pre-split (scaled) data so every method uses the same train/test split
scaled_splits = joblib.load('Models and Data splits/[SCALED] Train_Test_Splits.pkl')
X_train, X_test, y_train, y_test = scaled_splits

# Define and fit the Random Forest (100 trees, fixed random_state; fit returns the estimator)
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict labels for the held-out test set
y_pred = model.predict(X_test)

# Accuracy plus macro-averaged precision / recall / F1 ('macro' = unweighted mean over classes)
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    metric(y_test, y_pred, average='macro')
    for metric in (precision_score, recall_score, f1_score)
)

# Print evaluation metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print("-----------------------")

# Confusion matrix; false alarms per class = column total minus the diagonal entry
cm = confusion_matrix(y_test, y_pred)
false_alarms = cm.sum(axis=0) - cm.diagonal()

print("Confusion Matrix:\n", cm)
print("False Alarms for each class:", false_alarms)
print("False Alarms for all classes:", false_alarms.sum())

# Persist the fitted Random Forest model (kept as the last expression so the written path is displayed)
joblib.dump(model, 'Models and Data splits/RF.pkl')

Accuracy: 0.9672
Precision: 0.9670
Recall: 0.9669
F1 Score: 0.9669
-----------------------
Confusion Matrix:
 [[1371    1    0    0    1    0    2    0    5    1]
 [   0 1548    9    7    1    1    2    5    1    1]
 [   8    1 1350    3    6    1    7   14    7    1]
 [   3    1   14 1368    0   14    2   12   12    2]
 [   4    3    1    0 1312    0   10    0    4   31]
 [   5    1    2   17    3 1207   11    2    7    8]
 [   9    2    1    0    3    4 1350    0    6    0]
 [   1    5   13    0    9    0    0 1414    3   14]
 [   3    3    8    7    3   11    3    0 1308   19]
 [   8    7    1   16   18    4    1   12   11 1313]]
False Alarms for each class: [41 24 49 50 44 35 38 45 56 77]
False Alarms for all classes: 459


['Models and Data splits/RF.pkl']

# MLP1L with StandardScaler

In [39]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import random
import joblib
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler

# -------------------------
# Set seeds for reproducibility
# -------------------------
# Seed Python's RNG, NumPy's global RNG and PyTorch (in that order) with the same value
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(42)

if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    # Optional: for determinism (may impact performance)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# -------------------------
# Loading the original MNIST Data
# -------------------------
original_splits = joblib.load('Models and Data splits/[ORIGINAL] Train_Test_Splits.pkl')
X_train_np, X_test_np, y_train_np, y_test_np = original_splits

# Fit the StandardScaler on the training features only, then apply the same
# fitted transform to both splits so no test statistics leak into the scaling.
scaler = StandardScaler().fit(X_train_np)
X_train_scaled = scaler.transform(X_train_np)
X_test_scaled = scaler.transform(X_test_np)

# Save the scaler to a file for later use
joblib.dump(scaler, 'Models and Data splits/scaler.pkl')

# Features become float32 tensors, labels become long tensors
X_train, X_test = (torch.tensor(arr, dtype=torch.float32) for arr in (X_train_scaled, X_test_scaled))
y_train, y_test = (torch.tensor(arr, dtype=torch.long) for arr in (y_train_np, y_test_np))

# Mini-batch loaders: only the training loader shuffles
train_dataset, test_dataset = TensorDataset(X_train, y_train), TensorDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=64)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=64)

# -------------------------
# Define the MLPSca Model using PyTorch
# -------------------------
class MLPSca(nn.Module):
    """Single-hidden-layer MLP classifier, used here with standard-scaled inputs.

    Args:
        input_dim: number of input features per sample.
        hidden_units: width of the hidden layer.
        output_dim: number of output classes (logits).
    """

    def __init__(self, input_dim, hidden_units, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_units)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_units, out_features=output_dim)

    def forward(self, x):
        """Return raw class logits for a batch of inputs."""
        hidden = self.relu(self.fc1(x))
        # No softmax here: the logits are fed straight into CrossEntropyLoss
        return self.fc2(hidden)

# Pick the compute device (GPU when available) and place the network on it
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MLPSca(input_dim=X_train.shape[1], hidden_units=256, output_dim=10).to(device)

# Cross-entropy on raw logits, optimised with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# -------------------------
# Train the Model
# -------------------------
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_X, batch_y in train_loader:
        # Batches are created on the CPU; move each one to the training device
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        loss = criterion(model(batch_X), batch_y)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean of the per-batch mean losses for this epoch
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

# -------------------------
# Evaluate the Model on the Test Data
# -------------------------
model.eval()
pred_chunks, true_chunks = [], []
with torch.no_grad():
    for batch_X, batch_y in test_loader:
        logits = model(batch_X.to(device))
        # Predicted class = index of the largest logit in each row
        pred_chunks.append(torch.max(logits, dim=1).indices.cpu().numpy())
        true_chunks.append(batch_y.cpu().numpy())

y_pred = np.concatenate(pred_chunks)
y_true = np.concatenate(true_chunks)

# Accuracy plus macro-averaged precision / recall / F1
accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1 = (
    metric(y_true, y_pred, average='macro')
    for metric in (precision_score, recall_score, f1_score)
)

print(f"\nAccuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print("--------------")

# False alarms per class = column total of the confusion matrix minus its diagonal entry
cm = confusion_matrix(y_true, y_pred)
false_alarms = cm.sum(axis=0) - cm.diagonal()

print("Confusion Matrix:\n", cm)
print("False Alarms for each class:", false_alarms)
print("False Alarms for all classes:", false_alarms.sum())

# Convert to TorchScript and save; note that `model` now refers to the scripted module
model = torch.jit.script(model)
torch.jit.save(model, 'Models and Data splits/MainModel_MLPSca.pt')


Epoch [1/100], Loss: 0.2215
Epoch [2/100], Loss: 0.0935
Epoch [3/100], Loss: 0.0604
Epoch [4/100], Loss: 0.0411
Epoch [5/100], Loss: 0.0294
Epoch [6/100], Loss: 0.0264
Epoch [7/100], Loss: 0.0195
Epoch [8/100], Loss: 0.0187
Epoch [9/100], Loss: 0.0178
Epoch [10/100], Loss: 0.0307
Epoch [11/100], Loss: 0.0157
Epoch [12/100], Loss: 0.0116
Epoch [13/100], Loss: 0.0061
Epoch [14/100], Loss: 0.0074
Epoch [15/100], Loss: 0.0314
Epoch [16/100], Loss: 0.0185
Epoch [17/100], Loss: 0.0071
Epoch [18/100], Loss: 0.0069
Epoch [19/100], Loss: 0.0104
Epoch [20/100], Loss: 0.0163
Epoch [21/100], Loss: 0.0091
Epoch [22/100], Loss: 0.0074
Epoch [23/100], Loss: 0.0257
Epoch [24/100], Loss: 0.0180
Epoch [25/100], Loss: 0.0294
Epoch [26/100], Loss: 0.0148
Epoch [27/100], Loss: 0.0126
Epoch [28/100], Loss: 0.0067
Epoch [29/100], Loss: 0.0055
Epoch [30/100], Loss: 0.0090
Epoch [31/100], Loss: 0.0096
Epoch [32/100], Loss: 0.0161
Epoch [33/100], Loss: 0.0170
Epoch [34/100], Loss: 0.0100
Epoch [35/100], Loss: 0

### CNN with a different scaler (StandardScaler)

In [2]:
import joblib
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import random

# -------------------------
# Reproducibility
# -------------------------
# Seed Python's RNG, NumPy's global RNG and PyTorch (in that order) with the same value
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(42)

if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    # cuDNN: force deterministic kernels and disable auto-tuning
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# -------------------------
# Device
# -------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# -----------------------------------------------------
# 1. Load ORIGINAL (un-scaled) data
# -----------------------------------------------------
original_splits = joblib.load("Models and Data splits/[ORIGINAL] Train_Test_Splits.pkl")
X_train_np, X_test_np, y_train_np, y_test_np = original_splits

# -----------------------------------------------------
# 2. Standard-scale pixel intensities
#    (fit on TRAIN only, then transform TRAIN & TEST)
#    NOTE: unlike the MLPSca cell, the fitted scaler is not saved to disk here.
# -----------------------------------------------------
N_train, N_test = X_train_np.shape[0], X_test_np.shape[0]

# Flatten each sample to one row of float32 values so the scaler sees 2-D input
X_train_flat = X_train_np.reshape(N_train, -1).astype(np.float32)
X_test_flat = X_test_np.reshape(N_test, -1).astype(np.float32)

scaler = StandardScaler().fit(X_train_flat)
X_train_flat = scaler.transform(X_train_flat)
X_test_flat = scaler.transform(X_test_flat)

# Back to single-channel 28x28 images for the CNN: (N, 1, 28, 28)
X_train_np = X_train_flat.reshape(N_train, 1, 28, 28)
X_test_np = X_test_flat.reshape(N_test, 1, 28, 28)

# -----------------------------------------------------
# 3. Convert to tensors on the device (float features, long labels)
# -----------------------------------------------------
X_train, X_test = (torch.from_numpy(arr).float().to(device) for arr in (X_train_np, X_test_np))
y_train, y_test = (torch.from_numpy(arr).long().to(device) for arr in (y_train_np, y_test_np))

# -----------------------------------------------------
# 4. DataLoader (shuffled mini-batches over the training tensors)
# -----------------------------------------------------
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=64)

# -----------------------------------------------------
# 5. CNN definition
# -----------------------------------------------------
class CNNModel(nn.Module):
    """Small CNN: one 3x3 conv (32 filters) + 2x2 max-pool, then 128 -> 64 -> 10 dense layers."""

    def __init__(self):
        super().__init__()
        # Spatial size for a 28x28 input: 28 - 3 + 1 = 26 after the conv, 13 after pooling
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.fc1 = nn.Linear(32 * 13 * 13, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Return raw class logits for a batch of single-channel images."""
        feature_maps = self.pool(torch.relu(self.conv1(x)))
        flat = feature_maps.view(feature_maps.size(0), -1)  # flatten all but the batch dimension
        hidden = torch.relu(self.fc2(torch.relu(self.fc1(flat))))
        return self.fc3(hidden)

model = CNNModel().to(device)

# -----------------------------------------------------
# 6. Loss & optimiser
# -----------------------------------------------------
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# -----------------------------------------------------
# 7. Training (the training tensors already live on `device`)
# -----------------------------------------------------
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(batch_X), batch_y)
        loss.backward()
        optimizer.step()
        # Weight each batch's mean loss by its size to get a per-sample average
        running_loss += loss.item() * batch_X.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    # Log the first epoch and every tenth epoch
    if (epoch + 1) % 10 == 0 or epoch == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}]  Loss: {epoch_loss:.4f}")

# -----------------------------------------------------
# 8. Evaluation: a single forward pass over the full test tensor
# -----------------------------------------------------
model.eval()
with torch.no_grad():
    outputs_test = model(X_test)
    predicted = torch.max(outputs_test, dim=1).indices  # class with the largest logit

y_pred = predicted.cpu().numpy()
y_true = y_test.cpu().numpy()

# Accuracy plus macro-averaged precision / recall / F1
accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1 = (
    metric(y_true, y_pred, average="macro")
    for metric in (precision_score, recall_score, f1_score)
)

print("\nEvaluation Metrics")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")

cm = confusion_matrix(y_true, y_pred)
print("\nConfusion Matrix")
print(cm)

# False alarms per class = column total of the confusion matrix minus its diagonal entry
false_alarms = cm.sum(axis=0) - cm.diagonal()
print("\nFalse Alarms per class:", false_alarms)
print("Total False Alarms    :", false_alarms.sum())

# -----------------------------------------------------
# 9. Save the scripted model (the eager `model` is left untouched)
# -----------------------------------------------------
scripted = torch.jit.script(model)
torch.jit.save(scripted, "Models and Data splits/CNNScl.pt")


Using device: cuda
Epoch [1/100]  Loss: 0.2202
Epoch [10/100]  Loss: 0.0130
Epoch [20/100]  Loss: 0.0115
Epoch [30/100]  Loss: 0.0043
Epoch [40/100]  Loss: 0.0072
Epoch [50/100]  Loss: 0.0025
Epoch [60/100]  Loss: 0.0079
Epoch [70/100]  Loss: 0.0076
Epoch [80/100]  Loss: 0.0000
Epoch [90/100]  Loss: 0.0000
Epoch [100/100]  Loss: 0.0000

Evaluation Metrics
Accuracy : 0.9859
Precision: 0.9859
Recall   : 0.9858
F1 Score : 0.9858

Confusion Matrix
[[1375    1    2    1    0    0    2    0    0    0]
 [   0 1558    4    2    1    1    2    7    0    0]
 [   1    2 1374    2    2    0    5    6    4    2]
 [   1    3    2 1411    0    4    0    4    3    0]
 [   0    3    1    0 1337    0    5    3    0   16]
 [   1    0    0    4    0 1243    5    1    3    6]
 [   7    2    0    0    2    3 1359    0    2    0]
 [   1    2    8    1    1    0    0 1432    2   12]
 [   4    2    5    4    1    3    2    1 1339    4]
 [   0    2    0    0    2    3    0    7    3 1374]]

False Alarms per cla