#### Loading Pre-Split Data and Trained Model

In [2]:
import joblib
import numpy as np
import tensorflow as tf
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt

# Restore the persisted train/test split and the trained model.
# NOTE: joblib.load unpickles its input -- only open files that come from a trusted source.
splits = joblib.load(r"Train_Test_Splits.pkl")
X_train, X_test, y_train, y_test = splits
model = joblib.load(r"MainModel_1L_MLP.pkl")

#### Evaluating the already trained Model Performance for Reference with Pre-Split Data

In [4]:
from sklearn.metrics import confusion_matrix

# Score the held-out split, then keep the index of the highest-scoring class per row
test_scores = model.predict(X_test, verbose=0)
y_pred = np.argmax(test_scores, axis=1)

# Headline metrics; precision/recall/F1 are macro-averaged (unweighted mean over classes)
macro = {"average": "macro"}
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, **macro)
recall = recall_score(y_test, y_pred, **macro)
f1 = f1_score(y_test, y_pred, **macro)

# Report the metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

print("--------------")


# Confusion matrix of true labels vs. predicted labels
cm = confusion_matrix(y_test, y_pred)

# Per-class false positives: everything in a class's column that is off the diagonal
column_totals = cm.sum(axis=0)
false_positives = column_totals - np.diag(cm)

# Report false positives per class and overall
print("False Alarms for each class:", false_positives)
print("False Alarms for all classes:", false_positives.sum())


Accuracy: 0.9739
Precision: 0.9737
Recall: 0.9736
F1 Score: 0.9736
--------------
False Alarms for each class: [14 17 46 46 30 47 21 50 38 57]
False Alarms for all classes: 366


### [MLP with 1 Layer] Equal Distribution Sampling for Correctly Classified MNIST already-seen Data (500 Samples of train set)

In [8]:
import numpy as np
from sklearn.metrics import accuracy_score

# Draw a class-balanced sample of correctly classified TRAIN samples and persist it.
# Assumes X_train / y_train support NumPy-style positional indexing -- TODO confirm.

# Samples drawn per digit (10 digits -> 500 samples in total)
n_per_digit = 50

# Seeded generator: the saved sample must be identical on every Restart & Run All
rng = np.random.default_rng(42)

# Predict the class scores of the train set
train_y_pred = model.predict(X_train)

# Convert the per-class scores to class labels
train_y_pred_classes = np.argmax(train_y_pred, axis=1)

# Positions of the correctly classified samples
correctly_classified = np.where(train_y_pred_classes == y_train)[0]

# Sample equally from each digit (0-9)
train_sampled_indices = []
for digit in range(10):
    # Positions of correctly classified samples whose label is this digit
    indices = correctly_classified[y_train[correctly_classified] == digit]

    # Fail with a clear message instead of the generic sampling error
    if len(indices) < n_per_digit:
        raise ValueError(
            f"Digit {digit}: only {len(indices)} correctly classified samples, "
            f"need {n_per_digit}"
        )

    # Draw without replacement so no sample appears twice
    train_sampled_indices.extend(rng.choice(indices, n_per_digit, replace=False))

# Get the sampled samples and labels
train_X_sampled = X_train[train_sampled_indices]
train_y_sampled = y_train[train_sampled_indices]

# Save the sampled (features, labels) pair -- the model itself is not written here
joblib.dump((train_X_sampled, train_y_sampled), 'Seen_500__Sampled_data.pkl')

[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step


['Seen_500__Sampled_data.pkl']

### [MLP with 1 Layer] Equal Distribution Sampling for Correctly Classified MNIST Unseen Data (500 Samples of test set)

In [10]:
import numpy as np
from sklearn.metrics import accuracy_score

# Draw a class-balanced sample of correctly classified TEST samples and persist it.
# Assumes X_test / y_test support NumPy-style positional indexing -- TODO confirm.

# Samples drawn per digit (10 digits -> 500 samples in total)
n_per_digit = 50

# Seeded generator: the saved sample must be identical on every Restart & Run All
rng = np.random.default_rng(42)

# Predict the class scores of the test set
y_pred = model.predict(X_test)

# Convert the per-class scores to class labels
y_pred_classes = np.argmax(y_pred, axis=1)

# Positions of the correctly classified samples
correctly_classified = np.where(y_pred_classes == y_test)[0]

# Sample equally from each digit (0-9)
sampled_indices = []
for digit in range(10):
    # Positions of correctly classified samples whose label is this digit
    indices = correctly_classified[y_test[correctly_classified] == digit]

    # Fail with a clear message instead of the generic sampling error
    if len(indices) < n_per_digit:
        raise ValueError(
            f"Digit {digit}: only {len(indices)} correctly classified samples, "
            f"need {n_per_digit}"
        )

    # Draw without replacement so no sample appears twice
    sampled_indices.extend(rng.choice(indices, n_per_digit, replace=False))

# Get the sampled samples and labels
X_sampled = X_test[sampled_indices]
y_sampled = y_test[sampled_indices]


# Save the sampled (features, labels) pair -- the model itself is not written here
joblib.dump((X_sampled, y_sampled), 'Unseen_500_Sampled_data.pkl')

[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step


['Unseen_500_Sampled_data.pkl']

### [MLP with 1 Layer] Equal Distribution Sampling for Correctly Classified MNIST already-seen Data (100 Samples of train set)

In [12]:

# Draw a smaller class-balanced sample of correctly classified TRAIN samples and persist it.
# Assumes X_train / y_train support NumPy-style positional indexing -- TODO confirm.

# Samples drawn per digit (10 digits -> 100 samples in total)
n_per_digit = 10

# Seeded generator: the saved sample must be identical on every Restart & Run All
rng = np.random.default_rng(42)

# Predict the class scores of the train set
train10_y_pred = model.predict(X_train)

# Convert the per-class scores to class labels
train10_y_pred_classes = np.argmax(train10_y_pred, axis=1)

# Positions of the correctly classified samples
correctly10_classified = np.where(train10_y_pred_classes == y_train)[0]

# Sample equally from each digit (0-9)
train10_sampled_indices = []
for digit in range(10):
    # Positions of correctly classified samples whose label is this digit
    indices = correctly10_classified[y_train[correctly10_classified] == digit]

    # Fail with a clear message instead of the generic sampling error
    if len(indices) < n_per_digit:
        raise ValueError(
            f"Digit {digit}: only {len(indices)} correctly classified samples, "
            f"need {n_per_digit}"
        )

    # Draw without replacement so no sample appears twice
    train10_sampled_indices.extend(rng.choice(indices, n_per_digit, replace=False))

# Get the sampled samples and labels
train10_X_sampled = X_train[train10_sampled_indices]
train10_y_sampled = y_train[train10_sampled_indices]


# Save the sampled (features, labels) pair -- the model itself is not written here
joblib.dump((train10_X_sampled, train10_y_sampled), 'Seen_100__Sampled_data.pkl')

[1m1750/1750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step


['Seen_100__Sampled_data.pkl']