In [1]:
import sys
import numpy as np
import tensorflow as tf
import sklearn
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import notebook

# Print Python version
print(f'Python version: {sys.version}')

# Print Jupyter Notebook version
print(f'Jupyter Notebook version: {notebook.__version__}')

# Print library versions
print(f'NumPy version: {np.__version__}')
print(f'TensorFlow version: {tf.__version__}')
print(f'Scikit-learn version: {sklearn.__version__}')

Python version: 3.12.7 | packaged by Anaconda, Inc. | (main, Oct  4 2024, 13:17:27) [MSC v.1929 64 bit (AMD64)]
Jupyter Notebook version: 7.2.2
NumPy version: 1.26.4
TensorFlow version: 2.18.0
Scikit-learn version: 1.5.1


In [3]:
import numpy as np
import tensorflow as tf
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix
import joblib

# Load MNIST dataset
mnist = fetch_openml('mnist_784', version=1)
X = mnist.data.values
y = mnist.target.astype(int).values

# Normalize the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Define the MLP model
def create_mlp(input_dim, hidden_units, output_dim):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(hidden_units, input_dim=input_dim, activation='relu'),
        tf.keras.layers.Dense(output_dim, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Train the MLP model
model = create_mlp(input_dim=X_train.shape[1], hidden_units=256, output_dim=10)
model.fit(X_train, y_train, epochs=100, batch_size=64, verbose=1)

# Evaluate the model on the test data
y_pred = model.predict(X_test, verbose=0)
y_pred = np.argmax(y_pred, axis=1)  # Convert predictions to class labels

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')  # 'macro' averages for multi-class classification
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')

# Print evaluation metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

print("--------------")


# Compute the confusion matrix
cm = confusion_matrix(y_test, y_pred)

# False positives for each class (sum of columns except the diagonal)
false_positives = cm.sum(axis=0) - np.diagonal(cm)

# Print false positives for each class
print("False Alarms for each class:", false_positives)
print("False Alarms for all classes:", false_positives.sum())

#Saving the train and test sets 
joblib.dump((X_train, X_test, y_train, y_test), 'Train_Test_Splits.pkl')

#Saving the model
joblib.dump(model, 'MainModel_1L_MLP.pkl')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8857 - loss: 0.3928
Epoch 2/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9716 - loss: 0.1095
Epoch 3/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9819 - loss: 0.0689
Epoch 4/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9883 - loss: 0.0390
Epoch 5/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9909 - loss: 0.0317
Epoch 6/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9933 - loss: 0.0234
Epoch 7/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9932 - loss: 0.0208
Epoch 8/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9944 - loss: 0.0173
Epoch 9/100
[1m875/875[0m [32

['MainModel_1L_MLP.pkl']

### Loading and using the same train-test split for other methods

### MLP with 2 layers, each having 256 neurons

In [5]:
import joblib
import numpy as np
import tensorflow as tf
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix


X_train, X_test, y_train, y_test = joblib.load(r"Train_Test_Splits.pkl")

# Define the MLP model
def create_mlp(input_dim, hidden_units, output_dim):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(hidden_units, input_dim=input_dim, activation='relu'),
        tf.keras.layers.Dense(hidden_units, input_dim=input_dim, activation='relu'),
        tf.keras.layers.Dense(output_dim, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Train the MLP model
model = create_mlp(input_dim=X_train.shape[1], hidden_units=256, output_dim=10)
model.fit(X_train, y_train, epochs=100, batch_size=64, verbose=1)

# Evaluate the model on the test data
y_pred = model.predict(X_test, verbose=0)
y_pred = np.argmax(y_pred, axis=1)  # Convert predictions to class labels

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')  # 'macro' averages for multi-class classification
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')

# Print evaluation metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print("------------------------")

# Evaluate the model on the test data
y_pred = model.predict(X_test, verbose=0)
y_pred = np.argmax(y_pred, axis=1)  # Convert predictions to class labels

# Compute the confusion matrix
cm = confusion_matrix(y_test, y_pred)

# False positives for each class (sum of columns except the diagonal)
false_positives = cm.sum(axis=0) - np.diagonal(cm)

# Print false positives for each class
print("False Positives for each class:", false_positives)
print("False Positives for all classes:", false_positives.sum())

#Saving the model 
joblib.dump(model, 'MLP2L.pkl')

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8854 - loss: 0.3810
Epoch 2/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9739 - loss: 0.0883
Epoch 3/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9834 - loss: 0.0538
Epoch 4/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9873 - loss: 0.0393
Epoch 5/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9892 - loss: 0.0382
Epoch 6/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.9905 - loss: 0.0324
Epoch 7/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9941 - loss: 0.0178
Epoch 8/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9920 - loss: 0.0260
Epoch 9/100
[1m875/875[0m [32m━━━━━━━━━━━

['MLP2L.pkl']

### CNN with 2 dense layer, one 128 and the other 64 neurons

In [7]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import confusion_matrix


X_train, X_test, y_train, y_test = joblib.load(r"Train_Test_Splits.pkl")

# Preprocess the data: Normalize and reshape to (28, 28, 1)
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)


# Define the model with a convolutional layer and two hidden layers
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),  # Convolutional layer with 32 filters, kernel size 3x3
    layers.MaxPooling2D((2, 2)),  # MaxPooling layer to reduce spatial dimensions
    layers.Flatten(),  # Flatten the output of the convolutional layer
    layers.Dense(128, activation='relu'),  # First hidden dense layer with 128 units
    layers.Dense(64, activation='relu'),  # Second hidden dense layer with 64 units
    layers.Dense(10, activation='softmax')  # Output layer with 10 units (for 10 classes)
])

# Compile the model
model.compile(optimizer='adam', 
              loss='sparse_categorical_crossentropy', 
              metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=64, validation_data=(X_test, y_test))


# Evaluate the model on the test data
y_pred = model.predict(X_test, verbose=0)
y_pred = np.argmax(y_pred, axis=1)  # Convert predictions to class labels

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')  # 'macro' averages for multi-class classification
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')

# Print evaluation metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

print("--------------")

# Compute the confusion matrix
cm = confusion_matrix(y_test, y_pred)

# False positives for each class (sum of columns except the diagonal)
false_positives = cm.sum(axis=0) - np.diagonal(cm)

# Print false positives for each class
print("False Positives for each class:", false_positives)
print("False Positives for all classes:", false_positives.sum())

#Saving the model 
joblib.dump(model, 'CNN_2L.pkl')

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 12ms/step - accuracy: 0.8907 - loss: 0.3808 - val_accuracy: 0.9669 - val_loss: 0.1136
Epoch 2/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 11ms/step - accuracy: 0.9750 - loss: 0.0818 - val_accuracy: 0.9770 - val_loss: 0.0874
Epoch 3/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 11ms/step - accuracy: 0.9852 - loss: 0.0510 - val_accuracy: 0.9789 - val_loss: 0.0796
Epoch 4/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 12ms/step - accuracy: 0.9894 - loss: 0.0371 - val_accuracy: 0.9773 - val_loss: 0.0877
Epoch 5/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 12ms/step - accuracy: 0.9901 - loss: 0.0351 - val_accuracy: 0.9804 - val_loss: 0.0804
Epoch 6/100
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 12ms/step - accuracy: 0.9930 - loss: 0.0291 - val_accuracy: 0.9788 - val_loss: 0.0960
Epoch 7/100
[1m875/87

['CNN_2L.pkl']

### KNN

In [9]:
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import joblib
from sklearn.metrics import confusion_matrix


# Loading the pre-split data for reference 
X_train, X_test, y_train, y_test = joblib.load(r"Train_Test_Splits.pkl")

# Reshape the data
X_train = X_train.reshape(X_train.shape[0], -1)  # Flatten each image to 1D
X_test = X_test.reshape(X_test.shape[0], -1)

# Define the kNN model
model = KNeighborsClassifier(n_neighbors=5)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')  # 'macro' averages for multi-class classification
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')

# Print evaluation metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print("--------------------")

# Compute the confusion matrix
cm = confusion_matrix(y_test, y_pred)

# False positives for each class (sum of columns except the diagonal)
false_positives = cm.sum(axis=0) - np.diagonal(cm)

# Print false positives for each class
print("False Positives for each class:", false_positives)

print("False Positives for all classes:", false_positives.sum())

#Saving the model 
joblib.dump(model, 'kNN_k_5.pkl')

[WinError 2] The system cannot find the file specified
  File "C:\Users\dyari\anaconda3\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "C:\Users\dyari\anaconda3\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\dyari\anaconda3\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "C:\Users\dyari\anaconda3\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


Accuracy: 0.9461
Precision: 0.9465
Recall: 0.9454
F1 Score: 0.9457
--------------------
False Positives for each class: [ 44  75  72  95  73  81  43  88  34 149]
False Positives for all classes: 754


['kNN_k_5.pkl']

## Ensember Method: RF

In [11]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import mnist
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Loading the pre-split data for reference 
X_train, X_test, y_train, y_test = joblib.load(r"Train_Test_Splits.pkl")


# Flatten the images to 1D vectors
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Define the Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train, y_train)


# Make predictions
y_pred = model.predict(X_test)

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')  # 'macro' averages for multi-class classification
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')

# Print evaluation metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print("-----------------------")

# Compute the confusion matrix
cm = confusion_matrix(y_test, y_pred)

# False positives for each class (sum of columns except the diagonal)
false_positives = cm.sum(axis=0) - np.diagonal(cm)

# Print false positives for each class
print("False Positives for each class:", false_positives)

print("False Positives for all classes:", false_positives.sum())

#Saving the train and test sets 
joblib.dump(model, 'RF.pkl')

Accuracy: 0.9675
Precision: 0.9673
Recall: 0.9674
F1 Score: 0.9673
-----------------------
False Positives for each class: [22 27 67 60 50 38 28 50 54 59]
False Positives for all classes: 455


['RF.pkl']