<a href="https://colab.research.google.com/github/Himika-Mishra/Dissertation/blob/main/Mnist_Dissertation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Load the MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [None]:
# Flatten the images and normalize the pixel values to [0, 1]
x_train = x_train.reshape((60000, 28 * 28)).astype('float32') / 255
x_test = x_test.reshape((10000, 28 * 28)).astype('float32') / 255

In [None]:
len(y_train)

60000

In [None]:
# Append the target arrays
y = np.concatenate((y_train, y_test))

In [None]:
# One-hot encode the target arrays
num_classes = 10
y_onehot = np.eye(num_classes)[y]

In [None]:
# Splitting the encoded labels into train and test
y_train = y_onehot[:60000]
y_test = y_onehot[-10000:]

In [None]:
# Define original_ratio values
original_ratios = np.arange(0.1, 1.1, 0.1)

In [None]:
# Iterate over original_ratio values
for original_ratio in original_ratios:
    print(f"Training and testing with original_ratio={original_ratio}")
    
    # Generate random noise images
    num_noise_images = int((1 - original_ratio) * len(x_train))
    noise_images = np.random.rand(num_noise_images, 28 * 28)
    noise_labels = np.zeros((num_noise_images, 10))

    # Replace a portion of original images with noisy images
    indices = np.random.choice(len(x_train), num_noise_images, replace=False)
    x_train_with_noise = np.copy(x_train)
    x_train_with_noise[indices] = noise_images
    y_train_with_noise = np.copy(y_train)
    y_train_with_noise[indices] = noise_labels

    # Fit a regular MLP classifier
    mlp = MLPClassifier(hidden_layer_sizes=(10,8), activation='relu', solver='adam', max_iter=500)
    mlp.fit(x_train_with_noise, y_train_with_noise)
    # Accuracy Score
    test_acc = mlp.score(x_train_with_noise, y_train_with_noise)
    # print(mlp.score(x_train_with_noise, y_train_with_noise))
    print(f"Test accuracy with original_ratio={original_ratio}: {test_acc}")

    # predictions
    pred = mlp.predict(x_test)
    print(confusion_matrix(y_test.argmax(axis=1), pred.argmax(axis=1)))
    print(classification_report(y_test.argmax(axis=1), pred.argmax(axis=1)))
    print("--------------------------------------------------")

Training and testing with original_ratio=0.1
Test accuracy with original_ratio=0.1: 0.99905
[[ 920    0    8    8    0   29   13    1    0    1]
 [  28 1065    3    7    3    4    3    5   16    1]
 [ 110    3  805   46   10   10    9   10   24    5]
 [  48   18   48  826    0   33    1   12   16    8]
 [  73    7   31    0  792    3    4   19    3   50]
 [  90    9    3   41   14  679    9    6   39    2]
 [  62    3   19    0   30   30  806    1    5    2]
 [  28   11   30    7    9    3    1  897    1   41]
 [  99   24   12   30   30   58    3    9  700    9]
 [  46   12   11   11   73    4    0   32   10  810]]
              precision    recall  f1-score   support

           0       0.61      0.94      0.74       980
           1       0.92      0.94      0.93      1135
           2       0.83      0.78      0.80      1032
           3       0.85      0.82      0.83      1010
           4       0.82      0.81      0.82       982
           5       0.80      0.76      0.78       89