In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.regularizers import l2 
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dense, Flatten, Dropout, Activation, BatchNormalization

%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib.image  as mpimg

In [2]:
train = pd.read_csv('./train.csv')
test = pd.read_csv('./test.csv')

In [3]:
train.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
y = train['label']
X = train.drop(train.columns[[0]], axis = 1)

In [5]:
test.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
X = X.values
print(X)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [7]:
X.shape

(42000, 784)

In [8]:
X = X.values.reshape(-1,28,28,1)
test = test.values.reshape(-1,28,28,1)

AttributeError: 'numpy.ndarray' object has no attribute 'values'

In [None]:
print("Size of Dataset: " , len(X))

In [None]:
Cross_Validation_size = int(len(X)*0.05)
print("Validation Set Size: " , Cross_Validation_size)

In [None]:
X_train, X_val, Y_train, Y_val = train_test_split(X, y, test_size = Cross_Validation_size, random_state=2)

X_test = test

In [None]:
plt.figure(figsize=(2, 2))
plt.imshow(X_train[21][:,:,0], cmap = 'gray')
plt.show()

In [None]:
X_train = np.array(X_train)
X_val = np.array(X_val)
X_test = np.array(X_test)

# Padding the images by 2 pixels since in the paper input images were 32x32
X_train = np.pad(X_train, ((0,0),(2,2),(2,2),(0,0)), 'constant')
X_val = np.pad(X_val, ((0,0),(2,2),(2,2),(0,0)), 'constant')
X_test = np.pad(X_test, ((0,0),(2,2),(2,2),(0,0)), 'constant')

# Standardization
mean_px = X_train.mean().astype(np.float32)
std_px = X_train.std().astype(np.float32)
X_train = (X_train - mean_px)/(std_px)

mean_px = X_val.mean().astype(np.float32)
std_px = X_val.std().astype(np.float32)
X_val = (X_val - mean_px)/(std_px)

mean_px = X_test.mean().astype(np.float32)
std_px = X_test.std().astype(np.float32)
X_test = (X_test - mean_px)/(std_px)

# One-hot encoding the labels
Y_train = to_categorical(Y_train, num_classes = 10)
Y_val = to_categorical(Y_val, num_classes = 10)

In [None]:
print(Y_train)

In [None]:
datagen = ImageDataGenerator(
        featurewise_center = False,
        samplewise_center = False,
        featurewise_std_normalization = False,
        samplewise_std_normalization = False,
        zca_whitening = False,
        rotation_range = 10,
        zoom_range = 0.1,
        width_shift_range = 0.1,
        height_shift_range = 0.1,
        horizontal_flip = False,
        vertical_flip = False)

datagen.fit(X_train)

In [None]:
variable_learning_rate = ReduceLROnPlateau(monitor='val_loss', factor = 0.2, patience = 2)

In [None]:
model = Sequential([
    Conv2D(filters = 32, kernel_size = 5, strides = 1, activation = 'relu', input_shape = (32,32,1), kernel_regularizer=l2(0.0005), name = 'convolution_1'),
    Conv2D(filters = 32, kernel_size = 5, strides = 1, name = 'convolution_2', use_bias=False),
    BatchNormalization(name = 'batchnorm_1'),
    Activation("relu"),
    MaxPooling2D(pool_size = 2, strides = 2, name = 'max_pool_1'),
    Dropout(0.25, name = 'dropout_1'),
    Conv2D(filters = 64, kernel_size = 3, strides = 1, activation = 'relu', kernel_regularizer=l2(0.0005), name = 'convolution_3'),
    Conv2D(filters = 64, kernel_size = 3, strides = 1, name = 'convolution_4', use_bias=False),
    BatchNormalization(name = 'batchnorm_2'),
    Activation("relu"),
    MaxPooling2D(pool_size = 2, strides = 2, name = 'max_pool_2'),
    Dropout(0.25, name = 'dropout_2'),
    Flatten(name = 'flatten'),    
    Dense(units = 256, name = 'fully_connected_1', use_bias=False), 
    BatchNormalization(name = 'batchnorm_3'),
    Activation("relu"),
    Dense(units = 128, name = 'fully_connected_2', use_bias=False), 
    BatchNormalization(name = 'batchnorm_4'),
    Activation("relu"),
    Dense(units = 84, name = 'fully_connected_3', use_bias=False),
    BatchNormalization(name = 'batchnorm_5'),
    Activation("relu"),
    Dropout(0.25, name = 'dropout_3'),
    Dense(units = 10, activation = 'softmax', name = 'output')
    ])

In [None]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
history = model.fit(X_train, Y_train, epochs = 13, batch_size = 64, callbacks = [variable_learning_rate], validation_data = (X_val,Y_val))

In [None]:
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1) 
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='upper left')

plt.subplot(1, 2, 2)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='upper left')

plt.tight_layout()
plt.show()

In [None]:
results = model.predict(X_test)
results = np.argmax(results,axis = 1)
results = pd.Series(results,name="Label")

In [None]:
submission = pd.concat([pd.Series(range(1,28001),name = "ImageId"),results],axis = 1)
submission.to_csv("predictions.csv",index=False)

In [None]:
test_labels = results

In [None]:
prediction = model.predict(X_test[[10]])
prediction_class = np.argmax(prediction, axis=1)
# prediction_classes_set.append(prediction_class)

In [None]:
# print(prediction_classes_set)

In [None]:
# prediction = model.predict(X_test[[i]])
# prediction_class = np.argmax(prediction, axis=1)
# prediction_classes_set.append(prediction_class)

plt.figure(figsize = (2,2))

plt.imshow(X_test[10].reshape(32, 32, 1), cmap = 'gray')
plt.title(f'Original Image\nPredicted: {prediction_class[0]}')
plt.axis('off')
plt.show()
    

<br></br>
<h3>To determine most vulnerable digits(out of ten digits), adding Gaussian noise to the images:</h3>

In [None]:
def gaussian_noise(X_test, sigma=1):
    """Add Gaussian noise to the dataset."""
    noise = np.random.normal(loc=0.0, scale=sigma, size=X_test.shape)
    X_noisy = X_test + noise
    return np.clip(X_noisy, 0., 1.)

In [None]:
#one-hot encoding test labels

Y_test = to_categorical(test_labels, num_classes = 10)

In [None]:
print(Y_test)

In [None]:
from sklearn.metrics import classification_report, accuracy_score

sigma_values = [0.4]
for sigma in sigma_values:
    X_test_noisy = gaussian_noise(X_test, sigma=sigma)
    predictions = model.predict(X_test_noisy)
    predicted_classes = np.argmax(predictions, axis=1)
    true_classes = np.argmax(Y_test, axis=1)
        
    
    accuracy = accuracy_score(true_classes, predicted_classes)
    print(f"Accuracy with sigma={sigma}: {accuracy}")
    
    # Detailed report to find the most vulnerable digit
    print(f"Classification Report for sigma={sigma}:")
    print(classification_report(true_classes, predicted_classes))

In [None]:

for i in range(30):
    fig, ax = plt.subplots(1, 2, figsize=(10, 5))

    ax[0].imshow(X_test[i].reshape(32, 32, 1), cmap = 'gray')
    ax[0].set_title(f'Original Image\nPredicted: {true_classes[i]}')
    ax[0].axis('off')

    # Plot noisy image
    ax[1].imshow(X_test_noisy[i].reshape(32, 32, 1), cmap = 'gray')  # Adjust shape as needed for noisy image
    ax[1].set_title(f'Noisy Image\nPredicted: {predicted_classes[i]}')
    ax[1].axis('off')

    plt.show()

In [None]:
counts = np.zeros(10)

for true_class, predicted_class in zip(predicted_classes, true_classes):
    if(true_class != predicted_class):
            counts[true_class]+=1

In [None]:
np.argmax(counts)

In [None]:
pip freeze > requirements.txt

<br></br>
<h2>Findings of the Experiment:</h2>

After adding noise(Sigma value is 0.4) to the predicted values, the most vulnerable digit I got is number "1".