In [5]:
import keras
from keras.datasets import mnist
import numpy as np
from matplotlib import pyplot as plt

In [6]:
# Load MNIST through keras.datasets and keep the raw result around.
# Later cells unpack this value as (x_train, y_train), (x_test, y_test).
mnist_data = mnist.load_data()

In [7]:
def make_noisy(mnist_image, rng=None):
    """Return a copy of ``mnist_image`` corrupted with salt-and-pepper noise.

    Every pixel independently draws an integer uniformly from 0..9. A draw of
    0 sets the pixel to 0, a draw of 1 sets it to 255, and any other draw
    leaves the pixel unchanged, so about 10% of pixels are forced to 0 and
    about 10% are forced to 255.

    Parameters
    ----------
    mnist_image : numpy.ndarray
        Image data of any shape. It is copied, never modified in place.
    rng : object exposing ``randint(low, high, size=...)``, optional
        Random source, e.g. ``np.random.RandomState(seed)``, for reproducible
        noise. When omitted, NumPy's global ``np.random`` state is used.

    Returns
    -------
    numpy.ndarray
        Noisy copy with the same shape and dtype as ``mnist_image``.
    """
    mnist_rand_image = np.copy(mnist_image)
    source = np.random if rng is None else rng

    # One vectorized draw for the whole image instead of one Python-level
    # randint call per pixel; the per-pixel distribution is unchanged.
    draws = source.randint(0, 10, size=mnist_rand_image.shape)
    mnist_rand_image[draws == 0] = 0
    mnist_rand_image[draws == 1] = 255

    return mnist_rand_image

#plt.imshow(make_noisy(mnist_data[0][0][0]), cmap='gray')

In [9]:
# Noisy counterpart of the MNIST training images (mnist_data[0][0]):
# make_noisy is applied to every image, in order, and the results are
# stacked into a single array.
train_data_size = mnist_data[0][0].shape[0]

train_noisy_array = np.asarray(
    [make_noisy(mnist_data[0][0][i]) for i in range(train_data_size)]
)

In [26]:
# Noisy counterpart of the MNIST test images (mnist_data[1][0]):
# make_noisy is applied to every image, in order, and the results are
# stacked into a single array.
test_data_size = mnist_data[1][0].shape[0]

test_noisy_array = np.asarray(
    [make_noisy(mnist_data[1][0][i]) for i in range(test_data_size)]
)

In [11]:
# Labels of the training split (second element of the first MNIST tuple).
train_labels = mnist_data[0][1]

In [12]:
# Labels of the test split (second element of the second MNIST tuple).
test_labels = mnist_data[1][1]

In [27]:
# Pair the noisy images with the original labels, using the same
# ((train_x, train_y), (test_x, test_y)) nesting that mnist_data is unpacked with.
noisy_mnist = (
    (train_noisy_array, train_labels),
    (test_noisy_array, test_labels),
)

In [9]:
## CLEAN MNIST
# Train a one-hidden-layer dense classifier on the unmodified MNIST data,
# report its test loss/accuracy and save the trained model to disk.

from __future__ import print_function

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop, SGD

batch_size = 128
num_classes = 10
epochs = 20

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every image to a 784-long vector, cast to float32 and divide by 255.
x_train = x_train.reshape(60000, 784).astype('float32') / 255
x_test = x_test.reshape(10000, 784).astype('float32') / 255
print(len(x_train), 'train samples')
print(len(x_test), 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# 784 inputs -> 512 ReLU units -> softmax over the classes.
# The commented-out layers are left disabled, as in the original cell.
model = Sequential([
    Dense(512, activation='relu', input_shape=(784,)),
    #Dropout(0.2),
    #Dense(32, activation='relu'),
    #Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])

model.summary()

sgd = SGD(lr=0.03, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# validation_split=0.2 matches the "Train on 48000 samples, validate on
# 12000 samples" line in the recorded output.
history = model.fit(x_train, y_train,
                    validation_split=0.2,
                    epochs=epochs,
                    batch_size=batch_size,
                    verbose=1)

# score[0] is the loss, score[1] the accuracy metric requested in compile().
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

model.save("keras_mnist_modelv2_one_dense_clean.h5")

60000 train samples
10000 test samples
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 512)               401920    
_________________________________________________________________
dense_2 (Dense)              (None, 10)                5130      
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________
Train on 48000 samples, validate on 12000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test loss: 0.06225668139125919
Test accuracy: 0.9801


In [10]:
## NOISY MNIST
# Train the same one-hidden-layer dense classifier, this time on the
# noise-corrupted dataset (noisy_mnist), and save it under its own filename.

from __future__ import print_function

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop, SGD

batch_size = 128
num_classes = 10
epochs = 20

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = noisy_mnist

# Flatten every image to a 784-long vector, cast to float32 and divide by 255.
x_train = x_train.reshape(60000, 784).astype('float32') / 255
x_test = x_test.reshape(10000, 784).astype('float32') / 255
print(len(x_train), 'train samples')
print(len(x_test), 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# 784 inputs -> 512 ReLU units -> softmax over the classes.
# The commented-out layers are left disabled, as in the original cell.
model = Sequential([
    Dense(512, activation='relu', input_shape=(784,)),
    #Dropout(0.2),
    #Dense(32, activation='relu'),
    #Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])

model.summary()

sgd = SGD(lr=0.03, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# validation_split=0.2 matches the "Train on 48000 samples, validate on
# 12000 samples" line in the recorded output.
history = model.fit(x_train, y_train,
                    validation_split=0.2,
                    epochs=epochs,
                    batch_size=batch_size,
                    verbose=1)

# score[0] is the loss, score[1] the accuracy metric requested in compile().
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

model.save("keras_mnist_modelv2_one_dense_noisy.h5")

60000 train samples
10000 test samples
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 512)               401920    
_________________________________________________________________
dense_4 (Dense)              (None, 10)                5130      
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________
Train on 48000 samples, validate on 12000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test loss: 0.2283195750296698
Test accuracy: 0.9411


In [14]:
from keras.models import load_model

In [15]:
def data_cleanup_for_evaluation_helper(xx_test, yy_test, numnum_classes=10):
    """Prepare raw image/label arrays for ``model.evaluate``.

    Applies the same preprocessing the training cells use: each sample is
    flattened to a 1-D vector, cast to float32 and divided by 255, and the
    integer labels are one-hot encoded with ``keras.utils.to_categorical``.

    The number of samples is taken from ``xx_test`` itself instead of being
    hard-coded to 10000, so the helper also works on subsets or other splits.
    For the 10000 x 28 x 28 MNIST test set the result is identical to before.

    Parameters
    ----------
    xx_test : numpy.ndarray
        Images with samples along the first axis. The caller's array is not
        modified (``astype`` produces a new array before scaling).
    yy_test : array-like of int
        Class labels, one per sample.
    numnum_classes : int, optional
        Number of classes for the one-hot encoding (default 10).

    Returns
    -------
    tuple
        ``(x_test, y_test)``: the flattened, scaled float32 images and the
        one-hot label matrix.
    """
    # -1 lets NumPy infer the per-sample feature count (784 for MNIST images).
    x_test = xx_test.reshape(xx_test.shape[0], -1).astype('float32') / 255

    # convert class vectors to binary class matrices
    y_test = keras.utils.to_categorical(yy_test, numnum_classes)
    return (x_test, y_test)

In [20]:
# Reload the network written by the NOISY MNIST training cell
# (same filename as that cell's model.save call).
noisy_model_filename = "keras_mnist_modelv2_one_dense_noisy.h5"
noisy_model = load_model(noisy_model_filename)

In [28]:
# Evaluate the noisy-trained model on the clean (original) MNIST test split.
(x_train, y_train), (x_test, y_test) = mnist_data

(x_test, y_test) = data_cleanup_for_evaluation_helper(x_test, y_test)

score = noisy_model.evaluate(x_test, y_test, verbose=1)
# Format explicitly so the line reads "Test loss: <value>" whether or not
# print_function is in effect; a Python 2 print statement given
# ('label', value) prints the tuple repr instead.
print('Test loss: {}'.format(score[0]))
print('Test accuracy: {}'.format(score[1]))

('Test loss:', 0.15027683409203274)
('Test accuracy:', 0.9667)


In [29]:
# Evaluate the noisy-trained model on the noisy MNIST test split.
(x_train, y_train), (x_test, y_test) = noisy_mnist

(x_test, y_test) = data_cleanup_for_evaluation_helper(x_test, y_test)

score = noisy_model.evaluate(x_test, y_test, verbose=1)
# Format explicitly so the line reads "Test loss: <value>" whether or not
# print_function is in effect; a Python 2 print statement given
# ('label', value) prints the tuple repr instead.
print('Test loss: {}'.format(score[0]))
print('Test accuracy: {}'.format(score[1]))

('Test loss:', 0.23684982892246917)
('Test accuracy:', 0.9391)


In [23]:
# Reload the network written by the CLEAN MNIST training cell
# (same filename as that cell's model.save call).
clean_model_filename = "keras_mnist_modelv2_one_dense_clean.h5"
clean_model = load_model(clean_model_filename)

In [30]:
# Evaluate the clean-trained model on the clean (original) MNIST test split.
(x_train, y_train), (x_test, y_test) = mnist_data

(x_test, y_test) = data_cleanup_for_evaluation_helper(x_test, y_test)

score = clean_model.evaluate(x_test, y_test, verbose=1)
# Format explicitly so the line reads "Test loss: <value>" whether or not
# print_function is in effect; a Python 2 print statement given
# ('label', value) prints the tuple repr instead.
print('Test loss: {}'.format(score[0]))
print('Test accuracy: {}'.format(score[1]))

('Test loss:', 0.06225668139125919)
('Test accuracy:', 0.9801)


In [31]:
# Evaluate the clean-trained model on the noisy MNIST test split.
(x_train, y_train), (x_test, y_test) = noisy_mnist

(x_test, y_test) = data_cleanup_for_evaluation_helper(x_test, y_test)

score = clean_model.evaluate(x_test, y_test, verbose=1)
# Format explicitly so the line reads "Test loss: <value>" whether or not
# print_function is in effect; a Python 2 print statement given
# ('label', value) prints the tuple repr instead.
print('Test loss: {}'.format(score[0]))
print('Test accuracy: {}'.format(score[1]))

('Test loss:', 0.5437456713080406)
('Test accuracy:', 0.8279)


In [32]:
## BELOW WILL BE NOISY THEN CLEAN TRAINING
# Build a training array in which the first (1 - validation_split_size) share
# of the images and the remaining validation share are each half noisy
# (first half of the segment) and half clean (second half of the segment).

def _noisy_first_half(images):
    """Return a list of ``images`` whose first half went through make_noisy."""
    count = len(images)
    return [
        make_noisy(image) if position < count / 2 else image
        for position, image in enumerate(images)
    ]

train_data_size = mnist_data[0][0].shape[0]
validation_split_size = 0.2
actual_train_data_size = int(train_data_size * (1 - validation_split_size))
validation_data_size = train_data_size - actual_train_data_size

# Training segment first, validation segment second, so the original image
# order (and the order of make_noisy calls) is preserved.
train_noisy_clean_array = np.asarray(
    _noisy_first_half(mnist_data[0][0][:actual_train_data_size])
    + _noisy_first_half(mnist_data[0][0][actual_train_data_size:])
)

In [118]:
# Original MNIST labels for the training and test splits.
train_labels, test_labels = mnist_data[0][1], mnist_data[1][1]

In [119]:
# Half-noisy/half-clean training images with the original labels; the test
# split is the unmodified MNIST test data.
noisy_then_clean_mnist = (
    (train_noisy_clean_array, train_labels),
    (mnist_data[1][0], test_labels),
)

In [126]:
## NOISY_THEN_CLEAN MNIST, BUT BECAUSE OF SHUFFLING, THE NOISY AND CLEAN WILL BE UNIFORMLY INTERSPERSED
# Train the same one-hidden-layer dense classifier on the half-noisy,
# half-clean dataset (noisy_then_clean_mnist) and save it under its own filename.

from __future__ import print_function

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop, SGD

batch_size = 128
num_classes = 10
epochs = 20

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = noisy_then_clean_mnist #mnist.load_data()

# Flatten every image to a 784-long vector, cast to float32 and divide by 255.
x_train = x_train.reshape(60000, 784).astype('float32') / 255
x_test = x_test.reshape(10000, 784).astype('float32') / 255
print(len(x_train), 'train samples')
print(len(x_test), 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# 784 inputs -> 512 ReLU units -> softmax over the classes.
# The commented-out layers are left disabled, as in the original cell.
model = Sequential([
    Dense(512, activation='relu', input_shape=(784,)),
    #Dropout(0.2),
    #Dense(32, activation='relu'),
    #Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])

model.summary()

sgd = SGD(lr=0.03, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# validation_split=0.2 matches the "Train on 48000 samples, validate on
# 12000 samples" line in the recorded output.
history = model.fit(x_train, y_train,
                    validation_split=0.2,
                    epochs=epochs,
                    batch_size=batch_size,
                    verbose=1)

# score[0] is the loss, score[1] the accuracy metric requested in compile().
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

model.save("keras_mnist_modelv2_one_dense_noisy_clean_uniform.h5")

60000 train samples
10000 test samples
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_21 (Dense)             (None, 512)               401920    
_________________________________________________________________
dense_22 (Dense)             (None, 10)                5130      
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________
Train on 48000 samples, validate on 12000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test loss: 0.078595794655988
Test accuracy: 0.9774


In [33]:
# Reload the model trained on the half-noisy/half-clean data and evaluate it
# on the clean (original) MNIST test split.
noisy_clean_uniform_model_filename = "keras_mnist_modelv2_one_dense_noisy_clean_uniform.h5"
noisy_clean_uniform_model = load_model(noisy_clean_uniform_model_filename)

(x_train, y_train), (x_test, y_test) = mnist_data

(x_test, y_test) = data_cleanup_for_evaluation_helper(x_test, y_test)

score = noisy_clean_uniform_model.evaluate(x_test, y_test, verbose=1)
# Format explicitly so the line reads "Test loss: <value>" whether or not
# print_function is in effect; a Python 2 print statement given
# ('label', value) prints the tuple repr instead.
print('Test loss: {}'.format(score[0]))
print('Test accuracy: {}'.format(score[1]))

('Test loss:', 0.078595794655988)
('Test accuracy:', 0.9774)


In [34]:
# Reload the model trained on the half-noisy/half-clean data and evaluate it
# on the noisy MNIST test split.
noisy_clean_uniform_model_filename = "keras_mnist_modelv2_one_dense_noisy_clean_uniform.h5"
noisy_clean_uniform_model = load_model(noisy_clean_uniform_model_filename)

(x_train, y_train), (x_test, y_test) = noisy_mnist

(x_test, y_test) = data_cleanup_for_evaluation_helper(x_test, y_test)

score = noisy_clean_uniform_model.evaluate(x_test, y_test, verbose=1)
# Format explicitly so the line reads "Test loss: <value>" whether or not
# print_function is in effect; a Python 2 print statement given
# ('label', value) prints the tuple repr instead.
print('Test loss: {}'.format(score[0]))
print('Test accuracy: {}'.format(score[1]))

('Test loss:', 0.23047032042229548)
('Test accuracy:', 0.9363)


#A network trained on clean mnist (i.e. regular mnist) has the following performance:
    ##regular mnist test accuracy: 0.9801
    ##noisy mnist test accuracy  : 0.8355

#A network trained on all noisy mnist has the following performance:
    ##regular mnist test accuracy: 0.9667
    ##noisy mnist test accuracy  : 0.9411
    
#A network trained on noisy and clean mnist (half of each), uniformly interspersed in the data, has the following performance:
    ##regular mnist test accuracy: 0.9774
    ##noisy mnist test accuracy  : 0.9368
    
##For the noisy mnist test accuracy, there can be a discrepancy between runs because the noise is added randomly without a fixed seed (i.e. two noisy mnist datasets generated from the same images can differ, and this is something we have not currently accounted for)