We start by importing some useful libraries: numpy for matrices, keras for machine learning and matplotlib for visualizing data. We also make matplotlib plots appear 'inline', i.e. in the notebook, along with the rest of the content.

In [None]:
%matplotlib inline

In [None]:
import numpy
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import np_utils
import matplotlib.pyplot as plt
import matplotlib.image as img

Download MNIST dataset (if needed) and separate training data and validation data. 'X' matrices are input data and 'y' vectors are expected outputs, i.e. labels.

In [None]:
# Load MNIST as raw (inputs, labels) pairs for the training and test splits.
(X_train_raw, y_train_raw), (X_test_raw, y_test_raw) = mnist.load_data()

# Report the shape of each of the four arrays.
for caption, array in (
    ("X_train shape:", X_train_raw),
    ("y_train shape:", y_train_raw),
    ("X_test shape:", X_test_raw),
    ("y_test shape:", y_test_raw),
):
    print(caption, array.shape)

We print some samples of the input data just to show what it looks like.

In [None]:
# Draw the first four training images in a 2x2 grid of subplots.
gray = plt.get_cmap('gray')
for position, image in enumerate(X_train_raw[:4], start=1):
    plt.subplot(2, 2, position)
    plt.imshow(image, cmap=gray)
# show the plot
plt.show()

# Print the label that belongs to each of the images shown above.
for i in range(4):
    print("y_train_raw[%d]:" % i, y_train_raw[i])

In [None]:
# Flatten each 28*28 image into one 784-element float32 row vector.
num_pixels = X_train_raw.shape[1] * X_train_raw.shape[2]
X_train_flattened, X_test_flattened = (
    raw.reshape(raw.shape[0], num_pixels).astype('float32')
    for raw in (X_train_raw, X_test_raw)
)

print("X_train_flattened shape:", X_train_flattened.shape)
print("X_test_flattened shape:", X_test_flattened.shape)

# Show the first four flattened samples as horizontal strips, stacked
# vertically; each row vector is wrapped in a list so imshow gets a 2-D input.
gray = plt.get_cmap('gray')
for strip_number, flat_image in enumerate(X_train_flattened[:4], start=1):
    plt.subplot(4, 1, strip_number)
    plt.imshow([flat_image], cmap=gray, extent=(0, 784, 0, 70), aspect='equal')
plt.show()

# Print the label that belongs to each strip.
for i in range(4):
    print("y_train_raw[%d]:" % i, y_train_raw[i])

In [None]:
# Scale pixel intensities from the 0-255 range down to 0-1.
X_train, X_test = X_train_flattened / 255, X_test_flattened / 255

# Dividing does not change the array shapes; print them to confirm.
for caption, array in (
    ("X_train shape: still", X_train),
    ("X_test shape: still", X_test),
):
    print(caption, array.shape)

In [None]:
# One-hot encode the label vectors of both splits.
y_train, y_test = (
    np_utils.to_categorical(labels) for labels in (y_train_raw, y_test_raw)
)
# The width of the encoded matrix is the number of classes.
num_classes = y_test.shape[-1]

# Show one label before and after encoding.
print("y_test_raw[0]: ", y_test_raw[0])
print("y_test[0]: ", y_test[0])

We define a function that creates a neural network architecture with one hidden layer and initializes its weights to random numbers. num_pixels is 784, the number of pixels in each of our input samples. num_classes is 10, one for each digit from 0 to 9.

We will probably want to reuse this model definition and initialization later on.

In [None]:
def create_model():
    """Build and compile a fresh, untrained network with one hidden layer.

    The hidden layer has ``num_pixels`` ReLU units fed by ``num_pixels``
    inputs; the output layer has ``num_classes`` softmax units. Both sizes
    are read from module-level variables.

    Returns:
        The compiled ``Sequential`` model.
    """
    hidden_layer = Dense(
        num_pixels,
        input_dim=num_pixels,
        kernel_initializer='normal',
        activation='relu',
    )
    output_layer = Dense(
        num_classes,
        kernel_initializer='normal',
        activation='softmax',
    )
    network = Sequential([hidden_layer, output_layer])
    # Attach the cost function (loss), the optimizer and the reported metric.
    network.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

This function trains a given model with given training data and prints the result. The training data is divided into batches of 200 samples each and we train the model on them until all samples have been seen 10 times.

In [None]:
def train_model(model, X_train, y_train):
    """Fit ``model`` on the given training data and print its test error.

    Training runs for 10 epochs in batches of 200 samples. The module-level
    ``X_test`` / ``y_test`` arrays serve both as validation data during
    fitting and as the data for the final evaluation.

    Args:
        model: compiled model to train (modified in place).
        X_train: training inputs.
        y_train: training labels matching ``X_train``.
    """
    held_out = (X_test, y_test)
    model.fit(
        X_train,
        y_train,
        validation_data=held_out,
        epochs=10,
        batch_size=200,
        verbose=2,
    )
    # Final evaluation; entry 1 of the result is used as the accuracy
    # (presumably the first compiled metric -- holds for create_model()).
    scores = model.evaluate(*held_out, verbose=0)
    error_percent = 100-scores[1]*100
    print("Baseline Error: %.2f%%" % error_percent)

Finally: We create an untrained model, and train it. Once this is done, it should be able to reasonably recognize hand-written digits.

In [None]:
# Build a fresh, untrained network ...
model = create_model()

# ... and fit it on the normalized training set with one-hot labels.
train_model(model, X_train=X_train, y_train=y_train)

Utility function to read an image file, assumed to be a 28x28 black-and-white .png image containing one hand-written digit.

In [None]:
def read_digit(filename):
    """Read an image file of one digit, display it, and return it flattened.

    Args:
        filename: path to the image file, expected to hold 28x28 pixels.

    Returns:
        A 1-D array of 784 pixel values taken from a single channel.

    Raises:
        ValueError: from ``reshape`` if the image does not have exactly
            784 pixels.
    """
    pixels = img.imread(filename)
    if pixels.ndim == 2:
        # A file stored as true grayscale decodes to a 2-D array with no
        # channel axis; indexing [:, :, 0] on it would raise IndexError.
        grayscale_image = pixels
    else:
        # Colour image: keep only the first channel as the grayscale value.
        grayscale_image = pixels[:, :, 0]
    plt.imshow(grayscale_image, cmap=plt.get_cmap('gray'))
    plt.show()
    return grayscale_image.reshape(784)

In [None]:
def evaluate_with_image(model, filename):
    """Classify the digit image in ``filename`` and visualize the result.

    Displays the image (via ``read_digit``), plots the model's output for
    each of the ten digits as a bar chart, and prints the index of the
    largest output.

    Args:
        model: trained model exposing ``predict``.
        filename: path of the image file to classify.
    """
    # The model expects a batch, so wrap the single sample in an outer array.
    single_sample_batch = numpy.array([read_digit(filename)])
    # The second positional argument is kept as in the original call
    # (presumably the batch size -- confirm against the Keras version in use).
    predicted = model.predict(single_sample_batch, 1)
    digit_positions = list(range(10))
    plt.bar(digit_positions, predicted[0], align='center')
    plt.show()
    most_likely = numpy.argmax(predicted)
    print("Most likely:", most_likely)

In [None]:
# Classify the digit image stored by the drawing board.
evaluate_with_image(model=model, filename="../drawingboard/digit.png")

### Your task: train a new model to recognize sevens with a horizontal bar.

#### Hints:
1. You need to add training data containing sevens with horizontal bars, both input (X_train) and output (y_train)
2. There are 5 files containing handwritten sevens in "../sevens/digit{1-5}.png"

#### Some useful functions:

`[expr(i) for i in range(MAX)]` <-- creates a list with MAX elements, holding the values of expr(i) for i from 0 to MAX-1, e.g.

`[ i*i for i in range(4)]` => `[0, 1, 4, 9]`

`a = [1, 2, 3]; a.append(4)` => `a` is now `[1, 2, 3, 4]` (append modifies the list in place and returns `None`)

`numpy.append(numpy.array([[1, 2], [3, 4]]), [[5, 6]], axis=0)` => `numpy.array([[1, 2], [3, 4], [5, 6]])` (without `axis=0` the result is flattened to `[1, 2, 3, 4, 5, 6]`)

In [None]:
def read_seven(sequence_number):
    """Read the sample seven with the given sequence number.

    Args:
        sequence_number: number inserted into the file name
            ``../sevens/digit<sequence_number>.png``.

    Returns:
        Whatever ``read_digit`` returns for that file.
    """
    seven_path = "../sevens/digit{}.png".format(sequence_number)
    return read_digit(seven_path)

# Read the five sample sevens (files 1 to 5) and repeat them in the same
# order 200 times, giving 1000 extra training inputs.
sevens = [read_seven(number) for number in range(1, 6)] * 200

# Stack the extra samples below the original training inputs.
X_train_new = numpy.append(X_train, sevens, axis=0)


In [None]:
# One-hot label for the digit 7: a 1 at index 7, zeros elsewhere.
y_seven = [1. if digit == 7 else 0. for digit in range(10)]
# One label per added sample (1000 in total).
y_sevens = [y_seven] * 1000
# Stack the extra labels below the original training labels.
y_train_new = numpy.append(y_train, y_sevens, axis=0)

In [None]:
# Start again from a freshly initialized network so nothing learned in the
# earlier training run carries over.
model = create_model()
# Fit the model with all the new sevens. train_model() runs the same
# fit / evaluate / "Baseline Error" print sequence that was previously
# copy-pasted here, with the same epochs, batch size and test data.
train_model(model, X_train_new, y_train_new)

In [None]:
def print_weights(layer):
    """Print the shapes and some values of a layer's weights.

    The first entry of ``layer.get_weights()`` is treated as the input
    weight matrix and the second as the bias vector.

    Args:
        layer: object exposing ``get_weights()`` that returns at least two
            array-likes.
    """
    layer_weights = layer.get_weights()
    kernel = layer_weights[0]
    bias = layer_weights[1]
    print("input weights shape:", numpy.asarray(kernel).shape)
    print("bias vector shape:", numpy.asarray(bias).shape)
    # NOTE(review): this prints row 0 of the weight matrix. For a Keras Dense
    # kernel that is presumably the weights leaving the first input rather
    # than those entering the first unit -- confirm the label is intended.
    first_row = kernel[0]
    print("input weights first neuron:", first_row)
    print("bias vector:", bias)

    
def describe_model(model):
    """Print a summary of every layer in ``model``.

    For each layer in ``model.layers`` this prints its ``units``, ``input``
    and ``output`` attributes, followed by its weights via ``print_weights``.

    Args:
        model: object exposing a ``layers`` iterable.
    """
    labelled_attributes = (
        ("units:", "units"),
        ("input:", "input"),
        ("output:", "output"),
    )
    for current_layer in model.layers:
        for caption, attribute_name in labelled_attributes:
            print(caption, getattr(current_layer, attribute_name))
        print_weights(current_layer)