# Character Recognition

The next cell downloads MNIST from the Internet to an `mnist` directory. It takes some time, but you should only have to run it once. However, there is no harm in running it multiple times, if you do it by mistake.

In [None]:
# Only run once, to download MNIST.

import urllib.request
import os

# Create the 'mnist' directory. exist_ok=True makes this a no-op when the
# directory is already there, so the cell is safe to re-run and there is no
# gap between checking for the directory and creating it:
LOCAL_DIR = './mnist/'
os.makedirs(LOCAL_DIR, exist_ok=True)

# Download the four MNIST files from the official site:
MNIST_SITE = 'http://yann.lecun.com/exdb/mnist/'
TRAINING_IMAGES = 'train-images-idx3-ubyte.gz'
TRAINING_LABELS = 'train-labels-idx1-ubyte.gz'
TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
TEST_LABELS = 't10k-labels-idx1-ubyte.gz'

# Each file is saved in LOCAL_DIR under the same name it has on the site.
# Re-running simply downloads the files again and overwrites the old copies.
for mnist_file in (TRAINING_IMAGES, TRAINING_LABELS, TEST_IMAGES, TEST_LABELS):
    urllib.request.urlretrieve(MNIST_SITE + mnist_file, LOCAL_DIR + mnist_file)

print("Data loaded")

Now here's the code that loads MNIST, starting with the images:

In [None]:
import numpy as np
import gzip
import struct

def load_images(filename):
    """Load a gzipped MNIST image file as a matrix with a leading bias column.

    Args:
        filename: Path to the gzip-compressed IDX3 image file.

    Returns:
        A NumPy array of unsigned bytes with one row per image. Column 0 is
        a bias column full of 1s; the remaining columns are the pixels of
        the image, flattened into a single row.
    """
    # Open and unzip the file of images:
    with gzip.open(filename, 'rb') as f:
        # Read the header: four big-endian 32-bit unsigned integers. In the
        # IDX3 layout the row count comes before the column count:
        _magic, n_images, image_rows, image_columns = struct.unpack('>IIII', f.read(16))
        # Read all the pixels into a long NumPy array:
        all_pixels = np.frombuffer(f.read(), dtype=np.uint8)
        # Reshape the array into a matrix where each line is an image:
        images_matrix = all_pixels.reshape(n_images, image_rows * image_columns)
        # Add a bias column full of 1s as the first column in the matrix
        return np.insert(images_matrix, 0, 1, axis=1)

In [None]:
# Training images: 60000 rows, each 785 elements (1 bias + 28 * 28 pixels).
# The file is read from the ./mnist/ directory.
X_train = load_images("./mnist/train-images-idx3-ubyte.gz")

# Test images: 10000 rows, each 785 elements, with the same structure as X_train
X_test = load_images("./mnist/t10k-images-idx3-ubyte.gz")

Let's check that we have a (60000, 785) matrix of training images:

In [None]:
# Expected: (60000, 785) -- one row per training image
X_train.shape

Now let's load the labels. Note that the system we're writing identifies the digit 4, so the labels that are originally 4 become 1, and the others become 0:

In [None]:
def load_labels(filename, digit=4):
    """Load a gzipped MNIST label file as a one-column matrix of 0s and 1s.

    Args:
        filename: Path to the gzip-compressed IDX1 label file.
        digit: The digit to recognize. Labels equal to this value become 1,
            and all other labels become 0. Defaults to 4.

    Returns:
        A NumPy integer array with one row per label and a single column.
    """
    # Open and unzip the file of labels:
    with gzip.open(filename, 'rb') as f:
        # Skip the header bytes:
        f.read(8)
        # Read all the labels as raw bytes, one byte per label:
        all_labels = f.read()
        # Reshape the labels into a one-column matrix:
        labels_matrix = np.frombuffer(all_labels, dtype=np.uint8).reshape(-1, 1)
        # Encode the matrix so that the target digit becomes 1, and other digits become 0:
        return (labels_matrix == digit).astype(int)

In [None]:
# 60K labels, each with value 1 if the digit is a four, and 0 otherwise
Y_train = load_labels("./mnist/train-labels-idx1-ubyte.gz")

# 10000 labels, with the same encoding as Y_train
Y_test = load_labels("./mnist/t10k-labels-idx1-ubyte.gz")

The training labels should be a matrix with 1 column and 60K rows:

In [None]:
# Expected: (60000, 1) -- one label per training image
Y_train.shape

So far, so good. Now here is the code of the binary classifier from the previous module. Nothing changed in any of these functions:

In [None]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + e^-z), applied element-wise to z."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [None]:
def predict(X, w):
    """Return the sigmoid of the matrix product of X and w."""
    weighted_sum = np.matmul(X, w)
    return sigmoid(weighted_sum)

In [None]:
def loss(X, Y, w):
    """Return the log loss of the predictions for X against the labels Y.

    The value is the negated average of
    Y * log(y_hat) + (1 - Y) * log(1 - y_hat), where y_hat = predict(X, w).
    """
    y_hat = predict(X, w)
    log_likelihood = Y * np.log(y_hat) + (1 - Y) * np.log(1 - y_hat)
    return -np.average(log_likelihood)

In [None]:
def gradient(X, Y, w):
    """Return the gradient of the loss with respect to the weights w.

    The prediction errors are multiplied by the transpose of X, and the
    result is divided by the number of rows in X.
    """
    n_examples = X.shape[0]
    errors = predict(X, w) - Y
    return np.matmul(X.T, errors) / n_examples

In [None]:
def train(X, Y, iterations, lr):
    """Train the weights with gradient descent and return them.

    Starts from a column of zeros with one weight per column of X. On each
    of the `iterations` steps it prints the current loss, then moves the
    weights against the gradient, scaled by the learning rate `lr`.
    """
    n_weights = X.shape[1]
    w = np.zeros((n_weights, 1))
    for step in range(iterations):
        current_loss = loss(X, Y, w)
        print("Iteration %4d => Loss: %.20f" % (step, current_loss))
        w -= gradient(X, Y, w) * lr
    return w

Let's run training with 200 iterations and a pretty small learning rate. This is going to take a minute or two:

In [None]:
# Train on the whole training set; the loss is printed at every iteration
w = train(X_train, Y_train, iterations=200, lr=0.00001)

The result is a (785, 1) matrix of weights—one for each pixel in the images, plus one for the bias:

In [None]:
# Expected: (785, 1) -- one weight per column of X_train
w.shape

Now let's check the first ten predictions on the test set, and compare them with the first ten test labels:

In [None]:
# Predictions for the test images, rounded to 0 or 1; show the first ten
np.round(predict(X_test, w))[0:10]

In [None]:
# Labels of the first ten test images (1 means the digit is a 4)
Y_test[0:10]

This doesn't look bad so far! As usual, feel free to explore the results over other areas of the test set. In the next module, we'll predict all the digits.