In [4]:
# Python imports
import os

# Data manipulation imports
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# ML Imports
import tensorflow as tf
import keras
from keras.models import Sequential, load_model
from keras.layers import InputLayer
from keras.layers import Reshape, MaxPooling2D, ZeroPadding2D, Dropout
from keras.layers import Conv2D, Dense, Flatten
from keras.utils import to_categorical

# IPython display
from IPython.display import display

Using TensorFlow backend.


In [7]:
from google.colab import drive
drive.mount('/content/gdrive')
X = pd.read_csv('gdrive/My Drive/mnis/modified_mnist_x.csv', dtype=np.float32)
y = pd.read_csv('gdrive/My Drive/mnis/modified_mnist_y.csv', dtype=np.float32)


Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
BATCH_SIZE = 512 # Reduce this to 256, 128, 64, etc if your computer is too slow
IMG_SIZE = 64 # Images are 64x64
NUM_CHANNELS = 1 # Images can have multiple channels eg. rgb. These images are grayscale so we have just 1 channel
NUM_CLASSES = 10 # Digits 0-9, so we have 10 classes

IMG_SIZE_FLAT = IMG_SIZE ** 2 # 64^2 = 4096, the number of pixels per image
IMG_SHAPE = (IMG_SIZE, IMG_SIZE)
IMG_SHAPE_CHANNELS = (IMG_SIZE, IMG_SIZE, NUM_CHANNELS)
INPUT_SHAPE = (-1, IMG_SIZE, IMG_SIZE, NUM_CHANNELS)

In [0]:
def preprocess(x):
    '''
    Simple preprocessing function that removes the background from the images and normalizes pixel values between 0 and 1
    :param x: Input array or dataframe
    '''
    x[x < 255] = 0 # Remove background
    return x / 255.0 # normalize

def show_data(X, y_true=None, y_pred=None, n_examples=10, shape=IMG_SHAPE):
    '''
    Displays the images X along with actual and predicted labels if provided.
    :param X: The array of images.
    :param y_true: The true labels
    :param y_pred: The predicted labels
    :param n_examples: The number of images to show.
    :param shape: If images are flattened, reshapes to shape
    :return: None
    '''
    for i in range(n_examples):
        image = np.array(X[i], dtype=np.float32)
        pixels = image.reshape(shape)
        plt.imshow(pixels, cmap='binary')
        plt.xlabel('Actual: {} Pred: {}'.format(y_true[i] if y_true is not None else None, 
                                                y_pred[i] if y_pred is not None else None))
        plt.show()

def predict(model, X):
    '''
    Given a Keras model, performs prediction and computes the labels
    :param model: Keras model
    :param X: Input features
    :return: Predicted labels
    '''
    y_pred = model.predict(X)
    y_pred = np.argmax(y_pred, axis=1)
    return y_pred

In [10]:
display(X.head())
display(y.head())

Unnamed: 0,149.000000,155.000000,154.000000,158.000000,158.000000.1,156.000000,156.000000.1,158.000000.2,156.000000.2,153.000000,...,182.000000.30,179.000000.109,173.000000.140,181.000000.67,185.000000.17,184.000000.19,181.000000.68,177.000000.119,174.000000.129,173.000000.141
0,52.0,54.0,55.0,37.0,37.0,37.0,37.0,38.0,38.0,38.0,...,36.0,39.0,40.0,38.0,37.0,40.0,43.0,42.0,55.0,55.0
1,109.0,113.0,117.0,116.0,110.0,102.0,94.0,91.0,101.0,109.0,...,82.0,85.0,86.0,96.0,118.0,119.0,108.0,95.0,74.0,71.0
2,176.0,177.0,177.0,177.0,169.0,159.0,158.0,167.0,175.0,188.0,...,170.0,171.0,142.0,118.0,115.0,120.0,108.0,80.0,41.0,29.0
3,64.0,29.0,30.0,30.0,32.0,32.0,32.0,35.0,30.0,32.0,...,35.0,37.0,38.0,33.0,119.0,101.0,39.0,45.0,48.0,52.0
4,123.0,123.0,123.0,121.0,117.0,111.0,105.0,101.0,100.0,115.0,...,61.0,56.0,58.0,57.0,57.0,57.0,57.0,55.0,51.0,49.0


Unnamed: 0,4
0,7.0
1,1.0
2,0.0
3,8.0
4,7.0


In [0]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)

In [12]:
display(X_train.shape)
display(X_test.shape)

(39999, 4096)

(10000, 4096)

In [0]:
X_train = np.reshape(X_train.values, INPUT_SHAPE)
X_test = np.reshape(X_test.values, INPUT_SHAPE)
y_train = y_train.values
y_test = y_test.values

In [0]:
X_train = preprocess(X_train)
X_test = preprocess(X_test)

In [15]:
display(X_train.shape) # Now our inputs have been reshaped, so we have a 4D array, with each item representing an image.
display(X_test.shape)

(39999, 64, 64, 1)

(10000, 64, 64, 1)

In [0]:
y_train_enc = to_categorical(y_train, NUM_CLASSES)
y_test_enc = to_categorical(y_test, NUM_CLASSES)

In [0]:
def create_model():
    # TODO: Create your model here
    model = Sequential()
    model.add(Conv2D(32, (5, 5), padding='same', activation='relu', input_shape=IMG_SHAPE_CHANNELS))
    model.add(Dropout(0.20))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (5, 5), padding='same', activation='relu'))
    model.add(Dropout(0.20))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(128, (5, 5), padding='same', activation='relu'))
    model.add(Dropout(0.20))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(256, (5, 5), padding='same', activation='relu'))
    model.add(Dropout(0.30))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(0.40))

    model.add(Dense(1024, activation='relu'))

    model.add(Dense(NUM_CLASSES, activation='softmax'))
    return model

In [18]:
model = create_model()
# TODO: Compile your model
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [19]:
# TODO: fit your model to the training set
model.fit(x=X_train, y=y_train_enc, epochs=10, batch_size=BATCH_SIZE)

Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f95c5dbb668>

In [21]:
y_pred = predict(model, X_test)
print('Your accuracy is:', accuracy_score(y_test, y_pred))

Your accuracy is: 0.9457
