In [12]:
import tensorflow as tf
import matplotlib.pyplot as plt
# Bare last expression: the cell displays the installed TensorFlow version string.
tf.__version__

'1.12.0'

In [2]:
def plot_loss_history(histories, key='loss'):
    """Plot training and validation curves of one metric for several runs.

    Args:
        histories: iterable of ``(name, history)`` pairs. Each ``history``
            must expose ``.epoch`` (sequence of epoch indices) and
            ``.history`` (dict mapping a metric name to per-epoch values).
        key: metric to plot for the training curve; the validation curve is
            read from ``'val_' + key``.

    The x-axis spans from 0 to the largest epoch index found in *any* run, so
    longer runs are not clipped by a shorter one listed last. An empty
    ``histories`` produces an empty, labelled figure instead of raising.
    """
    # Materialize once so one-shot iterables (generators, zip) are supported.
    histories = list(histories)
    plt.figure(figsize=(8,5))

    last_epoch = None
    for name, history in histories:
        # Validation curve is dashed; the training curve reuses its colour so
        # both lines of a run are visually paired.
        val = plt.plot(history.epoch, history.history['val_'+key],
                       '--', label=name.title()+' Val')
        plt.plot(history.epoch, history.history[key], color=val[0].get_color(),
                 label=name.title()+' Train')

        if len(history.epoch):
            run_last = max(history.epoch)
            last_epoch = run_last if last_epoch is None else max(last_epoch, run_last)

    plt.xlabel('Epochs')
    plt.ylabel(key.replace('_',' ').title())
    if histories:
        # Only build a legend when there is at least one labelled line.
        plt.legend()

    if last_epoch is not None:
        plt.xlim([0, last_epoch])

In [3]:
def plot_accuracy_history(histories, key='accuracy'):
    """Plot training and validation accuracy curves for several runs.

    Args:
        histories: iterable of ``(name, history)`` pairs. Each ``history``
            must expose ``.epoch`` (sequence of epoch indices) and
            ``.history`` (dict mapping a metric name to per-epoch values).
        key: metric to plot; the validation curve is read from
            ``'val_' + key``. If ``key`` is ``'accuracy'`` or ``'acc'`` and
            is missing from a run, the other spelling is tried, because older
            Keras / TF 1.x releases record ``metrics=['accuracy']`` as
            ``'acc'`` while newer ones record ``'accuracy'``.

    The x-axis spans from 0 to the largest epoch index found in *any* run. An
    empty ``histories`` produces an empty, labelled figure instead of raising.
    """
    aliases = {'accuracy': 'acc', 'acc': 'accuracy'}

    # Materialize once so one-shot iterables (generators, zip) are supported.
    histories = list(histories)
    plt.figure(figsize=(8,5))

    last_epoch = None
    for name, history in histories:
        metric = key
        alias = aliases.get(key)
        if metric not in history.history and alias in history.history:
            metric = alias

        # Validation curve is dashed; the training curve reuses its colour so
        # both lines of a run are visually paired.
        val = plt.plot(history.epoch, history.history['val_'+metric],
                       '--', label=name.title()+' Val')
        plt.plot(history.epoch, history.history[metric], color=val[0].get_color(),
                 label=name.title()+' Train')

        if len(history.epoch):
            run_last = max(history.epoch)
            last_epoch = run_last if last_epoch is None else max(last_epoch, run_last)

    plt.xlabel('Epochs')
    plt.ylabel(key.replace('_',' ').title())
    if histories:
        # Only build a legend when there is at least one labelled line.
        plt.legend()

    if last_epoch is not None:
        plt.xlim([0, last_epoch])

In [4]:
from tensorflow.python.keras.utils import get_file
import numpy as np
import pathlib
import gzip

In [5]:
import os
import struct
def _read_ubyte_payload(file_path, header_bytes):
    """Return a file's bytes after the first ``header_bytes`` as a uint8 array."""
    with open(file_path, 'rb') as fh:
        return np.frombuffer(fh.read(), np.uint8, offset=header_bytes)


def load_data(path="./MNIST/"):
    """Load the train, test and unlabeled final image sets from ``path``.

    Label files are read as raw uint8 values after an 8-byte header; image
    files are read after a 16-byte header and reshaped to 28x28 images.

    Args:
        path: directory containing the five ``*-ubyte`` files. Defaults to
            ``"./MNIST/"``.

    Returns:
        ``(x_train, y_train), (x_test, y_test), x_final`` where the ``x_*``
        arrays have shape ``(N, 28, 28)`` and the ``y_*`` arrays shape
        ``(N,)``, all uint8. ``x_final`` is returned bare (not wrapped in a
        tuple).

    Raises:
        OSError: if one of the files cannot be opened.
        ValueError: if an image file's size does not match its label count
            (train/test) or is not a whole number of 28x28 images (final).
    """
    def payload(fname, header_bytes):
        return _read_ubyte_payload(os.path.join(path, fname), header_bytes)

    y_train = payload('train-labels-idx1-ubyte', 8)
    # Reshaping by the label count doubles as a consistency check between the
    # image file and its label file.
    x_train = payload('train-images-idx3-ubyte', 16).reshape(len(y_train), 28, 28)

    y_test = payload('test-labels-idx1-ubyte', 8)
    x_test = payload('test-images-idx3-ubyte', 16).reshape(len(y_test), 28, 28)

    # The final set has no labels, so infer the image count from the file size
    # rather than hard-coding it.
    x_final = payload('final-test-images-idx3-ubyte', 16).reshape(-1, 28, 28)

    return (x_train, y_train), (x_test, y_test), (x_final)

In [6]:
# Read the train / test / unlabeled-final splits through the local loader.
# (Stock-MNIST alternative, which has no final split:
#  (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data())
(x_train, y_train), (x_test, y_test), x_final = load_data()

In [7]:
# Append a trailing channel axis -> (N, 28, 28, 1) and cast to float32 in one
# step, so the later division-based scaling keeps its fractional part.
x_train = x_train.reshape(len(x_train), 28, 28, 1).astype('float32')
x_test = x_test.reshape(len(x_test), 28, 28, 1).astype('float32')
x_final = x_final.reshape(len(x_final), 28, 28, 1).astype('float32')

In [8]:
print('x_train shape:', x_train.shape)
print('Number of images in x_train', x_train.shape[0])
print('Number of images in x_test', x_test.shape[0])
print('Number of images in x_final', x_final.shape[0])

x_train shape: (18225, 28, 28, 1)
Number of images in x_train 18225
Number of images in x_test 4563
Number of images in x_final 4563


In [13]:
# NOTE: this cell rewrites x_train / y_train / x_test / x_final in place, so it
# must be run exactly once after loading; re-running it would scale the pixels
# a second time and carve another 1000 samples off the training set.

# One-hot encode the labels. The class count is pinned to the width of the
# model's 10-unit softmax layer so the encoding does not shrink if a split
# happens to lack the highest digit.
NUM_CLASSES = 10
y_train = tf.keras.utils.to_categorical(y_train, num_classes=NUM_CLASSES)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=NUM_CLASSES)

# Scale pixel values from [0, 255] to [-1, 1].
x_train = (x_train-127.5)/127.5
x_test = (x_test-127.5)/127.5
x_final = (x_final-127.5)/127.5

# Hold out the last 1000 training samples for validation.
x_val = x_train[-1000:]
y_val = y_train[-1000:]
x_train = x_train[:-1000]
y_train = y_train[:-1000]

In [11]:
# CNN with dropout regularisation:
# conv(32) -> conv(64) -> avg-pool -> dropout -> flatten -> dense(128) -> dropout -> softmax(10)
dpt_model = tf.keras.models.Sequential()
dpt_model.add(tf.keras.layers.Conv2D(
    filters=32, kernel_size=3, padding='same', activation='relu',
    input_shape=(28, 28, 1)))
dpt_model.add(tf.keras.layers.Conv2D(
    filters=64, kernel_size=3, padding='same', activation='relu'))
dpt_model.add(tf.keras.layers.AvgPool2D(pool_size=(2, 2)))
dpt_model.add(tf.keras.layers.Dropout(rate=0.2))

dpt_model.add(tf.keras.layers.Flatten())
dpt_model.add(tf.keras.layers.Dense(units=128, activation='relu'))
dpt_model.add(tf.keras.layers.Dropout(rate=0.5))
dpt_model.add(tf.keras.layers.Dense(units=10, activation='softmax'))
dpt_model.summary()

optimizer = tf.keras.optimizers.Adam(lr=5e-4)
dpt_model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 30 epochs, validating on the held-out (x_val, y_val) split; the
# returned History object is kept in dpt_model_history.
dpt_model_history = dpt_model.fit(
    x_train, y_train,
    epochs=30,
    batch_size=64,
    validation_data=(x_val, y_val),
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 28, 28, 64)        18496     
_________________________________________________________________
average_pooling2d_1 (Average (None, 14, 14, 64)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 14, 14, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 12544)             0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               1605760   
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)               0         
__________

InternalError: failed initializing StreamExecutor for CUDA device ordinal 0: Internal: failed call to cuDevicePrimaryCtxRetain: CUDA_ERROR_OUT_OF_MEMORY: out of memory; total memory reported: 34058272768

In [120]:
# Score the trained model on the labelled test split.
dpt_result = dpt_model.evaluate(x=x_test, y=y_test, batch_size=64)



In [121]:
# dpt_result comes from dpt_model.evaluate; the model was compiled with one loss
# and metrics=['accuracy'], so the list printed here is [loss, accuracy].
print("dropout evaluate result: ", dpt_result)

dropout evaluate result:  [0.038484248871612184, 0.9922]


In [122]:
# Per-class softmax scores for every test image (one row per image, one column
# per class). NOTE: `predictions` is reassigned by the export cell that follows.
predictions = dpt_model.predict(x_test)
print('predictions shape:', predictions.shape)

predictions shape: (10000, 10)


In [123]:
import csv

# Per-class scores for the unlabeled final set (one row per image).
predictions = dpt_model.predict(x_final)
print('predictions shape:', predictions.shape)

# newline='' hands line-ending control to the csv module; without it, platforms
# that translate '\n' on write (Windows) end up with a blank line after every row.
with open('MNIST_result.csv', mode='w', newline='') as result_file:
    writer = csv.writer(result_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(['ID', 'Class'])
    # IDs are 1-based row numbers; the class is the index of the highest score.
    predicted_classes = np.argmax(predictions, axis=1)
    writer.writerows(enumerate(predicted_classes, start=1))

predictions shape: (4563, 10)
