In [69]:
import os
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

def load_training_data():
    """Load the labelled digit images from ``datasets/train.csv``.

    The path is resolved relative to the current working directory. The
    ``label`` column is split off as the target; every remaining column is
    treated as pixel data and reshaped into 28x28 single-channel images.

    Returns:
        tuple: ``(x, y)`` where ``x`` is a NumPy array of shape
        ``(n_samples, 28, 28, 1)`` and ``y`` is the pandas Series taken from
        the ``label`` column.
    """
    frame = pd.read_csv(os.path.join("datasets", "train.csv"))
    # pop() removes the column from the frame in place and hands it back.
    labels = frame.pop("label")
    images = frame.values.reshape(-1, 28, 28, 1)
    return images, labels

def load_test_data():
    """Load the unlabelled digit images from ``datasets/test.csv``.

    The path is resolved relative to the current working directory. All
    columns are treated as pixel data.

    Returns:
        numpy.ndarray: array of shape ``(n_samples, 28, 28, 1)``.
    """
    frame = pd.read_csv(os.path.join("datasets", "test.csv"))
    images = frame.values.reshape(-1, 28, 28, 1)
    return images

def show_digit(x, index):
    """Display one image from ``x`` with the "binary" colormap and no axes.

    Args:
        x: indexable collection of images (e.g. the array returned by the
            loaders in this file).
        index: position of the image to display.
    """
    plt.imshow(x[index], cmap="binary")
    plt.axis("off")
    plt.show()

class CheckAccuracyCallback(tf.keras.callbacks.Callback):
    """Stop training once the epoch's ``accuracy`` metric exceeds a threshold.

    Args:
        threshold: accuracy (as a fraction in [0, 1]) that must be strictly
            exceeded for training to stop. Defaults to 0.998.
    """

    def __init__(self, threshold=0.998):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Check the reported accuracy and request early stopping if reached.

        Args:
            epoch: index of the epoch that just finished (unused).
            logs: metrics dict supplied by Keras; may be ``None`` or lack an
                ``'accuracy'`` entry, in which case nothing happens.
        """
        # Avoid a shared mutable default and tolerate a missing metric rather
        # than crashing on float(None).
        accuracy = (logs or {}).get('accuracy')
        if accuracy is None:
            return
        if float(accuracy) > self.threshold:
            self.model.stop_training = True
            # Report the real threshold; the previous text said 99% while the
            # check was against 99.8%.
            print(f'\nReached {self.threshold * 100:g}% accuracy so cancelling training!')

def train_digit_recognizer(x, y, epochs=20):
    """Build, compile and fit a dense classifier on the given images.

    Pixel values are scaled by 1/255 before fitting. The network flattens each
    image, applies a 512-unit ReLU layer and ends in a 10-way softmax. It is
    trained with Adam and sparse categorical cross-entropy, and training may
    end before ``epochs`` is reached if ``CheckAccuracyCallback`` requests it.

    Args:
        x: image array; divided by 255.0 before training.
        y: integer class labels aligned with ``x``.
        epochs: maximum number of training epochs. Defaults to 20.

    Returns:
        The fitted ``tf.keras`` Sequential model.
    """
    x_train = x / 255.0

    early_stop = CheckAccuracyCallback()
    model = tf.keras.models.Sequential([
        # Disabled convolutional front end, kept for reference:
        #tf.keras.layers.Conv2D(64, (3,3), activation='relu', input_shape=(28, 28, 1)),
        #tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation=tf.nn.relu),
        tf.keras.layers.Dense(10, activation=tf.nn.softmax),
    ])

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # The History object returned by fit() was never used, so it is not kept.
    model.fit(x_train, y, epochs=epochs, callbacks=[early_stop])

    return model

def predict_digits(model, x, scale=255.0):
    """Return the most probable class index for each image in ``x``.

    The inputs are divided by ``scale`` before being passed to the model so
    that prediction uses the same pixel scaling that
    ``train_digit_recognizer`` applies (``x / 255.0``). Previously raw 0-255
    pixels were fed to a model trained on 0-1 inputs.

    Args:
        model: object exposing ``predict(batch)`` that returns per-class
            scores with classes on the last axis.
        x: array of raw pixel values.
        scale: divisor applied to ``x``; pass ``1.0`` if the data is already
            normalised. Defaults to 255.0.

    Returns:
        numpy.ndarray: class indices taken with ``argmax`` over the last axis.
    """
    scores = model.predict(x / scale)
    return np.argmax(scores, axis=-1)

def reshape_and_save(result):
    """Write predictions to ``datasets/result.csv``.

    The output has a single ``Label`` column and a 1-based index column named
    ``ImageId``. The path is resolved relative to the current working
    directory.

    Args:
        result: sequence of predicted labels, one per image.
    """
    table = pd.DataFrame(result, columns=['Label'])
    # Shift the default 0-based index so image ids start at 1.
    table.index = table.index + 1
    table.index.name = 'ImageId'
    table.to_csv(os.path.join('datasets', 'result.csv'))
    
# Main program: train on datasets/train.csv, predict labels for
# datasets/test.csv, and write the predictions to datasets/result.csv.
x, y = load_training_data()
model = train_digit_recognizer(x, y)
# NOTE: x is rebound here; from this point on it holds the test images.
x = load_test_data()
result = predict_digits(model, x)
reshape_and_save(result)






Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Reached 99% accuracy so cancelling training!
