In [None]:
import numpy as np
import tensorflow as tf

from sklearn.model_selection import train_test_split

# Seed reused by this notebook for the train/validation split (random_state)
# and for TensorFlow's graph-level random seed.
SEED = 123

In [None]:
# Parse both CSV files into float32 matrices, training file first.
# No header skipping is requested here, so row 0 of each matrix still
# corresponds to the CSV header line and has to be dropped by later slicing.
data_train, data_test = (
    np.genfromtxt(csv_name, delimiter=',', dtype=np.float32)
    for csv_name in ("train.csv", "test.csv")
)

In [None]:
# train.csv columns: label,pixel0,pixel1,pixel2,..,pixel783
# Row 0 is the header line, so every slice starts at row 1.
# Labels are sliced with `:1` (not `0`) to keep them as a 2-D column.
X_train, y_train = data_train[1:, 1:], data_train[1:, :1]   # expected (42000, 784) and (42000, 1)
X_test = data_test[1:]                                       # expected (28000, 784)

print("X_train.shape: {}".format(X_train.shape))
print("X_test.shape: {}".format(X_test.shape))

In [None]:
# Build the model inputs from the raw CSV matrices (data_train / data_test)
# instead of from X_train / y_train / X_test. This cell reassigns those names,
# so reading them here would divide the pixels by 255 a second time and split
# an already-split subset whenever the cell is re-run. Deriving from the raw
# matrices makes the cell idempotent.
#
# Row 0 of each matrix is the CSV header line; column 0 of the training
# matrix is the label, the remaining 784 columns are the pixels.
images_all = data_train[1:, 1:].reshape((-1, 28, 28, 1)) / 255.0  # expected (42000, 28, 28, 1)
labels_all = data_train[1:, 0:1]                                  # expected (42000, 1)
X_test = data_test[1:, :].reshape((-1, 28, 28, 1)) / 255.0        # expected (28000, 28, 28, 1)
# Dividing by 255 maps pixels to [0, 1], assuming 8-bit grey values.

# Hold out 20% of the labelled images for validation; SEED fixes the split.
X_train, X_val, y_train, y_val = train_test_split(
    images_all, labels_all, test_size=0.20, random_state=SEED)

print("X_train.shape: {}".format(X_train.shape))  #(33600, 28, 28, 1)
print("X_val.shape: {}".format(X_val.shape))      #(8400, 28, 28, 1)
print("X_test.shape: {}".format(X_test.shape))    #(28000, 28, 28, 1)

In [None]:
# Model: AlexNet Ver:1

def _conv_relu(inputs, filters, kernel_size):
    """Stride-1, SAME-padded 2-D convolution followed by ReLU."""
    return tf.layers.conv2d(
        inputs=inputs,
        filters=filters,
        kernel_size=kernel_size,
        strides=1,
        padding="SAME",
        activation=tf.nn.relu)


def _max_pool_3x3(inputs):
    """3x3 max pooling with stride 2 (the layer's default padding is kept)."""
    return tf.layers.max_pooling2d(inputs=inputs, pool_size=[3, 3], strides=2)


def cnn_model_fn(features, labels, mode):
    """Estimator model_fn for an AlexNet-style digit classifier.

    Args:
        features: dict holding the input image batch under the key "x".
            This notebook feeds float32 images reshaped to (N, 28, 28, 1).
        labels: class ids in 0..9. May be shaped (N,) or (N, 1) and may be
            float; they are flattened and cast to int32 before use. Not read
            in PREDICT mode.
        mode: one of tf.estimator.ModeKeys.{TRAIN, EVAL, PREDICT}.

    Returns:
        A tf.estimator.EstimatorSpec for the requested mode:
        PREDICT -> predictions ("classes", "probabilities");
        TRAIN   -> loss and an Adam train_op;
        EVAL    -> loss and an "accuracy" metric.
    """
    # Results recorded by the author for this architecture:
    # Kaggle Scoring: 0.98714
    # Train metrics: {'accuracy': 0.9946726, 'loss': 0.02303471, 'global_step': 10500}
    # Val metrics: {'accuracy': 0.98952383, 'loss': 0.045152489, 'global_step': 10500}

    # Layers are created in the same order as before so the automatically
    # generated variable scopes (conv2d, conv2d_1, ..., dense, dense_1) are
    # unchanged. Spatial sizes below are for 28x28 inputs.
    net = _conv_relu(features["x"], 96, [5, 5])   # Conv1: 28x28@96

    net = _conv_relu(net, 256, [5, 5])
    net = _max_pool_3x3(net)                      # Conv2 + pool: 13x13@256

    net = _conv_relu(net, 384, [3, 3])            # Conv3: 13x13@384
    net = _conv_relu(net, 384, [3, 3])            # Conv4: 13x13@384

    net = _conv_relu(net, 256, [3, 3])
    net = _max_pool_3x3(net)                      # Conv5 + pool: 6x6@256

    net = tf.layers.flatten(inputs=net)           # 6 * 6 * 256 = 9216 nodes

    # Two fully connected layers.
    net = tf.layers.dense(inputs=net, units=4096, activation=tf.nn.relu)
    net = tf.layers.dense(inputs=net, units=4096, activation=tf.nn.relu)

    # Output layer: raw scores for the 10 digit classes.
    logits = tf.layers.dense(net, 10, name="output_tensor")

    predictions = {
        "classes": tf.argmax(logits, axis=1),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
    }

    # PREDICT: no labels are available, so return before building the loss.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Flatten the labels to rank 1 explicitly. The notebook passes an (N, 1)
    # column, and one-hot encoding that directly would give a rank-3 tensor
    # against rank-2 logits. The sparse cross-entropy op takes integer class
    # ids, so no one-hot tensor is needed.
    class_ids = tf.cast(tf.reshape(labels, [-1]), tf.int32)
    cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=class_ids,
            logits=logits),
        name="cost_tensor")

    # TRAIN
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = tf.train.AdamOptimizer().minimize(
            cost, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=cost, train_op=train_op)

    # EVAL
    eval_metrics_ops = {
        "accuracy": tf.metrics.accuracy(
            labels=class_ids,
            predictions=predictions["classes"])
    }
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=cost, eval_metric_ops=eval_metrics_ops)

In [None]:
def train_model(X_train, y_train, X_val, y_val,
                batch_size=32, num_epochs=10, random_seed=None, model_dir=None):
    """Train the CNN estimator and report train/validation metrics.

    Args:
        X_train: training images, fed to the model under the feature key "x".
        y_train: training labels aligned with X_train.
        X_val: validation images.
        y_val: validation labels aligned with X_val.
        batch_size: mini-batch size for both training and evaluation.
        num_epochs: number of passes over the training data.
        random_seed: graph-level seed for the Estimator. None (the default)
            falls back to the notebook-level SEED constant.
        model_dir: checkpoint directory. None lets the Estimator pick a
            temporary directory.

    Returns:
        The trained tf.estimator.Estimator.
    """
    def make_input_fn(images, labels, epochs, shuffle):
        # One place to build input functions so train/eval stay consistent.
        return tf.estimator.inputs.numpy_input_fn(
            x={"x": images},
            y=labels,
            batch_size=batch_size,
            num_epochs=epochs,
            shuffle=shuffle)

    # The Estimator builds its own graph and seeds it from the RunConfig, so
    # a seed set on the default graph by the caller does not reach training.
    # Passing tf_random_seed is what makes SEED take effect.
    seed = SEED if random_seed is None else random_seed
    run_config = tf.estimator.RunConfig(tf_random_seed=seed)

    model = tf.estimator.Estimator(
        model_fn=cnn_model_fn,
        model_dir=model_dir,
        config=run_config)

    model.train(input_fn=make_input_fn(X_train, y_train, num_epochs, shuffle=True))

    # Model performance. Evaluation is a single ordered pass: the metrics do
    # not depend on example order, so shuffling is unnecessary.
    train_metrics = model.evaluate(
        input_fn=make_input_fn(X_train, y_train, 1, shuffle=False))
    print(">> Train metrics: %r"% train_metrics)

    val_metrics = model.evaluate(
        input_fn=make_input_fn(X_val, y_val, 1, shuffle=False))
    print(">> Val metrics: %r"% val_metrics)

    return model

In [None]:
def predict_digits(model, X_test, output_path="digit_prediction.csv"):
    """Predict a class for every test image and write a submission CSV.

    Args:
        model: trained estimator exposing predict(input_fn=...). Each yielded
            prediction must provide a "classes" entry.
        X_test: test images, fed to the model under the feature key "x".
        output_path: destination file; defaults to "digit_prediction.csv".

    The file has an "ImageId,Label" header followed by one row per image,
    with ImageId starting at 1 in the order of X_test.
    """
    predictions = model.predict(
        input_fn=tf.estimator.inputs.numpy_input_fn(
            x={"x": X_test},
            num_epochs=1,
            shuffle=False))  # keep input order so row i maps to ImageId i+1

    # The context manager closes the file even if prediction raises while
    # the lazy `predictions` generator is being consumed.
    with open(output_path, "w") as submission:
        submission.write("ImageId,Label\n")
        for image_id, pred in enumerate(predictions, start=1):
            submission.write("{},{}\n".format(image_id, pred["classes"]))

In [None]:
tf.logging.set_verbosity(tf.logging.INFO)

# No explicit tf.Session is opened here: tf.estimator.Estimator creates and
# manages its own graph and session for each train/evaluate/predict call.
#
# NOTE: tf.set_random_seed seeds only the current default graph. The seed for
# the Estimator's graph comes from RunConfig.tf_random_seed instead.
tf.set_random_seed(SEED)

model = train_model(X_train, y_train, X_val, y_val)
predict_digits(model, X_test)