[View in Colaboratory](https://colab.research.google.com/github/JohnnyUrosevic/Machine-Learning-Shenanigans/blob/master/kaggle_digits.ipynb)

In [1]:
import tensorflow as tf
import numpy as np

from google.colab import files

In [10]:
from google.colab import auth
from googleapiclient.discovery import build
import io , requests, os
import sys
# Authenticate this Colab session before the Drive client is created.
auth.authenticate_user()
from googleapiclient.discovery import build
# Drive v3 API client; the helper functions below read it as a module-level global.
drive_service = build('drive', 'v3')

# Local directory that the files downloaded from Drive are written into.
SOURCE_FOLDER='/content/datalab/'


def get_parent_folder(folder_name):
  """Find every Drive folder whose name is exactly ``folder_name``.

  Args:
    folder_name: Folder name to search for. Backslashes and single quotes in
      the name are escaped before it is placed in the Drive query string, so
      a name containing an apostrophe no longer yields a malformed query.

  Returns:
    A list of ``{"name": ..., "id": ...}`` dicts, one per matching folder,
    accumulated across all result pages (empty when nothing matches).
  """
  # Drive query literals are single-quoted. Escape backslashes first so the
  # backslashes added for the quotes are not doubled afterwards.
  escaped_name = str(folder_name).replace("\\", "\\\\").replace("'", "\\'")
  query = "name='%s' and mimeType='application/vnd.google-apps.folder'" % escaped_name

  matches = []
  page_token = None
  while True:
      response = drive_service.files().list(q=query,
                                          spaces='drive',
                                          fields='nextPageToken, files(id, name)',
                                          pageToken=page_token).execute()
      for entry in response.get('files', []):
          matches.append({"name": entry.get('name'), "id": entry.get('id')})
      # A missing nextPageToken means the last page has been consumed.
      page_token = response.get('nextPageToken', None)
      if page_token is None:
          break
  return matches


def get_files_from_parent(parent_id):
  """List the direct children of a Drive folder.

  Args:
    parent_id: Drive id of the folder whose children are requested.

  Returns:
    A dict mapping each child's name to its Drive id, gathered over all
    result pages. When two children share a name, the later one wins.
  """
  query = "'%s' in parents" % parent_id
  name_to_id = {}
  next_token = None
  more_pages = True
  while more_pages:
      listing = drive_service.files().list(
          q=query,
          spaces='drive',
          fields='nextPageToken, files(id, name)',
          pageToken=next_token,
      ).execute()
      for entry in listing.get('files', []):
          name_to_id[entry.get('name')] = entry.get('id')
      # Keep paging until the API stops handing back a continuation token.
      next_token = listing.get('nextPageToken', None)
      more_pages = next_token is not None
  return name_to_id

def get_file_buffer(file_id, verbose=0):
  """Download a Drive file into an in-memory buffer.

  Args:
    file_id: Drive id of the file to fetch.
    verbose: When truthy, redraw a one-line progress bar on stdout after
      every downloaded chunk.

  Returns:
    An ``io.BytesIO`` holding the downloaded bytes, rewound to position 0.
  """
  from googleapiclient.http import MediaIoBaseDownload
  request = drive_service.files().get_media(fileId=file_id)
  downloaded = io.BytesIO()
  downloader = MediaIoBaseDownload(downloaded, request)
  done = False
  while not done:
    progress, done = downloader.next_chunk()
    if verbose:
      total_size = progress.total_size
      if total_size:
        percentage_done = int(progress.resumable_progress * 100 / total_size)
      else:
        # Total size is zero or unknown (e.g. an empty file): dividing by it
        # would raise, so report completion state only.
        percentage_done = 100 if done else 0
      # '\r' returns to the start of the line so the bar is redrawn in place.
      sys.stdout.write("\r[%-100s] %d%%" % ('=' * percentage_done, percentage_done))
      sys.stdout.flush()
  downloaded.seek(0)
  return downloaded

# Locate the Drive folder that holds the competition CSV files.
parent_folder = get_parent_folder('Kaggle Digit')
if not parent_folder:
  # Fail with an explicit message instead of an IndexError on parent_folder[0].
  raise RuntimeError("No Google Drive folder named 'Kaggle Digit' was found")

# When several folders share the name, the first match is the one used.
input_file_meta = get_files_from_parent(parent_folder[0]["id"])

# The destination directory may not exist on a fresh runtime; open() below
# would fail without it.
os.makedirs(SOURCE_FOLDER, exist_ok=True)

# Copy every file in the folder to SOURCE_FOLDER under its Drive name.
for file_name, file_id in input_file_meta.items():
  downloaded = get_file_buffer(file_id, verbose=1)
  dest_file = os.path.join(SOURCE_FOLDER, file_name)
  print("processing %s data" % file_name)
  with open(dest_file, "wb") as out:
    out.write(downloaded.read())
  print("Done %s" % dest_file)

Done /content/datalab/test.csv
Done /content/datalab/train.csv


In [106]:
#Define Model
def model_fn(features, labels, mode):
    """Estimator model function: two conv/pool stages, two dense layers, 10 logits.

    Args:
        features: dict whose 'x' entry is reshaped to [-1, 28, 28, 1].
        labels: class ids passed to the sparse softmax cross-entropy loss
            and the accuracy metric (not used in PREDICT mode).
        mode: a tf.estimator.ModeKeys value selecting which spec is built.

    Returns:
        A tf.estimator.EstimatorSpec for the requested mode.
    """
    # Dropout is only active while training.
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # Input: single-channel 28x28 images.
    images = tf.reshape(features['x'], [-1, 28, 28, 1])

    # Stage 1: 5x5 valid convolution -> [batch_size, 24, 24, 20],
    # then 2x2 max-pool -> [batch_size, 12, 12, 20].
    conv_a = tf.layers.conv2d(
        inputs=images,
        filters=20,
        kernel_size=[5, 5],
        padding="valid",
        activation=tf.nn.relu,
    )
    pool_a = tf.layers.max_pooling2d(inputs=conv_a, pool_size=[2, 2], strides=2)

    # Stage 2: 5x5 valid convolution -> [batch_size, 8, 8, 40],
    # then 2x2 max-pool -> [batch_size, 4, 4, 40].
    conv_b = tf.layers.conv2d(
        inputs=pool_a,
        filters=40,
        kernel_size=[5, 5],
        padding="valid",
        activation=tf.nn.relu,
    )
    pool_b = tf.layers.max_pooling2d(inputs=conv_b, pool_size=[2, 2], strides=2)

    # Flatten the pooled feature maps for the fully connected layers.
    flattened = tf.reshape(pool_b, [-1, 4 * 4 * 40])

    # Two 100-unit ReLU layers, each followed by 10% dropout.
    hidden_a = tf.layers.dense(inputs=flattened, units=100, activation=tf.nn.relu)
    hidden_a = tf.layers.dropout(inputs=hidden_a, rate=.1, training=is_training)
    hidden_b = tf.layers.dense(inputs=hidden_a, units=100, activation=tf.nn.relu)
    hidden_b = tf.layers.dropout(inputs=hidden_b, rate=.1, training=is_training)

    # Output layer: one unnormalised score per class.
    logits = tf.layers.dense(inputs=hidden_b, units=10, activation=None)
    predicted = tf.argmax(logits, axis=1)

    # Prediction: no labels are available, so return before any loss is built.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={
                'class_ids': predicted[:, tf.newaxis],
                'probabilities': tf.nn.softmax(logits),
                'logits': logits,
            },
        )

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    # Evaluation: report accuracy alongside the loss.
    if mode == tf.estimator.ModeKeys.EVAL:
        accuracy = tf.metrics.accuracy(labels=labels, predictions=predicted, name='accuracy_op')
        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss,
            eval_metric_ops={'accuracy': accuracy},
        )

    # Training: plain gradient descent on the cross-entropy loss.
    optimizer = tf.train.GradientDescentOptimizer(.001)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)




In [107]:
#Get input
# Training CSV: column 0 is the label, the remaining columns are the
# flattened 28x28 pixel values of each image.
data = np.loadtxt("/content/datalab/train.csv", delimiter=',', skiprows=1)

labels = data[:, 0].reshape(-1).astype(np.int32)
features = data[:, 1:].reshape(-1, 28, 28)

model = tf.estimator.Estimator(model_fn=model_fn)

# num_epochs=None repeats the data indefinitely; training length is set by
# the `steps` argument of model.train below.
train_fn = tf.estimator.inputs.numpy_input_fn(
    {'x': features},
    labels,
    batch_size=150,
    num_epochs=None,
    shuffle=True,
)

model.train(train_fn, steps=20000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpbdxyovbi', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f742677ab38>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpbdxyovbi/model.ckpt.
INFO:ten

INFO:tensorflow:global_step/sec: 91.658
INFO:tensorflow:loss = 0.11012443900108337, step = 2100 (1.090 sec)
INFO:tensorflow:global_step/sec: 91.7352
INFO:tensorflow:loss = 0.1828228235244751, step = 2200 (1.088 sec)
INFO:tensorflow:global_step/sec: 91.2853
INFO:tensorflow:loss = 0.20091097056865692, step = 2300 (1.096 sec)
INFO:tensorflow:global_step/sec: 92.5351
INFO:tensorflow:loss = 0.25142040848731995, step = 2400 (1.079 sec)
INFO:tensorflow:global_step/sec: 92.6387
INFO:tensorflow:loss = 0.18735474348068237, step = 2500 (1.078 sec)
INFO:tensorflow:global_step/sec: 91.5358
INFO:tensorflow:loss = 0.1770717203617096, step = 2600 (1.098 sec)
INFO:tensorflow:global_step/sec: 91.9503
INFO:tensorflow:loss = 0.13925203680992126, step = 2700 (1.087 sec)
INFO:tensorflow:global_step/sec: 91.9414
INFO:tensorflow:loss = 0.17182114720344543, step = 2800 (1.083 sec)
INFO:tensorflow:global_step/sec: 91.1176
INFO:tensorflow:loss = 0.11118756979703903, step = 2900 (1.097 sec)
INFO:tensorflow:global

INFO:tensorflow:global_step/sec: 91.8032
INFO:tensorflow:loss = 0.18760524690151215, step = 5100 (1.087 sec)
INFO:tensorflow:global_step/sec: 93.05
INFO:tensorflow:loss = 0.060143325477838516, step = 5200 (1.072 sec)
INFO:tensorflow:global_step/sec: 92.4111
INFO:tensorflow:loss = 0.06590881198644638, step = 5300 (1.082 sec)
INFO:tensorflow:global_step/sec: 92.0232
INFO:tensorflow:loss = 0.14205095171928406, step = 5400 (1.094 sec)
INFO:tensorflow:global_step/sec: 92.8786
INFO:tensorflow:loss = 0.11998090147972107, step = 5500 (1.069 sec)
INFO:tensorflow:global_step/sec: 92.4764
INFO:tensorflow:loss = 0.23196808993816376, step = 5600 (1.086 sec)
INFO:tensorflow:global_step/sec: 91.8889
INFO:tensorflow:loss = 0.0853552296757698, step = 5700 (1.086 sec)
INFO:tensorflow:global_step/sec: 92.406
INFO:tensorflow:loss = 0.1008608415722847, step = 5800 (1.082 sec)
INFO:tensorflow:global_step/sec: 92.8485
INFO:tensorflow:loss = 0.17415748536586761, step = 5900 (1.075 sec)
INFO:tensorflow:global_

INFO:tensorflow:global_step/sec: 92.1669
INFO:tensorflow:loss = 0.095786452293396, step = 8100 (1.084 sec)
INFO:tensorflow:global_step/sec: 92.3109
INFO:tensorflow:loss = 0.06898544728755951, step = 8200 (1.084 sec)
INFO:tensorflow:global_step/sec: 91.6419
INFO:tensorflow:loss = 0.03994316980242729, step = 8300 (1.090 sec)
INFO:tensorflow:global_step/sec: 92.0407
INFO:tensorflow:loss = 0.08329841494560242, step = 8400 (1.085 sec)
INFO:tensorflow:global_step/sec: 92.0986
INFO:tensorflow:loss = 0.08614461869001389, step = 8500 (1.089 sec)
INFO:tensorflow:global_step/sec: 92.5477
INFO:tensorflow:loss = 0.07593721151351929, step = 8600 (1.077 sec)
INFO:tensorflow:global_step/sec: 91.8972
INFO:tensorflow:loss = 0.1375965178012848, step = 8700 (1.091 sec)
INFO:tensorflow:global_step/sec: 92.312
INFO:tensorflow:loss = 0.04607611894607544, step = 8800 (1.081 sec)
INFO:tensorflow:global_step/sec: 92.5062
INFO:tensorflow:loss = 0.10427474230527878, step = 8900 (1.082 sec)
INFO:tensorflow:global_

INFO:tensorflow:global_step/sec: 91.5915
INFO:tensorflow:loss = 0.10992375761270523, step = 11100 (1.094 sec)
INFO:tensorflow:global_step/sec: 91.5649
INFO:tensorflow:loss = 0.030426902696490288, step = 11200 (1.090 sec)
INFO:tensorflow:global_step/sec: 91.7433
INFO:tensorflow:loss = 0.07190392911434174, step = 11300 (1.090 sec)
INFO:tensorflow:global_step/sec: 91.6213
INFO:tensorflow:loss = 0.08744525164365768, step = 11400 (1.094 sec)
INFO:tensorflow:global_step/sec: 92.7357
INFO:tensorflow:loss = 0.048176635056734085, step = 11500 (1.075 sec)
INFO:tensorflow:global_step/sec: 92.0846
INFO:tensorflow:loss = 0.019262418150901794, step = 11600 (1.086 sec)
INFO:tensorflow:global_step/sec: 92.7387
INFO:tensorflow:loss = 0.03704649955034256, step = 11700 (1.081 sec)
INFO:tensorflow:global_step/sec: 92.5254
INFO:tensorflow:loss = 0.03200158104300499, step = 11800 (1.081 sec)
INFO:tensorflow:global_step/sec: 92.1293
INFO:tensorflow:loss = 0.08250582218170166, step = 11900 (1.085 sec)
INFO:te

INFO:tensorflow:loss = 0.02890045940876007, step = 14000 (1.093 sec)
INFO:tensorflow:global_step/sec: 91.9269
INFO:tensorflow:loss = 0.06208852306008339, step = 14100 (1.093 sec)
INFO:tensorflow:global_step/sec: 91.7389
INFO:tensorflow:loss = 0.03592549264431, step = 14200 (1.085 sec)
INFO:tensorflow:global_step/sec: 91.3833
INFO:tensorflow:loss = 0.06429128348827362, step = 14300 (1.094 sec)
INFO:tensorflow:global_step/sec: 91.1827
INFO:tensorflow:loss = 0.014835872687399387, step = 14400 (1.096 sec)
INFO:tensorflow:global_step/sec: 91.5405
INFO:tensorflow:loss = 0.03584299236536026, step = 14500 (1.097 sec)
INFO:tensorflow:global_step/sec: 92.1722
INFO:tensorflow:loss = 0.026871949434280396, step = 14600 (1.085 sec)
INFO:tensorflow:global_step/sec: 92.1361
INFO:tensorflow:loss = 0.08904548734426498, step = 14700 (1.087 sec)
INFO:tensorflow:global_step/sec: 92.1495
INFO:tensorflow:loss = 0.04581248015165329, step = 14800 (1.085 sec)
INFO:tensorflow:global_step/sec: 91.2651
INFO:tensor

INFO:tensorflow:global_step/sec: 92.4355
INFO:tensorflow:loss = 0.07123685628175735, step = 17000 (1.082 sec)
INFO:tensorflow:global_step/sec: 91.3088
INFO:tensorflow:loss = 0.0356941856443882, step = 17100 (1.099 sec)
INFO:tensorflow:global_step/sec: 92.7782
INFO:tensorflow:loss = 0.0182515699416399, step = 17200 (1.079 sec)
INFO:tensorflow:global_step/sec: 91.6513
INFO:tensorflow:loss = 0.08111484348773956, step = 17300 (1.086 sec)
INFO:tensorflow:global_step/sec: 92.5589
INFO:tensorflow:loss = 0.0764312595129013, step = 17400 (1.082 sec)
INFO:tensorflow:global_step/sec: 92.682
INFO:tensorflow:loss = 0.03385167196393013, step = 17500 (1.078 sec)
INFO:tensorflow:global_step/sec: 92.7902
INFO:tensorflow:loss = 0.03581790253520012, step = 17600 (1.078 sec)
INFO:tensorflow:global_step/sec: 91.64
INFO:tensorflow:loss = 0.0996793732047081, step = 17700 (1.093 sec)
INFO:tensorflow:global_step/sec: 91.4301
INFO:tensorflow:loss = 0.03877709060907364, step = 17800 (1.092 sec)
INFO:tensorflow:g

INFO:tensorflow:Saving checkpoints for 20000 into /tmp/tmpbdxyovbi/model.ckpt.
INFO:tensorflow:Loss for final step: 0.03119114600121975.


<tensorflow.python.estimator.estimator.Estimator at 0x7f742677aac8>

In [109]:
# One unshuffled pass over `features`/`labels`. These are the same arrays the
# training input function uses, so the accuracy reported here is training-set
# accuracy rather than a held-out estimate.
eval_fn = tf.estimator.inputs.numpy_input_fn(
    {'x': features},
    labels,
    batch_size=150,
    num_epochs=1,
    shuffle=False,
)

model.evaluate(eval_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-07-10-00:05:40
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpbdxyovbi/model.ckpt-20000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-07-10-00:05:42
INFO:tensorflow:Saving dict for global step 20000: accuracy = 0.9950238, global_step = 20000, loss = 0.01790285
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 20000: /tmp/tmpbdxyovbi/model.ckpt-20000


{'accuracy': 0.9950238, 'global_step': 20000, 'loss': 0.01790285}

In [108]:
#Get input
data = np.loadtxt("/content/datalab/test.csv", delimiter=',', skiprows=1)

# Every column of the test CSV is reshaped into a 28x28 image; no label
# column is split off here.
test_features = data.reshape(-1, 28, 28)

# Single ordered pass so predictions line up with the rows of the test file.
test_fn = tf.estimator.inputs.numpy_input_fn(
    {'x': test_features},
    y=None,
    batch_size=150,
    num_epochs=1,
    shuffle=False,
)

predictions = model.predict(test_fn)

# ImageId is 1-based and follows the order in which predictions are yielded.
with open('submission.csv', 'w') as submission:
    submission.write('ImageId,Label\n')
    for image_id, pred_dict in enumerate(predictions, start=1):
        submission.write("{},{}\n".format(image_id, pred_dict["class_ids"][0]))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpbdxyovbi/model.ckpt-20000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


KeyboardInterrupt: ignored

In [99]:
# Pass the submission.csv written by the prediction cell to Colab's
# files.download helper.
from google.colab import files
files.download('submission.csv')