[View in Colaboratory](https://colab.research.google.com/github/JohnnyUrosevic/Machine-Learning-Shenanigans/blob/master/kaggle_digits.ipynb)

In [0]:
import tensorflow as tf
import numpy as np

from google.colab import files

In [2]:
from google.colab import auth
from googleapiclient.discovery import build
import io , requests, os
import sys
# Authenticate the Colab user before the Drive client is created.
auth.authenticate_user()

# Drive v3 API client; the helper functions below read this module-level name.
# `build` is imported once at the top of this cell.
drive_service = build('drive', 'v3')

# Local directory that the downloaded Drive files are written into.
SOURCE_FOLDER = '/content/datalab/'


def get_parent_folder(folder_name):
  """Look up Drive folders by name.

  Runs a `name='<folder_name>'` query restricted to the folder MIME type and
  follows `nextPageToken` until every result page has been read.

  Args:
    folder_name: Name of the folder to search for.

  Returns:
    A list of {"name": ..., "id": ...} dicts, one per folder returned by the
    API, in response order. The list is empty when nothing matched.
  """
  # Backslashes and single quotes have to be escaped inside a Drive query
  # string literal; without this a name such as "John's data" produces a
  # malformed query.
  escaped_name = folder_name.replace("\\", "\\\\").replace("'", "\\'")
  query = "name='%s' and mimeType='application/vnd.google-apps.folder'" % escaped_name

  folder_array = []
  page_token = None
  while True:
    response = drive_service.files().list(q=query,
                                          spaces='drive',
                                          fields='nextPageToken, files(id, name)',
                                          pageToken=page_token).execute()
    folder_array.extend(
        {"name": entry.get('name'), "id": entry.get('id')}
        for entry in response.get('files', []))
    page_token = response.get('nextPageToken', None)
    if page_token is None:
      break
  return folder_array


def get_files_from_parent(parent_id):
  """Collect the Drive entries whose parents include `parent_id`.

  Every result page of the "'<parent_id>' in parents" query is read.

  Args:
    parent_id: Drive id of the containing folder.

  Returns:
    A dict mapping each entry's name to its id. When two entries share a
    name, the one returned later by the API wins.
  """
  name_to_id = {}
  search = "'%s' in parents" % parent_id
  next_token = None
  more_pages = True
  while more_pages:
    listing = drive_service.files().list(
        q=search,
        spaces='drive',
        fields='nextPageToken, files(id, name)',
        pageToken=next_token,
    ).execute()
    for item in listing.get('files', []):
      name_to_id[item.get('name')] = item.get('id')
    # A missing token means this was the last page.
    next_token = listing.get('nextPageToken', None)
    more_pages = next_token is not None
  return name_to_id

def get_file_buffer(file_id, verbose=0):
  """Download a Drive file into an in-memory buffer.

  Args:
    file_id: Drive id of the file to fetch.
    verbose: When truthy, redraw a one-line progress bar on stdout after
      every downloaded chunk.

  Returns:
    An io.BytesIO holding the downloaded bytes, rewound to position 0.
  """
  from googleapiclient.http import MediaIoBaseDownload
  request = drive_service.files().get_media(fileId=file_id)
  downloaded = io.BytesIO()
  downloader = MediaIoBaseDownload(downloaded, request)
  done = False
  while not done:
    progress, done = downloader.next_chunk()
    if verbose:
      total_size = progress.total_size
      if total_size:
        percentage_done = progress.resumable_progress * 100 / total_size
      else:
        # Size is zero or not reported (e.g. an empty file): dividing by it
        # would raise, so show an empty bar until the download completes.
        percentage_done = 100 if done else 0
      # '\r' returns to the start of the line so the bar is redrawn in place;
      # flush afterwards so the update is visible immediately.
      sys.stdout.write('\r')
      sys.stdout.write("[%-100s] %d%%" % ('='*int(percentage_done), int(percentage_done)))
      sys.stdout.flush()
  downloaded.seek(0)
  return downloaded

# Locate the Drive folder that holds the competition files.
parent_folder = get_parent_folder('Kaggle Digit')
if not parent_folder:
  # Same exception type as the bare parent_folder[0] lookup would raise, but
  # with a message that says what is actually missing.
  raise IndexError("No Drive folder named 'Kaggle Digit' was found")

# When several folders share the name, the first one returned is used.
input_file_meta = get_files_from_parent(parent_folder[0]["id"])

# The destination directory may not exist on a fresh runtime; open() below
# does not create missing directories.
os.makedirs(SOURCE_FOLDER, exist_ok=True)

# The loop variables avoid the names `file` and `id` so that the `id` builtin
# is not shadowed for the rest of the notebook.
for file_name, file_id in input_file_meta.items():
  downloaded = get_file_buffer(file_id, verbose=1)
  dest_file = os.path.join(SOURCE_FOLDER, file_name)
  print("processing %s data" % file_name)
  with open(dest_file, "wb") as out:
    out.write(downloaded.read())
    print("Done %s" % dest_file)

KeyboardInterrupt: ignored

In [0]:
#Define Model
def model_fn(features, labels, mode):
    """Estimator model function: two conv/pool stages, two dense layers, 10 logits.

    Args:
        features: dict whose 'x' entry is reshaped to [-1, 28, 28, 1].
        labels: class ids passed to the sparse softmax cross-entropy loss and
            the accuracy metric; not read in PREDICT mode.
        mode: a tf.estimator.ModeKeys value selecting which spec is built.

    Returns:
        A tf.estimator.EstimatorSpec carrying predictions (PREDICT), loss and
        accuracy metric (EVAL), or loss and train op (TRAIN).
    """
    mode_keys = tf.estimator.ModeKeys
    # Dropout is only active while training.
    is_training = mode == mode_keys.TRAIN

    # NOTE(review): the tf.layers calls below are kept in their original order;
    # default variable names presumably depend on creation order, which matters
    # when restoring existing checkpoints -- confirm before reordering.

    #input layer
    images = tf.reshape(features['x'], [-1, 28, 28, 1])

    #convolution layer 1: 5x5 valid convolution -> [batch_size, 24, 24, 20]
    net = tf.layers.conv2d(
        images,
        filters=20,
        kernel_size=[5, 5],
        padding="valid",
        activation=tf.nn.elu)

    #pool layer 1: [batch_size, 24, 24, 20] -> [batch_size, 12, 12, 20]
    net = tf.layers.max_pooling2d(net, pool_size=[2, 2], strides=2)

    #convolution layer 2: 5x5 valid convolution -> [batch_size, 8, 8, 40]
    net = tf.layers.conv2d(
        net,
        filters=40,
        kernel_size=[5, 5],
        padding="valid",
        activation=tf.nn.elu)

    #pool layer 2: [batch_size, 8, 8, 40] -> [batch_size, 4, 4, 40]
    net = tf.layers.max_pooling2d(net, pool_size=[2, 2], strides=2)

    flat = tf.reshape(net, [-1, 4 * 4 * 40])

    #dense layer 1 followed by 20% dropout
    hidden = tf.layers.dense(flat, units=100, activation=tf.nn.elu)
    hidden = tf.layers.dropout(hidden, rate=.2, training=is_training)

    #dense layer 2 followed by 10% dropout
    hidden = tf.layers.dense(hidden, units=100, activation=tf.nn.elu)
    hidden = tf.layers.dropout(hidden, rate=.1, training=is_training)

    #output layer: one raw score per class
    logits = tf.layers.dense(hidden, units=10, activation=None)

    predicted = tf.argmax(logits, 1)

    if mode == mode_keys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode,
            predictions={
                # [:, tf.newaxis] turns the class-id vector into a column.
                'class_ids': predicted[:, tf.newaxis],
                'probabilities': tf.nn.softmax(logits),
                'logits': logits,
            })

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    if mode == mode_keys.EVAL:
        accuracy = tf.metrics.accuracy(
            labels=labels, predictions=predicted, name='accuracy_op')
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops={'accuracy': accuracy})

    #training: plain gradient descent with an exponentially decaying learning
    #rate (starts at .001, decay rate .9, decay steps 3000)
    global_step = tf.train.get_global_step()
    learning_rate = tf.train.exponential_decay(.001, global_step, 3000, .9)
    train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)




In [4]:
#Get input
# The header row is skipped; column 0 is used as the label and the remaining
# columns as the pixels of one 28x28 image.
data = np.loadtxt("/content/datalab/train.csv", skiprows=1, delimiter=',')

features = np.reshape(data[:, 1:], (-1, 28, 28))

#augment data
# Intensity variants: the originals plus rounded copies scaled to 75% and 50%.
expanded_features = np.concatenate(
    [features, np.around(features * .75), np.around(features * .5)], axis=0)

# One-pixel shifts in each direction. np.roll wraps around, so the row or
# column that wrapped to the opposite edge is blanked.
shift_down = np.roll(expanded_features, 1, 1)
shift_down[:, 0, :] = 0

shift_up = np.roll(expanded_features, -1, 1)
shift_up[:, 27, :] = 0

shift_right = np.roll(expanded_features, 1, 2)
shift_right[:, :, 0] = 0

shift_left = np.roll(expanded_features, -1, 2)
shift_left[:, :, 27] = 0

# A single concatenate instead of chained np.append calls: each np.append
# allocates and copies the whole, growing array again.
expanded_features = np.concatenate(
    [expanded_features, shift_down, shift_up, shift_left, shift_right], axis=0)

labels = data[:, 0].astype(np.int32)
# Labels repeat in the same block order as the features: 3 intensity variants,
# then that whole group 5 times (unshifted + 4 shifts).
expanded_labels1 = np.tile(labels, 3)
expanded_labels = np.tile(expanded_labels1, 5)

# Guard against the augmentation and label bookkeeping drifting apart.
assert len(expanded_features) == len(expanded_labels)

model = tf.estimator.Estimator(model_fn=model_fn, model_dir='/content/datalab/')

train_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': expanded_features},
    y=expanded_labels,
    batch_size=150,
    num_epochs=None,
    shuffle=True
)

model.train(input_fn=train_fn, steps=100000)

# NOTE(review): evaluation runs over the same augmented arrays used for
# training, so the reported accuracy measures fit, not generalisation.
eval_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': expanded_features},
    y=expanded_labels,
    batch_size=150,
    num_epochs=1,
    shuffle=False
)

model.evaluate(input_fn=eval_fn)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/content/datalab/model2', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f1b5fb39e10>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /content/datalab/model2/model.ckpt-200000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_i

INFO:tensorflow:loss = 0.1282542645931244, step = 202000 (1.117 sec)
INFO:tensorflow:global_step/sec: 90.3327
INFO:tensorflow:loss = 0.09512761980295181, step = 202100 (1.110 sec)
INFO:tensorflow:global_step/sec: 89.8359
INFO:tensorflow:loss = 0.07930755615234375, step = 202200 (1.110 sec)
INFO:tensorflow:global_step/sec: 91.6238
INFO:tensorflow:loss = 0.12363055348396301, step = 202300 (1.091 sec)
INFO:tensorflow:global_step/sec: 90.1336
INFO:tensorflow:loss = 0.07373738288879395, step = 202400 (1.109 sec)
INFO:tensorflow:global_step/sec: 90.1004
INFO:tensorflow:loss = 0.021376052871346474, step = 202500 (1.113 sec)
INFO:tensorflow:global_step/sec: 89.8221
INFO:tensorflow:loss = 0.06687074154615402, step = 202600 (1.110 sec)
INFO:tensorflow:global_step/sec: 89.7382
INFO:tensorflow:loss = 0.020862333476543427, step = 202700 (1.124 sec)
INFO:tensorflow:global_step/sec: 89.0575
INFO:tensorflow:loss = 0.08195458352565765, step = 202800 (1.114 sec)
INFO:tensorflow:global_step/sec: 90.7556


INFO:tensorflow:global_step/sec: 89.6505
INFO:tensorflow:loss = 0.030522385612130165, step = 205000 (1.114 sec)
INFO:tensorflow:global_step/sec: 89.9592
INFO:tensorflow:loss = 0.08356089144945145, step = 205100 (1.114 sec)
INFO:tensorflow:global_step/sec: 89.9121
INFO:tensorflow:loss = 0.03323417529463768, step = 205200 (1.110 sec)
INFO:tensorflow:global_step/sec: 90.5882
INFO:tensorflow:loss = 0.13144756853580475, step = 205300 (1.108 sec)
INFO:tensorflow:global_step/sec: 89.7613
INFO:tensorflow:loss = 0.05873526260256767, step = 205400 (1.115 sec)
INFO:tensorflow:global_step/sec: 89.6431
INFO:tensorflow:loss = 0.0979529321193695, step = 205500 (1.110 sec)
INFO:tensorflow:global_step/sec: 90.5932
INFO:tensorflow:loss = 0.0675472617149353, step = 205600 (1.113 sec)
INFO:tensorflow:global_step/sec: 89.4607
INFO:tensorflow:loss = 0.1418287754058838, step = 205700 (1.115 sec)
INFO:tensorflow:global_step/sec: 90.7575
INFO:tensorflow:loss = 0.05283736065030098, step = 205800 (1.098 sec)
INF

INFO:tensorflow:loss = 0.09426437318325043, step = 207900 (1.116 sec)
INFO:tensorflow:global_step/sec: 89.9337
INFO:tensorflow:loss = 0.07577202469110489, step = 208000 (1.110 sec)
INFO:tensorflow:global_step/sec: 90.0706
INFO:tensorflow:loss = 0.16050799190998077, step = 208100 (1.109 sec)
INFO:tensorflow:global_step/sec: 89.8446
INFO:tensorflow:loss = 0.07511422783136368, step = 208200 (1.109 sec)
INFO:tensorflow:global_step/sec: 90.2224
INFO:tensorflow:loss = 0.094809889793396, step = 208300 (1.113 sec)
INFO:tensorflow:global_step/sec: 89.395
INFO:tensorflow:loss = 0.03039596602320671, step = 208400 (1.124 sec)
INFO:tensorflow:global_step/sec: 90.0792
INFO:tensorflow:loss = 0.03945155441761017, step = 208500 (1.106 sec)
INFO:tensorflow:global_step/sec: 90.103
INFO:tensorflow:loss = 0.03729410469532013, step = 208600 (1.103 sec)
INFO:tensorflow:global_step/sec: 90.2814
INFO:tensorflow:loss = 0.13758385181427002, step = 208700 (1.111 sec)
INFO:tensorflow:global_step/sec: 89.4434
INFO:

INFO:tensorflow:global_step/sec: 89.9451
INFO:tensorflow:loss = 0.040775854140520096, step = 210900 (1.112 sec)
INFO:tensorflow:global_step/sec: 90.3997
INFO:tensorflow:loss = 0.03679102659225464, step = 211000 (1.106 sec)
INFO:tensorflow:global_step/sec: 89.6403
INFO:tensorflow:loss = 0.06222635507583618, step = 211100 (1.119 sec)
INFO:tensorflow:global_step/sec: 90.1246
INFO:tensorflow:loss = 0.045585524290800095, step = 211200 (1.106 sec)
INFO:tensorflow:global_step/sec: 90.0683
INFO:tensorflow:loss = 0.12237129360437393, step = 211300 (1.120 sec)
INFO:tensorflow:global_step/sec: 88.9502
INFO:tensorflow:loss = 0.07086849212646484, step = 211400 (1.115 sec)
INFO:tensorflow:global_step/sec: 89.9813
INFO:tensorflow:loss = 0.04685376584529877, step = 211500 (1.112 sec)
INFO:tensorflow:global_step/sec: 89.2032
INFO:tensorflow:loss = 0.049813251942396164, step = 211600 (1.121 sec)
INFO:tensorflow:global_step/sec: 88.7336
INFO:tensorflow:loss = 0.10470324754714966, step = 211700 (1.127 sec

KeyboardInterrupt: ignored

In [6]:
# Rebuild the estimator from the checkpoint directory so this cell does not
# depend on the training cell having run in the same session.
model = tf.estimator.Estimator(model_fn=model_fn, model_dir='/content/datalab/')

#Get input
# test.csv has no label column: every column of a row is a pixel.
data = np.loadtxt("/content/datalab/test.csv", skiprows=1, delimiter=',')

test_features = np.reshape(data, (-1, 28, 28))

test_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': test_features},
    y=None,
    batch_size=150,
    num_epochs=1,
    shuffle=False
)

predictions = model.predict(input_fn=test_fn)

# Write one "ImageId,Label" row per prediction; ImageId counts from 1.
# The handle is not called `file`, so that module-level name is not rebound.
with open('submission.csv', 'w') as submission:
    submission.write('ImageId,Label\n')
    for image_id, pred_dict in enumerate(predictions, start=1):
        submission.write("{},{}\n".format(image_id, pred_dict["class_ids"][0]))

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/content/datalab/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f4e01a3ce48>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /content/datalab/model.ckpt-200000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [4]:
from google.colab import files
# Download submission.csv, the file written by the test-set prediction cell.
files.download('submission.csv')

In [11]:
#predicting your own handwriting
!pip install imageio
import imageio
from google.colab import files
!rm input.png
files.upload()

im = imageio.imread('input.png')
im = np.array(255 - im[:, :, 0], dtype=np.float)

predict_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': im},
    y=None,
    batch_size=150,
    num_epochs=1,
    shuffle=False
)

predictions = model.predict(input_fn=predict_fn)

for pred_dict in predictions:
    print(pred_dict["class_ids"][0])



Saving input.png to input.png
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /content/datalab/model.ckpt-200000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
7
