<a href="https://colab.research.google.com/github/mtedder/AI-ML-Workshop/blob/master/notebooks/mnist_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MNIST Keras Project

[Ref 1](https://www.kaggle.com/yufengg/emnist-gpu-keras-to-tf)

[Ref 2](https://codelabs.developers.google.com/codelabs/cloud-tensorflow-mnist/#1)

[Ref 3](https://github.com/tensorflow/models/tree/master/official/mnist)

[Image Classification](https://towardsdatascience.com/image-classification-in-10-minutes-with-mnist-dataset-54c35b77a38d)

[Keras Blog Example](https://blog.keras.io/keras-as-a-simplified-interface-to-tensorflow-tutorial.html#exporting-a-model-with-tensorflow-serving)

[emnist GPU keras to TF](https://www.kaggle.com/yufengg/emnist-gpu-keras-to-tf/notebook)

[Object detection](https://towardsdatascience.com/is-google-tensorflow-object-detection-api-the-easiest-way-to-implement-image-recognition-a8bd1f500ea0)

[Object detection](https://www.edureka.co/blog/tensorflow-object-detection-tutorial/)

[Computer vision](https://towardsdatascience.com/how-to-do-everything-in-computer-vision-2b442c469928)

In [0]:
#Uncomment this code if you need the Colaboratory TensorFlow version to match the versions available in Google Cloud ML
#THIS IS REQUIRED
#!pip install tensorflow==1.12.0
# !sudo pip install tensorflow==1.11 #Raspberry pi version
#!pip install h5py

In [0]:
# !rm -rf 1
# !rm -rf estimator_model export model1

##Imports

In [0]:
# import tensorflow as tf
# sess = tf.Session()

from tensorflow import keras
# from keras import backend as K

# K.set_session(sess)
# K.set_learning_phase(0) # all new operations will be in test mode from now on

# from functools import partial
# from tensorflow.python.saved_model import builder as saved_model_builder
# from tensorflow.python.saved_model import tag_constants, signature_constants, signature_def_utils_impl, utils

from functools import partial
import tensorflow as tf

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

#import tensorflow as tf
import matplotlib.pyplot as plt

##Version Info

In [0]:
# Report the library versions in use (Cloud ML Engine needs a matching TensorFlow version).
print("Keras version " + keras.__version__)
print("Tensorflow version " + tf.__version__)
# !python --version

##Data Preparation

The same dataset is also available through Keras: `(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()`

[Original Dataset](https://www.nist.gov/node/1298471/emnist-dataset)

###Optional datasets

In [0]:
# Download full emnist dataset
# !wget http://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip
# Unzip to new directory - suppress zipfile directory name
# !unzip -j gzip.zip -d mnist_datasets
# Unzip desired gz files
# !gzip -d mnist_datasets/emnist-digits-train-images-idx3-ubyte.gz

In [0]:
# Locations of the MNIST sample CSVs, relative to the notebook's working directory.
train_data_path, test_data_path = (
    '../content/sample_data/mnist_train_small.csv',
    '../content/sample_data/mnist_test.csv',
)

# header=None: the first row is read as data and columns are numbered from 0.
train_data, test_data = (
    pd.read_csv(csv_path, header=None)
    for csv_path in (train_data_path, test_data_path)
)

train_data

##Feature Extraction
Ref:

Helper functions

In [0]:
def show_img(data, row_num):
    """Plot one image row of ``data`` with its class id and label as the title.

    Args:
        data: DataFrame whose column 0 holds the class id and whose remaining
            784 columns hold the pixels of one 28x28 image.
        row_num: Positional index of the row to display.
    """
    row = data.values[row_num]
    class_id = row[0]
    # class_mapping (module-level string) translates the class id to its character.
    plt.title('Class: ' + str(class_id) + ', Label: ' + str(class_mapping[class_id]))
    # Rows are plotted in their stored orientation; no transpose is applied.
    plt.imshow(row[1:].reshape([28, 28]), cmap='Greys_r')
    
# The full EMNIST "balanced" set has 47 classes (10 digits, 26 letters, and 11 capital letters that look
# different from their lowercase counterparts); this notebook uses only the 10 digit classes.
num_classes = 10 
img_size = 28

def img_label_load(data_path, num_classes=None):
    """Read a label+pixels CSV and return model-ready images and one-hot labels.

    Args:
        data_path: Path of a header-less CSV; column 0 is the class id, the
            remaining columns are the flattened pixels of one square image.
        num_classes: Width of the one-hot encoding. When falsy, the number of
            distinct labels found in the file is used.

    Returns:
        Tuple ``(images, labels)``: images reshaped to
        (rows, side, side, 1) and divided by 255, and the one-hot label matrix.
    """
    frame = pd.read_csv(data_path, header=None)
    raw = frame.values
    n_rows = len(frame)

    if not num_classes:
        num_classes = len(frame[0].unique())

    # Square images assumed: side length is the root of the pixel-column count (should be 28).
    side = int(np.sqrt(len(frame.iloc[0][1:])))

    # No transpose required; the reshape alone yields (rows, side, side, 1).
    pixels = raw[:, 1:].reshape(n_rows, side, side, 1)

    # One-hot encode the class ids found in column 0.
    one_hot = keras.utils.to_categorical(raw[:, 0], num_classes)

    return pixels / 255., one_hot

In [0]:
# Lookup string from class id to displayed character: class_mapping[label] is that label's character.
# Source data: https://arxiv.org/pdf/1702.05373.pdf
# 47-character mapping for the balanced digits+letters variant, kept for reference:
#class_mapping = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabdefghnqrt' #numbers & letters
class_mapping = '0123456789' #numbers only

In [0]:
# View data
show_img(train_data, 0)

##Build Model
Ref:

In [0]:
# Background on Conv2D layers and on dropout (used to reduce overfitting and generalize better):
# https://www.pyimagesearch.com/2018/12/31/keras-conv2d-and-convolutional-layers/
#
# Three convolution stages followed by a dense classifier head.
# No MaxPooling2D layers are used; the first two convolutions use strides=2.
model = keras.models.Sequential([
    keras.layers.Conv2D(filters=12, kernel_size=(5,5), strides=2, activation='relu',
                        input_shape=(img_size,img_size,1)),
    keras.layers.Dropout(.5),
    keras.layers.Conv2D(filters=18, kernel_size=(3,3), strides=2, activation='relu'),
    keras.layers.Dropout(.5),
    keras.layers.Conv2D(filters=24, kernel_size=(2,2), activation='relu'),
    keras.layers.Flatten(),
    keras.layers.Dense(units=150, activation='relu'),
    # One softmax output per class.
    keras.layers.Dense(units=num_classes, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

# The input/output names are needed later when exporting the model for serving.
for tensor_names in (model.input_names, model.output_names):
    print(tensor_names)

##Train Model
Ref:

In [0]:
X, y = img_label_load(train_data_path)
print(X.shape)

In [0]:

# Hold back 20% of the loaded arrays for validation.
data_generator = keras.preprocessing.image.ImageDataGenerator(validation_split=.2)

# One batch iterator per split, both drawing from the same X/y arrays.
training_data_generator, validation_data_generator = (
    data_generator.flow(X, y, subset=split_name)
    for split_name in ('training', 'validation')
)

history = model.fit_generator(
    training_data_generator,
    steps_per_epoch=500,
    epochs=10,
    validation_data=validation_data_generator,
)

##Evaluate Model

In [0]:
# Evaluate on the held-out test CSV; scoring on the training arrays X, y
# would overstate accuracy.
X_test, y_test = img_label_load(test_data_path)
test_data_generator = data_generator.flow(X_test, y_test, shuffle=False)

# Only a cell's last expression is echoed, so bind and print the metrics explicitly.
# The model was compiled with metrics=['accuracy'], hence [loss, accuracy].
test_loss, test_accuracy = model.evaluate_generator(test_data_generator)
print('Test loss: {:.4f} - test accuracy: {:.4f}'.format(test_loss, test_accuracy))

# Per-epoch training/validation metrics recorded by fit_generator.
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()

##Inference – Make Predictions

In [0]:
show_img(test_data, 100)

#skip for ML serving
X_test, y_test = img_label_load(test_data_path) # loads images and orients for model

#skip for ML serving
def run_prediction(idx):
    """Predict the class of test row ``idx``, print it beside the true label, and plot the image.

    Args:
        idx: Positional row index into ``X_test`` / ``test_data``.
    """
    sample = X_test[idx:idx+1]  # slicing keeps the leading batch axis
    probabilities = model.predict(sample)
    result = np.argmax(probabilities)
    print('Prediction: ', result, ', Char: ', class_mapping[result])
    print('Label: ', test_data.values[idx,0])
    show_img(test_data, idx)
    
#skip for ML serving
import random

# Show predictions for nine test rows drawn from the whole test set.
# (47 is the EMNIST class count and has no meaning as a row bound.)
for _ in range(1,10):
    idx = random.randrange(len(X_test))
    run_prediction(idx)
    # Render the figure now; otherwise each imshow draws over the previous one
    # and only the last image appears in the cell output.
    plt.show()

#Deploy to ML Cloud Engine

##Prepare Model for Saving

In [0]:
#!rm -rf estimator_model export
#model.input_names[0]
#!zip -r export.zip export/

In [0]:
# First, convert Keras Model to TensorFlow Estimator - save estimator model to a directory (this is not saving the keras model)
# Name of the Keras model's first input; the serving signature is keyed by it.
model_input_name = model.input_names[0]

# Wrap the Keras model as a TensorFlow Estimator whose artifacts live under ./estimator_model.
estimator_model = keras.estimator.model_to_estimator(
    keras_model=model,
    model_dir="./estimator_model",
)
print(model_input_name)

In [0]:
#EXAMPLE CODE
# https://www.tensorflow.org/api_docs/python/tf/estimator/Estimator
def input_function(features,labels=None,shuffle=False,input_name=None):
    """Build a numpy-backed ``input_fn`` for the converted estimator.

    Args:
        features: Array of input images.
        labels: Optional array of targets aligned with ``features``.
        shuffle: Whether the rows are shuffled.
        input_name: Key of the feature dict. Defaults to ``model_input_name``
            (the Keras model's first input name); an estimator created with
            ``model_to_estimator`` looks features up by that name, so an
            arbitrary key such as "input_node" is not found.

    Returns:
        A callable usable as ``input_fn`` that serves batches of 5 rows for a
        single pass over the data.
    """
    if input_name is None:
        input_name = model_input_name
    return tf.estimator.inputs.numpy_input_fn(
        x={input_name: features},
        y=labels,
        shuffle=shuffle,
        batch_size = 5,
        num_epochs = 1
    )
  
# The training arrays in this notebook are X and y (X_train/y_train are never defined).
# X is float64 after the /255. scaling; cast to float32 to match the model's weights.
estimator_model.train(input_fn = input_function(X.astype(np.float32),y,True))

In [0]:
def dataset_input_fn():
    """Input function for the estimator: returns the in-memory training arrays ``(X, y)``."""
    features, labels = X, y
    return features, labels
  
#http://tflearn.org/
#http://androidkt.com/train-keras-model-with-tensorflow-estimators-and-datasets-api/
#If the Keras model has already been trained, the estimator can be trained after the fact this way.
#The other option for getting model artifacts is to save them during the initial training.
#train_input = lambda: dataset_input_fn(train_data, None)
estimator_model.train(input_fn=dataset_input_fn, steps=10)  

In [0]:
# Next, export the TensorFlow Estimator to SavedModel


#defines the format for the input required during serving prediction[]
def serving_input_receiver_fn():
    """Define the input format required when serving predictions.

    Returns:
        A ``tf.estimator.export.ServingInputReceiver`` that accepts a batch of
        encoded image byte strings under the request key 'bytes' and passes the
        decoded, scaled images to the model input named ``model_input_name``.
    """
    # The name of this tensor ('image_binary') is used when feeding input during prediction.
    encoded_batch = tf.placeholder(tf.string, shape=[None], name='image_binary')

    # Decode every element of the batch into a single-channel uint8 image.
    # https://www.tensorflow.org/api_docs/python/tf/map_fn
    # https://www.learnpython.org/en/Partial_functions
    # https://www.tensorflow.org/api_docs/python/tf/image/decode_image
    decode_single_channel = partial(tf.image.decode_image, channels=1)
    decoded = tf.map_fn(decode_single_channel, encoded_batch, dtype=tf.uint8)

    # Scale to floats by dividing by 255, then pin the static shape.
    scaled = tf.cast(decoded, tf.float32) / 255.
    scaled.set_shape([None, 28, 28, 1]) # (batch_size, x_dim, y_dim, image channels)

    # features: keyed by the name of the first layer of the (keras) model.
    model_features = {model_input_name: scaled}
    # receiver tensors: keyed by the name that will be passed in the prediction request.
    request_inputs = {'bytes': encoded_batch}
    return tf.estimator.export.ServingInputReceiver(model_features, request_inputs)

In [0]:
export_path = estimator_model.export_savedmodel('./export', serving_input_receiver_fn=serving_input_receiver_fn)
export_path

##Upload model to existing GCS bucket

In [0]:
#https://colab.research.google.com/notebooks/io.ipynb#scrollTo=xM70QWdxeE7q
from google.colab import auth
auth.authenticate_user()

# Existing bucket name
# (GCS buckets are part of a single global namespace.)
bucket_name = 'mltestproject'

# Copy the model directory to our new bucket.
# Full reference: https://cloud.google.com/storage/docs/gsutil/commands/cp
!gsutil cp -r export/1550605828/. gs://{bucket_name}/

##Upload GOOGLE_APPLICATION_CREDENTIALS

In [0]:
#Upload GOOGLE_APPLICATION_CREDENTIALS json file from local computer and save to this notebook
from google.colab import files

# Opens the Colab file picker; the result maps each uploaded filename to its content.
uploaded = files.upload()

for fn, payload in uploaded.items():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(payload)))

##Set GOOGLE_APPLICATION_CREDENTIALS environment variable

In [0]:
import os
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'MLTest-1b56c585b43f.json' #INSERT YOUR CREDENTIALS FILENAME HERE!!

##Request online prediction from deployed model

In [0]:
from PIL import Image
import base64
from io import BytesIO

#function to create base64 encoded data for online prediction from image array test data
def exportB64Str(row_num, data=test_data):
  """Encode one image row of ``data`` as a base64 PNG string for online prediction.

  Args:
    row_num: Positional index of the row to encode.
    data: DataFrame with the class id in column 0 and 784 pixel columns after
      it. Defaults to the ``test_data`` frame bound when this cell ran.

  Returns:
    The PNG file content, base64-encoded, as a plain ``str``.
  """
  # Pixel columns as a 28x28 uint8 greyscale image.
  array = data.values[row_num,1:].reshape(28, 28)
  img = Image.fromarray(array.astype(np.uint8))

  # Serialize the image to PNG in memory.
  buffered = BytesIO()
  img.save(buffered, format="PNG")

  # b64encode returns ASCII bytes; decode them instead of stripping the
  # b'...' wrapper off their repr().
  return base64.b64encode(buffered.getvalue()).decode('ascii')
# show_img(test_data, row_num)

In [0]:
exportB64Str(30)

In [0]:
#mnistTest1
#Need tensorflow 1.12 for google cloud ml engine
#Setup online cloud model
#https://cloud.google.com/ml-engine/docs/tensorflow/online-predict#requesting_predictions

import googleapiclient 
from googleapiclient import discovery
from googleapiclient import errors


#row_num = 101 #0
#row_num = 107 #1 or index 2
#row_num = 109 #4
#row_num = 0 #7
row_num = 30 #3
#row_num = 100 #6
  
# Create the ML Engine service object.
# To authenticate set the environment variable
# GOOGLE_APPLICATION_CREDENTIALS=<path_to_service_account_file>
#name = 'projects/{}/models/{}'.format(Project ID, 'MpgModel')
# Client for the 'ml' v1 API.
service = googleapiclient.discovery.build('ml', 'v1')

# Fully qualified resource name: project / model / version.
name = 'projects/{}/models/{}/versions/{}'.format('mltest-229502', 'MpgModel', 'mnistTest1')

response = service.projects().predict(
    name=name,    
    #body= {"instances":[{"bytes": {"b64": "iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAAAAABXZoBIAAAA3klEQVR4nGNgGMLA+vhqUxxSvH4fzubcb+TBKpm0wpuXwffvfg4sctIfGhgYGBhWX2TDIln0UY6BgcH7ewKauI8CAwPDhrsMDAxy1+/ywUSZGBgYGBhYFmYzMDC8Z2Fj8Dz6xfYTqkbmWw8YGBi8//lGPrmjgmFd4h83BgaBp//+nZXAdAvTwbfBDNon/t0Rx+ZFtU+/rz5cteuHHzZJBmmPSg0G+ed3Ma2EA/Nv+GQ1ruOTlbv+0R23LPfmz5q4ZTkvfI3CLcu+63c+M05Zprzfh5lwaw58wIJbkggAALefQem8NL8aAAAAAElFTkSuQmCC"}}]}
     #body= {"instances":[{"bytes": {"b64": "iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAAAAABXZoBIAAAAxklEQVR4nNXQoRMBQRQG8M9FV++q7Cp/hEwlC4pRiWTVqEQum5HushmKIxIJEvF97wRmBG+L5m3Ynf3N92b3Af9YYS9VKnXZMHBFea17+XPpvffAu607292t4He/k/VJBUCUCg0EAPiZcuOwQSaUum1VKnUS2ljcU3g59a3PABiclEqt2RpUhpkwcbwJCGfK9vvsfWmU50dXckRtupqOKdeSbdGBmjly8ztlWTTAj2PmPNvTGwmFi8BuOeVj4xgc0Er6Lvqhnm7tXZhILmyQAAAAAElFTkSuQmCC"}}]}
    body= {"instances":[{"bytes": {"b64": exportB64Str(row_num, test_data)}}]}    
        
   # body= {"instances":[{"bytes": {"b64": "iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAAAAABXZoBIAAAA7ElEQVR4nGNgGBmAEYkpySAv9uvIZ0xFvKF7L/z79+/fv2enw9Fkyt58/vxmZaQLP7/25Bd/VrIiyTm8ftwaKwrjsUz+2Y2Qq3/aI4liUOdvczj77m0WOFuixEaLQfxBL1zAJIMZymKa9uT3/7+zOCacxnQx28LnTuxuXk8Dir9gSvb9c2FgYGBoSSneAjUJydEFWXsZGBgYvjMGnMWQDH80/z/EeOtDGJIMn38yMDAwMBxR27sf6mWE3IdHEFpOv+Afhnu8PgZwMjCwFP0wxnQrA6Pb/edHjlx86YgQQZbm57bl/bD3PRaN9AMAYiZNYC9hK9EAAAAASUVORK5CYII="}}]}
).execute()

if 'error' in response:
    raise RuntimeError(response['error'])
  
response

In [0]:
z = exportB64Str(30, test_data)

In [0]:
#find prediction from response
result = np.argmax(response['predictions'][0]['dense_1'])
class_mapping[result]

##Example saving a trained model to a file and loading a trained model from a file to perform predictions/inference

In [0]:
def export_png(row_num, data=test_data):
    """Write one image row of ``data`` to a PNG file in the working directory.

    The file is named ``class_<id>_label_<char>.png``, where ``<char>`` is the
    ``class_mapping`` entry for the row's class id.

    Args:
        row_num: Positional index of the row to export.
        data: DataFrame with the class id in column 0 and the pixels after it.
    """
    row = data.values[row_num]
    class_id = row[0]
    # Stored orientation is kept as-is (no transpose).
    pixels = row[1:].reshape(28, 28)
    filename = 'class_' + str(class_id) + '_label_' + str(class_mapping[class_id]) + '.png'
    Image.fromarray(pixels.astype(np.uint8)).save(filename)

In [0]:
from tensorflow.python.saved_model import builder as saved_model_builder
from tensorflow.python.saved_model import tag_constants, signature_constants, signature_def_utils_impl, utils
row_num = 30 #3

# img = load_img('class_3_label_3.png')
# img.thumbnail((28, 28))
# test_x = img_to_array(img) 
# test_x = test_x[:,:,0:1] #select only one channel
# test_x1 = test_x.copy(order='C')

# img_str = base64.b64encode(test_x1)

# print(img_str)
#img_str = str(img_str).replace("\"", "").replace("b'", "").replace("'", "") 

# Use the directory returned by export_savedmodel(); the timestamped folder name
# changes on every export. The path may come back as bytes, so normalize it.
export_dir = export_path.decode() if isinstance(export_path, bytes) else export_path

# The 'image_binary' placeholder takes raw encoded image bytes. exportB64Str()
# returns base64 text for the online-prediction request, so decode it back
# to PNG bytes before feeding the graph directly.
png_bytes = base64.b64decode(exportB64Str(row_num, test_data))

with tf.Session(graph=tf.Graph()) as sess:
    metagraph = tf.saved_model.loader.load(sess, [tag_constants.SERVING], export_dir)
    graph = tf.get_default_graph()

    # Input placeholder created in serving_input_receiver_fn.
    image_input = graph.get_tensor_by_name("image_binary:0")
    # Output of the final dense layer. Deliberately NOT named `model`: that name
    # holds the Keras model, which a later cell saves with model.save().
    softmax_output = graph.get_tensor_by_name("dense_1/Softmax:0")

    print(sess.run(softmax_output, feed_dict={image_input: [png_bytes]}))
#    print(sess.run(model, feed_dict={x: [{"b64": "iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAAAAABXZoBIAAAAxklEQVR4nNXQoRMBQRQG8M9FV++q7Cp/hEwlC4pRiWTVqEQum5HushmKIxIJEvF97wRmBG+L5m3Ynf3N92b3Af9YYS9VKnXZMHBFea17+XPpvffAu607292t4He/k/VJBUCUCg0EAPiZcuOwQSaUum1VKnUS2ljcU3g59a3PABiclEqt2RpUhpkwcbwJCGfK9vvsfWmU50dXckRtupqOKdeSbdGBmjly8ztlWTTAj2PmPNvTGwmFi8BuOeVj4xgc0Er6Lvqhnm7tXZhILmyQAAAAAElFTkSuQmCC"}]}))

In [0]:
from tensorflow.contrib import predictor

# Load the most recent export rather than a hard-coded timestamp directory.
export_dir = export_path.decode() if isinstance(export_path, bytes) else export_path
predict_fn = predictor.from_saved_model(export_dir)

# The 'bytes' input is a 1-D batch of raw encoded image bytes. Inputs that do NOT work:
#   - base64 text (str or bytes) -> "Unable to decode bytes as JPEG, PNG, GIF, or BMP"
#   - a bare value, not a list   -> "Cannot feed value of shape () for Tensor 'image_binary:0'"
#   - raw pixel rows             -> "Unable to get element as bytes" / shape (1, 784) mismatch
png_bytes = base64.b64decode(exportB64Str(30, test_data))
predictions = predict_fn({'bytes': [png_bytes]})

predictions



In [0]:
model.save('my_model_tf_1_1_1.h5')

In [0]:
from keras.preprocessing.image import array_to_img, img_to_array, load_img
#https://www.kaggle.com/lgmoneda/from-image-files-to-numpy-arrays
#https://keras.io/getting-started/faq/#how-can-i-save-a-keras-model
#https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html
#from keras.models import load_model
#model.save('my_model.h5')

##Process png to proper shape numpy array
# Write the PNG for test row 30 first so the file exists on a fresh run, and
# rebuild its filename the same way export_png() names it.
png_row = 30
export_png(png_row)
png_label = test_data.values[png_row, 0]
png_path = 'class_' + str(png_label) + '_label_' + str(class_mapping[png_label]) + '.png'

img = load_img(png_path)
img.thumbnail((28, 28))
test_x = img_to_array(img)
test_x = test_x[:,:,0:1] #select only one channel
# Scale to 0-1 exactly as img_label_load() does for the training data;
# img_to_array() yields raw 0-255 pixel values.
test_x = test_x / 255.
print(test_x.shape)

# Reload the Keras model saved to HDF5 in the previous cell.
model2 =  tf.keras.models.load_model('my_model_tf_1_1_1.h5')
#predictions = model2.predict(X_test[30:31])
# Add the leading batch axis explicitly: (28, 28, 1) -> (1, 28, 28, 1).
predictions = model2.predict(np.expand_dims(test_x, axis=0))

result = np.argmax(predictions)
class_mapping[result]
