In [24]:
"""
sources:
https://www.youtube.com/watch?v=bqeUmLCgsVw
https://github.com/kalaspuffar/tensorflow-data/blob/master/train.py

https://www.youtube.com/watch?v=ot4RWfGTtOg
"""

import tensorflow as tf
import sys

# Opens a TensorFlow session at module level.
# NOTE(review): no cell visible in this notebook reads `sess`, and none of the
# Estimator calls below receive it as an argument -- confirm it is still needed.
sess = tf.Session()


def parser(record):
    """
    Decode one serialized example from the tfrecord file.

    The record is expected to carry two scalar features: "image_raw" (bytes)
    and "label" (int64). Returns a `(features, label)` pair where `features`
    is `{'image': float32 tensor reshaped to [299, 299, 3]}` and `label` is
    the class id cast to int32.
    """
    feature_spec = {
        "image_raw": tf.FixedLenFeature([], tf.string),
        "label":     tf.FixedLenFeature([], tf.int64)
    }
    example = tf.parse_single_example(record, feature_spec)

    # raw bytes -> uint8 values -> float32 (the network works on floats) -> H x W x C
    pixels = tf.reshape(
        tf.cast(tf.decode_raw(example["image_raw"], tf.uint8), tf.float32),
        shape=[299, 299, 3])

    # the label is stored as int64 and handed to the model as int32
    target = tf.cast(example["label"], tf.int32)
    return dict(image=pixels), target

def input_fn(filenames, batch_size=32, drop_remainder=False):
    """
    Build the tf.data pipeline that reads, shuffles, parses and batches TFRecords.

    Args:
        filenames: list of TFRecord file paths.
        batch_size: number of examples per batch. Defaults to 32.
        drop_remainder: when True, a final batch smaller than `batch_size` is
            dropped so that every batch has the same static size (a TPU
            requires this). Defaults to False.

    Returns:
        A `tf.data.Dataset` yielding `({'image': ...}, label)` batches, as
        produced by `parser`.
    """
    dataset = tf.data.TFRecordDataset(filenames=filenames, num_parallel_reads=32)
    # Shuffle inside a sliding window of 1024 records; make one pass (1 epoch).
    dataset = dataset.apply(tf.data.experimental.shuffle_and_repeat(1024, 1))
    # Fused parse + batch.
    dataset = dataset.apply(tf.data.experimental.map_and_batch(
        parser, batch_size, drop_remainder=drop_remainder))
    dataset = dataset.prefetch(buffer_size=2)
    return dataset


def _input_fn_from_params(filenames, params):
    """Call `input_fn`, honouring `params["batch_size"]` when the estimator supplies it."""
    batch_size = (params or {}).get("batch_size")
    if batch_size is None:
        # Plain Estimator (or TPUEstimator without batch sizes): use the defaults.
        return input_fn(filenames=filenames)
    # TPUEstimator puts the batch size it expects into params["batch_size"].
    # Partial batches are dropped in that case because the TPU needs static shapes.
    return input_fn(filenames=filenames, batch_size=batch_size, drop_remainder=True)


def train_input_fn(params):
    """Estimator input function for the training set; `params` may be None."""
    return _input_fn_from_params(
        ["gs://data-imr-unisg/tfrecords/train.tfrecords"], params)


def val_input_fn(params):
    """Estimator input function for the validation set; `params` may be None."""
    return _input_fn_from_params(
        ["gs://data-imr-unisg/tfrecords/test.tfrecords"], params)

def model_fn(features, labels, mode, params):
    """
    Model function for `tf.estimator.Estimator` and `tf.contrib.tpu.TPUEstimator`.

    Builds a small CNN (three conv + max-pool stages, a 128-unit dense layer
    with dropout, and a 2-way output layer).

    Args:
        features: dict with an "image" entry; it is reshaped to
            [-1, 299, 299, 3] before the first convolution.
        labels: integer class ids, used with sparse softmax cross-entropy
            (unused in PREDICT mode).
        mode: one of `tf.estimator.ModeKeys`.
        params: dict with "learning_rate". An optional "use_tpu" entry selects
            the TPU code path; it defaults to False when absent.

    Returns:
        `tf.estimator.EstimatorSpec`, or `tf.contrib.tpu.TPUEstimatorSpec` for
        TRAIN/EVAL when `params["use_tpu"]` is truthy.
    """
    num_classes = 2
    # Read the TPU switch from params rather than from a notebook-level global,
    # so the function does not depend on the order in which cells were run.
    # NOTE(review): TPUEstimator is expected to add "use_tpu" to params on every
    # model_fn call -- confirm for the installed TensorFlow version, or pass it
    # explicitly in `params`.
    use_tpu = bool(params.get("use_tpu", False))
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    net = features["image"]
    net = tf.identity(net, name="input_tensor")
    net = tf.reshape(net, [-1, 299, 299, 3])
    net = tf.identity(net, name="input_tensor_after")

    # Three identical stages: 3x3 'same' convolution + ReLU, then 2x2 max-pool.
    for layer_name, num_filters in (('layer_conv1', 32),
                                    ('layer_conv2', 64),
                                    ('layer_conv3', 64)):
        net = tf.layers.conv2d(inputs=net, name=layer_name,
                               filters=num_filters, kernel_size=3,
                               padding='same', activation=tf.nn.relu)
        net = tf.layers.max_pooling2d(inputs=net, pool_size=2, strides=2)

    net = tf.contrib.layers.flatten(net)
    net = tf.layers.dense(inputs=net, name='layer_fc1',
                          units=128, activation=tf.nn.relu)
    # Dropout is only active while training.
    net = tf.layers.dropout(net, rate=0.5, training=is_training)
    logits = tf.layers.dense(inputs=net, name='layer_fc_2',
                             units=num_classes)

    y_pred = tf.nn.softmax(logits=logits)
    y_pred = tf.identity(y_pred, name="output_pred")
    y_pred_cls = tf.argmax(y_pred, axis=1)
    y_pred_cls = tf.identity(y_pred_cls, name="output_cls")

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=y_pred_cls)

    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    loss = tf.reduce_mean(cross_entropy)

    train_op = None
    if is_training:
        optimizer = tf.train.AdamOptimizer(learning_rate=params["learning_rate"])
        if use_tpu:
            # The optimizer must be wrapped BEFORE minimize() is called;
            # wrapping it afterwards leaves train_op using the unwrapped one.
            optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)
        train_op = optimizer.minimize(
            loss=loss, global_step=tf.train.get_global_step())

    def my_metric_fn(labels_, y_pred_cls_):
        """Return the evaluation metrics dict for the given labels and predictions."""
        return {
            "accuracy": tf.metrics.accuracy(labels_, y_pred_cls_)
        }

    if use_tpu:
        # TPUEstimatorSpec takes `eval_metrics=(metric_fn, tensors)`, not
        # `eval_metric_ops`; the metric ops are built by calling metric_fn later.
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=loss,
            train_op=train_op,
            eval_metrics=(my_metric_fn, [labels, y_pred_cls]))

    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=my_metric_fn(labels, y_pred_cls))

In [None]:
# Plain tf.estimator.Estimator built on model_fn.
# `params` supplies the learning rate that model_fn passes to its Adam optimizer;
# checkpoints and summaries are written under ./model5/.
# NOTE(review): the following cell also assigns `model`, so whichever of the two
# cells ran last decides which estimator the training loop uses.
model = tf.estimator.Estimator(model_fn=model_fn,
                               params={"learning_rate": 1e-4},
                               model_dir="./model5/")

In [None]:
# TPUEstimator configured with use_tpu=False, i.e. for running locally without a TPU.
# It uses a default RunConfig, the same learning rate and the same ./model5/
# directory as the plain Estimator cell.
# NOTE(review): this rebinds `model`, replacing the estimator created in the
# previous cell.
model = tf.contrib.tpu.TPUEstimator(
    model_fn=model_fn,
    config=tf.contrib.tpu.RunConfig(),
    use_tpu=False,
    params={"learning_rate": 1e-4},
    model_dir="./model5/")

In [9]:
# Ten rounds of "one training step, then a full pass over the validation set".
# The range starts at 1 so that `count` is left at 10 once the loop finishes.
for count in range(1, 11):
    model.train(input_fn=train_input_fn, steps=1)
    result = model.evaluate(input_fn=val_input_fn)
    print(result)
    print("Classification accuracy: {0:.2%}".format(result["accuracy"]))
    # Push the prints out immediately so they interleave with the TF log lines.
    sys.stdout.flush()

INFO:tensorflow:Using config: {'_model_dir': './model5/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fc3e9d4efd0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1, '_tpu_config': TPUConfig(iterations_per_loop=2, num_shards=None, num_cores_per_replica=None, per_host_input_for_training=2, tpu_job_name=None, initial_infeed_sleep_secs=None, input_part

INFO:tensorflow:Loss for final step: 32.991867.
INFO:tensorflow:training_loop marked as finished
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Running eval on CPU
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-03-15-07:59:52
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model5/model.ckpt-8
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-03-15-08:00:58
INFO:tensorflow:Saving dict for global step 8: accuracy = 0.7209302, global_step = 8, loss = 18.015594
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 8: ./model5/model.ckpt-8
INFO:tensorflow:evaluation_loop marked as finished
{'accuracy': 0.7209302, 'loss': 18.015594, 'global_step': 8}
Classification accuracy: 72.09%
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Running train on CPU
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
I

In [40]:
import tempfile
import subprocess


class FLAGS(object):
  """Settings for the Cloud TPU run, read as class attributes (`FLAGS.<name>`)."""
  use_tpu = True
  tpu_name = "dominique-c-a-paul"
  # Local temporary directory used as `model_dir`. The first positional argument
  # of mkdtemp is the *suffix*, so the directory name ends in "model5".
  # NOTE(review): a Cloud TPU may not be able to write to a VM-local path;
  # presumably a gs:// bucket is needed when use_tpu is True -- confirm.
  model_dir = tempfile.mkdtemp("model5")
  iterations = 50  # Number of training steps to run on the Cloud TPU before returning control.
  num_shards = 8   # A single Cloud TPU has 8 shards.


def _gcloud_config_value(key):
    """Return `gcloud config get-value <key>` as text without surrounding whitespace."""
    # check_output returns bytes that end in a newline; decode and strip so the
    # value can be passed on as a plain string.
    raw = subprocess.check_output(['gcloud', 'config', 'get-value', key])
    return raw.decode('utf-8').strip()


if FLAGS.use_tpu:
    my_project_name = _gcloud_config_value('project')
    my_zone = _gcloud_config_value('compute/zone')
    cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name,
            zone=my_zone,
            project=my_project_name)
    # Use the resolver created just above; no other name is defined in this cell.
    master = cluster_resolver.get_master()
else:
    master = ''

my_tpu_run_config = tf.contrib.tpu.RunConfig(
    master=master,
    evaluation_master=master,
    model_dir=FLAGS.model_dir,
    session_config=tf.ConfigProto(
        allow_soft_placement=True, log_device_placement=True),
    tpu_config=tf.contrib.tpu.TPUConfig(iterations_per_loop=FLAGS.iterations,
                                        num_shards=FLAGS.num_shards),
)

ImportError: googleapiclient and oauth2client must be installed before using the TPU cluster resolver. Execute: `pip install --upgrade google-api-python-client` and `pip install --upgrade oauth2client` to install with pip.

In [39]:
!pip install --upgrade google-api-python-client --force-reinstall
!pip install --upgrade oauth2client --force-reinstall

Collecting google-api-python-client
  Using cached https://files.pythonhosted.org/packages/55/e9/e8fb2e3a031cb69b9524b80a92b126665d9a17421700a219555e3233ab6a/google_api_python_client-1.7.8-py3-none-any.whl
Collecting google-auth-httplib2>=0.0.3 (from google-api-python-client)
  Using cached https://files.pythonhosted.org/packages/33/49/c814d6d438b823441552198f096fcd0377fd6c88714dbed34f1d3c8c4389/google_auth_httplib2-0.0.3-py2.py3-none-any.whl
Collecting six<2dev,>=1.6.1 (from google-api-python-client)
  Downloading https://files.pythonhosted.org/packages/73/fb/00a976f728d0d1fecfe898238ce23f502a721c0ac0ecfedb80e0d88c64e9/six-1.12.0-py2.py3-none-any.whl
Collecting google-auth>=1.4.1 (from google-api-python-client)
  Using cached https://files.pythonhosted.org/packages/c5/9b/ed0516cc1f7609fb0217e3057ff4f0f9f3e3ce79a369c6af4a6c5ca25664/google_auth-1.6.3-py2.py3-none-any.whl
Collecting uritemplate<4dev,>=3.0.0 (from google-api-python-client)
  Using cached https://files.pythonhosted.org/pac

In [None]:
# Global batch size handed to TPUEstimator. 32 equals the batch size input_fn
# produces by default and is divisible by FLAGS.num_shards (8).
GLOBAL_BATCH_SIZE = 32

# Estimator for the Cloud TPU. TPUEstimator requires `train_batch_size` when
# use_tpu is True (and `eval_batch_size` to evaluate on the TPU), so both are
# passed explicitly. The model directory comes from my_tpu_run_config.
tpu_estimator = tf.contrib.tpu.TPUEstimator(
    model_fn=model_fn,
    config=my_tpu_run_config,
    use_tpu=FLAGS.use_tpu,
    train_batch_size=GLOBAL_BATCH_SIZE,
    eval_batch_size=GLOBAL_BATCH_SIZE,
    params={"learning_rate": 1e-4},
)

# Estimator for running locally on CPU. No model_dir is given here or in the
# RunConfig.
# NOTE(review): presumably checkpoints then go to a temporary directory chosen
# by the estimator -- confirm if they need to be kept.
cpu_estimator = tf.contrib.tpu.TPUEstimator(
    model_fn=model_fn,
    config=tf.contrib.tpu.RunConfig(),
    use_tpu=False,
    params={"learning_rate": 1e-4},
)

In [None]:
# Ten rounds of "one training step, then a full pass over the validation set"
# on the CPU estimator.
# The range starts at 1 so that `count` is left at 10 once the loop finishes.
for count in range(1, 11):
    cpu_estimator.train(input_fn=train_input_fn, steps=1)
    result = cpu_estimator.evaluate(input_fn=val_input_fn)
    print(result)
    print("Classification accuracy: {0:.2%}".format(result["accuracy"]))
    # Push the prints out immediately so they interleave with the TF log lines.
    sys.stdout.flush()

In [None]:
# Alternative TPU configuration (taken from a tutorial): pass the cluster
# resolver to RunConfig through `cluster=` instead of resolving the master
# address by hand.
tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
      FLAGS.tpu_name,
      # FLAGS has no tpu_zone / gcp_project attributes in this notebook; fall
      # back to None, which leaves zone and project for the resolver to determine.
      zone=getattr(FLAGS, "tpu_zone", None),
      project=getattr(FLAGS, "gcp_project", None))
config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      model_dir=FLAGS.model_dir,
      tpu_config=tf.contrib.tpu.TPUConfig(
          num_shards=FLAGS.num_shards,
          # FLAGS names this setting `iterations`; accept `iterations_per_loop`
          # too if it is ever added.
          iterations_per_loop=getattr(FLAGS, "iterations_per_loop",
                                      FLAGS.iterations)))