In [0]:
# Project-specific settings: edit these three values before running the notebook
REGION = 'us-central1'
PROJECT = 'qwiklabs-gcp-24c583ad897245ef'
BUCKET = 'qwiklabs-gcp-24c583ad897245ef'

In [0]:
import os
# Export the notebook settings as environment variables so the %%bash cells can read them
os.environ.update({'BUCKET': BUCKET, 'PROJECT': PROJECT, 'REGION': REGION})

In [0]:
%%bash
# Create the bucket in ${REGION} only when `gsutil ls` does not already list it
gsutil ls | grep -q gs://${BUCKET}/ || gsutil mb -l ${REGION} gs://${BUCKET}

In [0]:
%%bash
# List the CSV files present in the current working directory
ls *.csv

In [0]:
import shutil
import numpy as np
import tensorflow as tf

In [0]:
# Names of the CSV columns, in the order they appear in each row
CSV_COLUMNS = [
    'weight_pounds',
    'is_male',
    'mother_age',
    'plurality',
    'gestation_weeks',
    'key',
]
# Column used as the label, and column holding the row key
LABEL_COLUMN = 'weight_pounds'
KEY_COLUMN = 'key'

# Default value for each CSV column, positionally aligned with CSV_COLUMNS
DEFAULTS = [[0.0], ['null'], [0.0], ['null'], [0.0], ['nokey']]
# Maximum number of training steps
TRAIN_STEPS = 1000

In [0]:
# Create an input function reading a file using the Dataset API
# Then provide the results to the Estimator API
def read_dataset(filename, mode, batch_size = 512):
  """Build an Estimator input function that reads CSV files with the Dataset API.

  Args:
    filename: File name or glob pattern; expanded with tf.gfile.Glob.
    mode: A tf.estimator.ModeKeys value. In TRAIN mode the data is shuffled
      and repeated indefinitely; in any other mode it is read exactly once.
    batch_size: Number of rows per batch.

  Returns:
    A zero-argument function that, when called, returns the next batch as a
    (features, label) pair, where features is a dict keyed by column name.
  """
  def _input_fn():
    def decode_csv(value_column):
      # Parse one CSV line into one value per column, typed by DEFAULTS
      columns = tf.decode_csv(value_column, record_defaults=DEFAULTS)
      # Pair each column name with its parsed value
      features = dict(zip(CSV_COLUMNS, columns))
      # Split the label off; everything left in the dict is a feature
      label = features.pop(LABEL_COLUMN)
      return features, label

    # Expand the pattern into the list of matching files
    file_list = tf.gfile.Glob(filename)
    # Read the files line by line and parse every line
    dataset = tf.data.TextLineDataset(file_list).map(decode_csv)

    if mode == tf.estimator.ModeKeys.TRAIN:
      num_epochs = None  # repeat indefinitely
      dataset = dataset.shuffle(buffer_size=10*batch_size)
    else:
      num_epochs = 1  # end-of-input after a single pass
    dataset = dataset.repeat(num_epochs).batch(batch_size)

    # Hand back the next batch of (features, label)
    return dataset.make_one_shot_iterator().get_next()

  return _input_fn

In [0]:
# Define feature columns
def get_categorical(name, values):
    """Return an indicator column for the categorical feature `name`.

    `values` is the vocabulary list used to build the underlying
    categorical column.
    """
    vocab_column = tf.feature_column.categorical_column_with_vocabulary_list(name, values)
    return tf.feature_column.indicator_column(vocab_column)

def get_cols():
    """Return the list of feature columns passed to the estimator.

    Categorical features are wrapped by get_categorical; numeric features
    use tf.feature_column.numeric_column.
    """
    return [
        get_categorical('is_male', ['False', 'True', 'Unknown']),
        tf.feature_column.numeric_column('mother_age'),
        get_categorical('plurality', ['Single(1)', 'Twins(2)', 'Triplets(3)', 'Quadruplets(4)', 'Quintuplets(5)']),
        tf.feature_column.numeric_column('gestation_weeks'),
    ]

In [0]:
# Create serving input function to be able to serve predictions later using provided inputs
def serving_input_fn():
    """Describe the inputs accepted when the exported model serves predictions.

    Returns:
        A tf.estimator.export.ServingInputReceiver. Its receiver tensors are
        one placeholder per feature, each declared with shape [None]; its
        features are those same tensors with a trailing dimension added.
    """
    # One placeholder per model input
    feature_placeholders = dict()
    feature_placeholders['is_male'] = tf.placeholder(tf.string, [None])
    feature_placeholders['mother_age'] = tf.placeholder(tf.float32, [None])
    feature_placeholders['plurality'] = tf.placeholder(tf.string, [None])
    feature_placeholders['gestation_weeks'] = tf.placeholder(tf.float32, [None])

    # Add a trailing dimension to every placeholder
    features = dict()
    for key, tensor in feature_placeholders.items():
        features[key] = tf.expand_dims(tensor, -1)

    return tf.estimator.export.ServingInputReceiver(features, feature_placeholders)

In [0]:
# Create estimator to train and evaluate
def train_and_evaluate(output_dir):
  """Train a DNN regressor and evaluate it periodically.

  Checkpoints and exported models are written under `output_dir`.
  Training reads 'train.csv' for up to TRAIN_STEPS steps; evaluation reads
  'eval.csv' so that metrics are computed on data the model was not
  trained on.

  Args:
    output_dir: Directory used as the estimator's model_dir.
  """
  EVAL_INTERVAL = 300  # seconds between checkpoints and between evaluations
  run_config = tf.estimator.RunConfig(save_checkpoints_secs = EVAL_INTERVAL,
                                      keep_checkpoint_max = 3)
  # DNN estimator
  estimator = tf.estimator.DNNRegressor(model_dir=output_dir,
                                       feature_columns = get_cols(),
                                       hidden_units = [64, 32],
                                       config= run_config)
  # Alternative to try: a linear model
  #estimator = tf.estimator.LinearRegressor(model_dir=output_dir, feature_columns = get_cols())
  train_spec = tf.estimator.TrainSpec(
                       input_fn = read_dataset('train.csv', mode=tf.estimator.ModeKeys.TRAIN),
                       max_steps = TRAIN_STEPS)
  exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)
  eval_spec = tf.estimator.EvalSpec(
                       # Evaluate on the evaluation file, not the training file
                       input_fn = read_dataset('eval.csv', mode=tf.estimator.ModeKeys.EVAL),
                       steps = None,
                       start_delay_secs = 60, # start evaluating after N seconds
                       throttle_secs = EVAL_INTERVAL,  # evaluate every N seconds
                       exporters = exporter)
  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

In [0]:
# Train the model, first wiping any output left by an earlier run
shutil.rmtree('babyweight_trained', True)
train_and_evaluate(output_dir='babyweight_trained')

# Monitor and experiment with training

In [0]:
# Launch TensorBoard on the directory that train_and_evaluate wrote to
from google.datalab.ml import TensorBoard
TensorBoard().start('./babyweight_trained')

In [0]:
# Stop every TensorBoard instance reported by TensorBoard.list()
for tb_pid in TensorBoard.list()['pid']:
  TensorBoard().stop(tb_pid)
  print('Stopped TensorBoard with pid {}'.format(tb_pid))