In [1]:
# Revised for our dataset from:
# https://www.tensorflow.org/neural_structured_learning/tutorials/graph_keras_mlp_cora
from __future__ import absolute_import, division, print_function, unicode_literals

import os
import neural_structured_learning as nsl

# Hide every CUDA device from TensorFlow so the notebook runs on CPU only.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import tensorflow as tf

# Drop any Keras state left over from earlier runs of this notebook.
tf.keras.backend.clear_session()

# Report the TensorFlow version and the execution environment.
print("Version: ", tf.__version__)
print("Eager mode: ", tf.executing_eagerly())
gpu_status = "available" if tf.test.is_gpu_available() else "NOT AVAILABLE"
print("GPU is", gpu_status)


Version:  2.0.0
Eager mode:  True
GPU is NOT AVAILABLE


In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
# Count the GPUs that TensorFlow can see.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  0


In [6]:
### Experiment dataset: TFRecord files, given as relative paths.
TRAIN_DATA_PATH = '../../data/filtered_images_for_training_tf/train.tfr'
TEST_DATA_PATH  = '../../data/filtered_images_for_training_tf/test.tfr'

### Naming scheme for neighbor features in the input: a neighbor feature is
### '<prefix><index>_<feature>' and its weight is '<prefix><index><suffix>'.
NBR_WEIGHT_SUFFIX = '_weight'
NBR_FEATURE_PREFIX = 'NL_nbr_'

In [7]:
class HParams(object):
  """Hyperparameters shared by the dataset pipeline and the models.

  Every attribute that a cell of this notebook reads from `HPARAMS` is defined
  here, so a fresh "Restart & Run All" does not fail with `AttributeError`.
  The values are starting points; tune them for the dataset.
  """

  def __init__(self):
    ### Dataset parameters
    # JDW set image size
    #self.input_shape = [255, 255, 1]
    self.input_shape = [1024, 1024, 1]
    # JDW number of values in one flattened image (height * width * channels);
    # used as the input width of the MLP.
    self.max_seq_length = (
        self.input_shape[0] * self.input_shape[1] * self.input_shape[2])
    self.num_classes = 4

    ### Graph regularization parameters (read when building the
    ### `nsl.keras.GraphRegularization` wrapper).
    # TODO JDW Set number of neighbors
    self.num_neighbors = 1
    self.distance_type = nsl.configs.DistanceType.L2
    self.graph_regularization_multiplier = 0.1

    ### Model architecture
    # JDW Crashes with 64
    #self.conv_filters = [32, 64, 64]
    self.conv_filters = [32, 32, 32]
    self.kernel_size = (3, 3)
    self.pool_size = (2, 2)
    self.num_fc_units = [64]
    # Dropout applied after every fully connected layer of the MLP.
    self.dropout_rate = 0.5

    ### Training / evaluation parameters
    self.batch_size = 32
    self.epochs = 5
    # The MLP training cells read `train_epochs`; keep it equal to `epochs` so
    # there is a single value to tune.
    self.train_epochs = self.epochs
    # `None` makes `Model.evaluate` run over the whole test dataset.
    self.eval_steps = None

    ### Adversarial regularization parameters
    self.adv_multiplier = 0.2
    self.adv_step_size = 0.2
    self.adv_grad_norm = 'infinity'

# Single shared hyperparameter instance read by the dataset and model cells.
HPARAMS = HParams()

In [9]:
def parse_example(example_proto):
  """Parses one serialized `tf.train.Example` into a `(features, label)` pair.

  Args:
    example_proto: A serialized `tf.train.Example`.

  Returns:
    A tuple `(features, label)`. `features` is a dictionary holding the
    'image', 'image_x' and 'image_y' entries of the example, and `label` is
    the int64 value stored under 'label'.
  """
  def int64_scalar():
    # Optional scalar integer; examples that lack the field are given -1.
    return tf.io.FixedLenFeature([], tf.int64, default_value=-1)

  # 'image' is a scalar bytes string and has no default, so it must be present
  # in every example. It is passed through exactly as stored.
  # NOTE(review): the image is not decoded here -- confirm where decoding into
  # numeric pixels is meant to happen.
  feature_spec = {
      'image_x': int64_scalar(),
      'image_y': int64_scalar(),
      'image': tf.io.FixedLenFeature([], tf.string),
      'label': int64_scalar(),
  }

  # Neighbor features (currently disabled). They would be extracted in a
  # similar manner to the features above.
  # NOTE(review): when re-enabling, the weight spec has to sit inside the loop
  # so that every neighbor gets one, and the neighbor 'image' shape should
  # presumably match the scalar 'image' spec above -- confirm.
  #for i in range(HPARAMS.num_neighbors):
  #  nbr_feature_key = '{}{}_{}'.format(NBR_FEATURE_PREFIX, i, 'image')
  #  nbr_weight_key = '{}{}{}'.format(NBR_FEATURE_PREFIX, i, NBR_WEIGHT_SUFFIX)
  #  feature_spec[nbr_feature_key] = tf.io.FixedLenFeature(
  #      [HPARAMS.max_seq_length],
  #      tf.string)

  # A default weight of 0.0 means non-existent neighbors are discounted, so
  # graph regularization only counts the neighbors a sample really has.
  #feature_spec[nbr_weight_key] = tf.io.FixedLenFeature([1], tf.float32, default_value=tf.constant([0.0]))

  #print(feature_spec.keys())
  parsed = tf.io.parse_single_example(example_proto, feature_spec)

  # Split the ground truth off from the model inputs.
  label = parsed.pop('label')
  return parsed, label


def make_dataset(file_path, training=False):
  """Builds the batched input pipeline for one TFRecord file.

  Args:
    file_path: Path of a TFRecord file of serialized `tf.train.Example`
      records.
    training: When True, the records are shuffled before they are parsed.

  Returns:
    A `tf.data.Dataset` of `(features, labels)` batches, as produced by
    `parse_example`, with `HPARAMS.batch_size` examples per batch.
  """
  records = tf.data.TFRecordDataset([file_path])
  # Shuffling happens on the serialized records, before parsing.
  source = records.shuffle(10000) if training else records
  return source.map(parse_example).batch(HPARAMS.batch_size)

# Build the input pipelines: shuffled for training, in file order for testing.
train_dataset = make_dataset(file_path=TRAIN_DATA_PATH, training=True)
test_dataset = make_dataset(file_path=TEST_DATA_PATH, training=False)

In [10]:
# Show the element structure (shapes and dtypes) of the batched training set.
print(train_dataset)

<BatchDataset shapes: ({image: (None,), image_x: (None,), image_y: (None,)}, (None,)), types: ({image: tf.string, image_x: tf.int64, image_y: tf.int64}, tf.int64)>


In [None]:
#def normalize(features):
#  features[IMAGE_INPUT_NAME] = tf.cast(
#      features[IMAGE_INPUT_NAME], dtype=tf.float32) / 255.0
#  return features
#
#def convert_to_tuples(features):
#  return features[IMAGE_INPUT_NAME], features[LABEL_INPUT_NAME]
#
#def convert_to_dictionaries(image, label):
#  return {IMAGE_INPUT_NAME: image, LABEL_INPUT_NAME: label}
#
#train_dataset = train_dataset.map(normalize).shuffle(10000).batch(HPARAMS.batch_size).map(convert_to_tuples)
#test_dataset = test_dataset.map(normalize).batch(HPARAMS.batch_size).map(convert_to_tuples)

In [None]:
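# TODO(JDW): candidate replacement for the MLP base model -- a small CNN
# (Conv2D / MaxPooling2D stack) over the image tensor. Currently disabled.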
#IMAGE_INPUT_NAME = "image"
#
#def build_base_model(hparams):
#  """Builds a model according to the architecture defined in `hparams`."""
#  inputs = tf.keras.Input(
#      shape=hparams.input_shape, dtype=tf.float32, name=IMAGE_INPUT_NAME)
#
#  x = inputs
#  for i, num_filters in enumerate(hparams.conv_filters):
#    x = tf.keras.layers.Conv2D(
#        num_filters, hparams.kernel_size, activation='relu')(x)
#    if i < len(hparams.conv_filters) - 1:
#     # max pooling between convolutional layers
#      x = tf.keras.layers.MaxPooling2D(hparams.pool_size)(x)
#  x = tf.keras.layers.Flatten()(x)
#  for num_units in hparams.num_fc_units:
#   x = tf.keras.layers.Dense(num_units, activation='relu', name="relu")(x)
#  pred = tf.keras.layers.Dense(hparams.num_classes, activation='softmax', name="softmax")(x)
#  model = tf.keras.Model(inputs=inputs, outputs=pred, name="Natural Disaster Analysis")
#  return model
#
## Generate Base Model
#base_model = build_base_model(HPARAMS)
#base_model.summary()

In [None]:
#base_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
#                   metrics=['accuracy'])
#base_model.fit(train_dataset, epochs=HPARAMS.epochs)

In [None]:
#results = base_model.evaluate(test_dataset)
#named_results = dict(zip(base_model.metrics_names, results))
#print('accuracy:', named_results['accuracy'])

In [None]:
# Inspect one training batch.
for feature_batch, label_batch in train_dataset.take(1):
  print('Feature list:', list(feature_batch.keys()))

  # Each input is a serialized image. Printing the tensor itself would dump
  # the raw bytes of every image in the batch, so summarise it instead.
  image_batch = feature_batch['image']
  print('Batch of inputs:', image_batch.dtype, image_batch.shape)
  print('Bytes per input:', tf.strings.length(image_batch).numpy())

  nbr_feature_key = '{}{}_{}'.format(NBR_FEATURE_PREFIX, 0, 'image')
  nbr_weight_key = '{}{}{}'.format(NBR_FEATURE_PREFIX, 0, NBR_WEIGHT_SUFFIX)
  # Neighbor features exist only when `parse_example` lists them in its
  # feature spec, so check for them instead of failing with a KeyError.
  if nbr_feature_key in feature_batch and nbr_weight_key in feature_batch:
    nbr_batch = feature_batch[nbr_feature_key]
    print('Batch of neighbor inputs:', nbr_batch.dtype, nbr_batch.shape)
    print('Batch of neighbor weights:',
          tf.reshape(feature_batch[nbr_weight_key], [-1]))
  else:
    print('No neighbor features in this batch (looked for {!r} and {!r}).'
          .format(nbr_feature_key, nbr_weight_key))

  print('Batch of labels:', label_batch)

In [None]:
def make_mlp_functional_model(hparams):
  """Creates a functional API-based multi-layer perceptron model.

  Args:
    hparams: Hyperparameter object providing `max_seq_length` (input width),
      `num_fc_units` (units of each hidden layer), `dropout_rate` and
      `num_classes`.

  Returns:
    An uncompiled `tf.keras.Model` with a single input named 'image' and a
    softmax output over `hparams.num_classes` classes.
  """
  # Dense layers need numeric input, so the model takes the image as a flat
  # float32 vector rather than as a serialized string.
  # NOTE(review): `parse_example` currently yields 'image' as undecoded bytes;
  # the image has to be decoded and flattened to `max_seq_length` floats
  # before this model can be fit -- the stored encoding is not visible here.
  inputs = tf.keras.Input(
      shape=(hparams.max_seq_length,), dtype='float32', name='image')

  # Start the layer chain at the input; the hidden layers stack on top of it.
  cur_layer = inputs
  for num_units in hparams.num_fc_units:
    cur_layer = tf.keras.layers.Dense(num_units, activation='relu')(cur_layer)
    # For functional models, by default, Keras ensures that the 'dropout' layer
    # is invoked only during training.
    cur_layer = tf.keras.layers.Dropout(hparams.dropout_rate)(cur_layer)

  outputs = tf.keras.layers.Dense(
      hparams.num_classes, activation='softmax')(
          cur_layer)

  model = tf.keras.Model(inputs, outputs=outputs)
  return model

In [None]:
# Create the base MLP model with the functional-API builder and list its
# layers.
base_model = make_mlp_functional_model(HPARAMS)
base_model_tag = 'FUNCTIONAL'
base_model.summary()

In [None]:
# Compile the base MLP model: integer class labels, hence the sparse
# categorical cross-entropy loss.
base_model.compile(
    optimizer='adam', loss='sparse_categorical_crossentropy',
    metrics=['accuracy'])

# Train it on the training set.
base_model.fit(train_dataset, epochs=HPARAMS.train_epochs, verbose=1)

In [None]:
# Helper for reporting evaluation results.
def print_metrics(model_desc, eval_metrics):
  """Writes the evaluation metrics of one model to stdout.

  Args:
    model_desc: Label identifying the model in the printed lines.
    eval_metrics: Mapping from metric name to value. The 'accuracy' and 'loss'
      entries are required; 'graph_loss' is reported only when present.
  """
  print('\n')

  # (metric key, line heading) pairs, in the order they are reported.
  headings = [('accuracy', 'Eval accuracy for '), ('loss', 'Eval loss for ')]
  if 'graph_loss' in eval_metrics:
    headings.append(('graph_loss', 'Eval graph loss for '))

  for metric_name, heading in headings:
    print(heading, model_desc, ': ', eval_metrics[metric_name])

# Evaluate the base model on the test set and pair each metric name with its
# value before printing.
base_metric_names = base_model.metrics_names
base_metric_values = base_model.evaluate(
    test_dataset, steps=HPARAMS.eval_steps)
eval_results = dict(zip(base_metric_names, base_metric_values))
print_metrics('Base MLP model', eval_results)

In [None]:
# Build a separate, untrained base MLP for the graph-regularized run.
base_reg_model = make_mlp_functional_model(HPARAMS)
base_reg_model_tag = 'FUNCTIONAL'

# Describe the graph regularizer, then wrap the base model with it.
graph_reg_config = nsl.configs.make_graph_reg_config(
    max_neighbors=HPARAMS.num_neighbors,
    multiplier=HPARAMS.graph_regularization_multiplier,
    distance_type=HPARAMS.distance_type,
    sum_over_axis=-1)
graph_reg_model = nsl.keras.GraphRegularization(
    base_reg_model, graph_reg_config)

# Compile and train with the same optimizer, loss and metric as the base model.
graph_reg_model.compile(
    optimizer='adam', loss='sparse_categorical_crossentropy',
    metrics=['accuracy'])
graph_reg_model.fit(train_dataset, epochs=HPARAMS.train_epochs, verbose=1)

In [None]:
# Evaluate the graph-regularized model on the test set and report its metrics.
graph_reg_metric_names = graph_reg_model.metrics_names
graph_reg_metric_values = graph_reg_model.evaluate(
    test_dataset, steps=HPARAMS.eval_steps)
eval_results = dict(zip(graph_reg_metric_names, graph_reg_metric_values))
print_metrics('MLP + graph regularization', eval_results)