In [66]:
#!brew install wget
#!wget -O "/tmp/vocab.txt" "http://ciir.cs.umass.edu/downloads/Antique/tf-ranking/vocab.txt"
#!wget -O "/tmp/train.tfrecords" "http://ciir.cs.umass.edu/downloads/Antique/tf-ranking/ELWC/train.tfrecords"
#!wget -O "/tmp/test.tfrecords" "http://ciir.cs.umass.edu/downloads/Antique/tf-ranking//ELWC/test.tfrecords"

import pandas as pd
import numpy as np
import tensorflow_datasets as tfds

Collecting tensorflow_datasets
  Downloading tensorflow_datasets-3.1.0-py3-none-any.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 1.2 MB/s eta 0:00:01
Collecting future
  Downloading future-0.18.2.tar.gz (829 kB)
[K     |████████████████████████████████| 829 kB 32.4 MB/s eta 0:00:01
[?25hCollecting tensorflow-metadata
  Downloading tensorflow_metadata-0.21.2-py2.py3-none-any.whl (31 kB)
Collecting dill
  Downloading dill-0.3.1.1.tar.gz (151 kB)
[K     |████████████████████████████████| 151 kB 32.2 MB/s eta 0:00:01
[?25hCollecting promise
  Downloading promise-2.3.tar.gz (19 kB)
Collecting tqdm
  Downloading tqdm-4.45.0-py2.py3-none-any.whl (60 kB)
[K     |████████████████████████████████| 60 kB 22.4 MB/s eta 0:00:01
Collecting googleapis-common-protos
  Downloading googleapis-common-protos-1.51.0.tar.gz (35 kB)
Building wheels for collected packages: future, dill, promise, googleapis-common-protos
  Building wheel for future (setup.py) ... [?25ldone
[?25h  Creat

In [2]:
print('Installing TensorFlow 2.1.0. This will take a minute, ignore the warnings.')
!pip install -q tensorflow==2.1.0
import tensorflow as tf

# This is needed for tensorboard compatibility.
!pip uninstall -y grpcio
!pip install -q grpcio>=1.24.3
try:
    from tensorflow_serving.apis import input_pb2
except ImportError:
    !pip install -q tensorflow-serving-api
    from tensorflow_serving.apis import input_pb2

Found existing installation: grpcio 1.28.1
Uninstalling grpcio-1.28.1:
  Successfully uninstalled grpcio-1.28.1


In [76]:
import six
import os
import numpy as np

try:
    import tensorflow_ranking as tfr
except ImportError:
    !pip install -q tensorflow_ranking
    import tensorflow_ranking as tfr

tfr.__version__
tf.compat.v1.enable_eager_execution()

In [46]:
races = pd.read_csv("Data/races.csv")
runs = pd.read_csv("Data/runs.csv")

_DATASET = None

runs["horse_country"] = runs["horse_country"].replace(np.nan, runs["horse_country"].mode()[0])
runs["horse_type"] = runs["horse_type"].replace(np.nan, runs["horse_type"].mode()[0])

COUNTRY_VOCAB = np.unique(np.array(runs["horse_country"]))
TYPE_VOCAB = np.unique(np.array(runs["horse_type"]))
GOING_VOCAB = np.unique(np.array(races["going"]))

# Store the paths to files containing training and test instances.
#_TRAIN_DATA_PATH = "/tmp/train.tfrecords"
#_TEST_DATA_PATH = "/tmp/test.tfrecords"
_TRAIN_DATA_PATH = "Data/train.tfrecords"
_TEST_DATA_PATH = "Data/test.tfrecords"

# Store the vocabulary path for query and document tokens.
_VOCAB_PATH = "/tmp/vocab.txt"

# The maximum number of documents per query in the dataset.
# Document lists are padded or truncated to this size.
_LIST_SIZE = 50

# The document relevance label.
#_LABEL_FEATURE = "relevance"
_LABEL_FEATURE = "horse_placing"

# Padding labels are set negative so that the corresponding examples can be
# ignored in loss and metrics.
_PADDING_LABEL = -1

# Learning rate for optimizer.
_LEARNING_RATE = 0.05

# Parameters to the scoring function.
_BATCH_SIZE = 32
_HIDDEN_LAYER_DIMS = ["64", "32", "16"]
_DROPOUT_RATE = 0.2
_GROUP_SIZE = 1  # Pointwise scoring.

# Location of model directory and number of training steps.
_MODEL_DIR = "Model/ranking_model_dir"
_NUM_TRAIN_STEPS = 15 * 1000

In [73]:
_EMBEDDING_DIMENSION = 20

def context_feature_columns_google():
    """Returns context feature names to column definitions."""
    sparse_column = tf.feature_column.categorical_column_with_vocabulary_file(
      key="query_tokens",
      vocabulary_file=_VOCAB_PATH)
    query_embedding_column = tf.feature_column.embedding_column(
      sparse_column, _EMBEDDING_DIMENSION)
    return {"query_tokens": query_embedding_column}

def context_feature_columns():
    '''CONTEXT FEATURES: distance, surface, going'''
    
    distance_column = tf.feature_column.numeric_column(key="distance")
    
    #distance_column = tf.feature_column.embedding_column(
    #    distance_column, _EMBEDDING_DIMENSION)
    
    surface_column = tf.feature_column.numeric_column(key="surface")
    
    going_column = tf.feature_column.categorical_column_with_vocabulary_list(
        key="going",
        vocabulary_list=GOING_VOCAB)
    going_one_hot_column = tf.feature_column.indicator_column(going_column)
    
    return {"distance":distance_column,
           # "surface":surface_column,
           # "going":going_one_hot_column
           }

def example_feature_columns_google():
    """Returns the example feature columns."""
    sparse_column = tf.feature_column.categorical_column_with_vocabulary_file(
      key="document_tokens",
      vocabulary_file=_VOCAB_PATH)
    document_embedding_column = tf.feature_column.embedding_column(
      sparse_column, _EMBEDDING_DIMENSION)
    return {"document_tokens": document_embedding_column}

def example_feature_columns():
    '''EXAMPLE FEATURES: horse_age, horse_country, horse_type, horse_rating, horse_placing'''
    
    age_column = tf.feature_column.numeric_column(key="horse_age")
    #age_column = tf.feature_column.embedding_column(
    #    age_column, _EMBEDDING_DIMENSION)
    
    country_column = tf.feature_column.categorical_column_with_vocabulary_list(
        key="horse_country",
        vocabulary_list=COUNTRY_VOCAB)
    country_one_hot_column = tf.feature_column.indicator_column(country_column)
    
    type_column = tf.feature_column.categorical_column_with_vocabulary_list(
        key="horse_type",
        vocabulary_list=TYPE_VOCAB)
    type_one_hot_column = tf.feature_column.indicator_column(type_column)
    
    rating_column = tf.feature_column.numeric_column(key="horse_rating")
    
    placing_column = tf.feature_column.numeric_column(key="horse_placing",
                                                      dtype=tf.int64,
                                                      default_value=_PADDING_LABEL)
    return {"horse_age":age_column,
            #"horse_country":country_one_hot_column,
           # "horse_type":type_one_hot_column,
            #"horse_placing":placing_column
           }

def input_fn(path, num_epochs=None):
    context_feature_spec = tf.feature_column.make_parse_example_spec(
        context_feature_columns().values())
    label_column = tf.feature_column.numeric_column(
        _LABEL_FEATURE, dtype=tf.int64, default_value=_PADDING_LABEL)
    example_feature_spec = tf.feature_column.make_parse_example_spec(
        list(example_feature_columns().values()) + [label_column])
    dataset = tfr.data.build_ranking_dataset(
        file_pattern=path,
        data_format=tfr.data.ELWC,
        batch_size=_BATCH_SIZE,
        list_size=_LIST_SIZE,
        context_feature_spec=context_feature_spec,
        example_feature_spec=example_feature_spec,
        reader=tf.data.TFRecordDataset,
        shuffle=False,
        num_epochs=num_epochs)
    _DATASET = dataset
    features = tf.compat.v1.data.make_one_shot_iterator(dataset).get_next()
    label = tf.squeeze(features.pop(_LABEL_FEATURE), axis=2)
    label = tf.cast(label, tf.float32)

    for feat, targ in dataset.take(5):
        print ('Features: {}, Target: {}'.format(feat, targ))
    
    #with tf.compat.v1.Session() as sess:
    #    print(label.eval())
    tf.print(features["distance"])
    print("LABEL")
    tf.print(label)

    return features, label

def make_transform_fn():
    def _transform_fn(features, mode):
        """Defines transform_fn."""
        context_features, example_features = tfr.feature.encode_listwise_features(
            features=features,
            context_feature_columns=context_feature_columns(),
            example_feature_columns=example_feature_columns(),
            mode=mode,
            scope="transform_layer")

        return context_features, example_features
    return _transform_fn

def make_score_fn():
    """Returns a scoring function to build `EstimatorSpec`."""

    def _score_fn(context_features, group_features, mode, params, config):
        """Defines the network to score a group of documents."""
        with tf.compat.v1.name_scope("input_layer"):
            context_input = [
              tf.compat.v1.layers.flatten(context_features[name])
              for name in sorted(context_feature_columns())
          ]
            group_input = [
                tf.compat.v1.layers.flatten(group_features[name])
                  for name in sorted(example_feature_columns())
          ]
            input_layer = tf.concat(context_input + group_input, 1)

            is_training = (mode == tf.estimator.ModeKeys.TRAIN)
            cur_layer = input_layer
            cur_layer = tf.compat.v1.layers.batch_normalization(
                cur_layer,
                training=is_training,
                momentum=0.99)

        for i, layer_width in enumerate(int(d) for d in _HIDDEN_LAYER_DIMS):
            cur_layer = tf.compat.v1.layers.dense(cur_layer, units=layer_width)
            cur_layer = tf.compat.v1.layers.batch_normalization(
                cur_layer,
                training=is_training,
                momentum=0.99)
            cur_layer = tf.nn.relu(cur_layer)
            cur_layer = tf.compat.v1.layers.dropout(
                inputs=cur_layer, rate=_DROPOUT_RATE, training=is_training)
        logits = tf.compat.v1.layers.dense(cur_layer, units=_GROUP_SIZE)
        return logits

    return _score_fn

def eval_metric_fns():
    """Returns a dict from name to metric functions.

    This can be customized as follows. Care must be taken when handling padded
    lists.

    def _auc(labels, predictions, features):
    is_label_valid = tf_reshape(tf.greater_equal(labels, 0.), [-1, 1])
    clean_labels = tf.boolean_mask(tf.reshape(labels, [-1, 1], is_label_valid)
    clean_pred = tf.boolean_maks(tf.reshape(predictions, [-1, 1], is_label_valid)
    return tf.metrics.auc(clean_labels, tf.sigmoid(clean_pred), ...)
    metric_fns["auc"] = _auc

    Returns:
    A dict mapping from metric name to a metric function with above signature.
    """
    metric_fns = {}
    metric_fns.update({
      "metric/ndcg@%d" % topn: tfr.metrics.make_ranking_metric_fn(
          tfr.metrics.RankingMetricKey.NDCG, topn=topn)
      for topn in [1, 3, 5, 10]
    })

    return metric_fns

def _train_op_fn(loss):
    """Defines train op used in ranking head."""
    update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
    minimize_op = optimizer.minimize(
      loss=loss, global_step=tf.compat.v1.train.get_global_step())
    train_op = tf.group([update_ops, minimize_op])
    return train_op

def train_and_eval_fn():
    """Train and eval function used by `tf.estimator.train_and_evaluate`."""
    run_config = tf.estimator.RunConfig(
      save_checkpoints_steps=1000)
    ranker = tf.estimator.Estimator(
      model_fn=model_fn,
      model_dir=_MODEL_DIR,
      config=run_config)

    train_input_fn = lambda: input_fn(_TRAIN_DATA_PATH)
    eval_input_fn = lambda: input_fn(_TEST_DATA_PATH, num_epochs=1)

    train_spec = tf.estimator.TrainSpec(
      input_fn=train_input_fn, max_steps=_NUM_TRAIN_STEPS)
    eval_spec =  tf.estimator.EvalSpec(
          name="eval",
          input_fn=eval_input_fn,
          throttle_secs=15)
    return (ranker, train_spec, eval_spec)


In [74]:
_LOSS = tfr.losses.RankingLossKey.APPROX_NDCG_LOSS
loss_fn = tfr.losses.make_loss_fn(_LOSS)

optimizer = tf.compat.v1.train.AdagradOptimizer(
    learning_rate=_LEARNING_RATE)

ranking_head = tfr.head.create_ranking_head(
      loss_fn=loss_fn,
      eval_metric_fns=eval_metric_fns(),
      train_op_fn=_train_op_fn)

model_fn = tfr.model.make_groupwise_ranking_fn(
          group_score_fn=make_score_fn(),
          transform_fn=make_transform_fn(),
          group_size=_GROUP_SIZE,
          ranking_head=ranking_head)

! rm -rf "Model/ranking_model_dir"  # Clean up the model directory.
ranker, train_spec, eval_spec = train_and_eval_fn()
tf.estimator.train_and_evaluate(ranker, train_spec, eval_spec)

W0430 17:15:33.058172 4499678656 model_fn.py:630] Estimator's model_fn (<function _make_model_fn.<locals>._model_fn at 0x13994ac80>) includes params argument, but params are not passed to Estimator.


RuntimeError: __iter__() is only supported inside of tf.function or when eager execution is enabled.

In [61]:
tf.executing_eagerly()

True

In [62]:
print(_DATASET)

None
