In [1]:
import pathlib

import tensorflow as tf
import tensorflow_ranking as tfr
import tensorflow_text as tf_text
from tensorflow_serving.apis import input_pb2
from google.protobuf import text_format

2023-04-07 19:33:38.214257: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-07 19:33:40.042273: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/simin/miniconda3/envs/tf-rank/lib/:/home/simin/miniconda3/envs/tf-rank/lib/python3.10/site-packages/nvidia/cudnn/lib:/home/simin/miniconda3/envs/tf-rank/lib/:/home/simin/miniconda3/envs/tf-rank/lib/python3.10/site-packages/nvidia/cudnn/lib
2023-04-07 19:33:40.042492: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin

In [21]:
# Locations of the serialized training and evaluation instances.
_TRAIN_DATA_PATH = "/tmp/train.tfrecords"
_TEST_DATA_PATH = "/tmp/test.tfrecords"

# Vocabulary file shared by the query and document token columns.
_VOCAB_PATH = "/tmp/vocab.txt"

# Where the estimator keeps its checkpoints, and how many steps to train.
_MODEL_DIR = "/tmp/ranking_model_dir"
_NUM_TRAIN_STEPS = 5000

# Per-query document lists are padded or truncated to this many entries.
_LIST_SIZE = 50

# Name of the per-document relevance label feature.
_LABEL_FEATURE = "relevance"

# Padded documents get this negative label so that loss and metrics can
# ignore them.
_PADDING_LABEL = -1

# Step size handed to the optimizer.
_LEARNING_RATE = 0.05

# Input pipeline and scoring-network hyperparameters.
_BATCH_SIZE = 32
_EMBEDDING_DIMENSION = 20
_HIDDEN_LAYER_DIMS = ["64", "32", "16"]  # Stored as strings; cast with int() where used.
_DROPOUT_RATE = 0.8  # Passed as `rate` to the dropout layers of the scoring network.
_GROUP_SIZE = 1  # Pointwise scoring.

In [3]:
def context_feature_columns():
    """Builds the per-query (context) feature columns, keyed by feature name.

    The only context feature is "query_tokens": tokens are looked up in the
    vocabulary file at `_VOCAB_PATH` and embedded into
    `_EMBEDDING_DIMENSION` dimensions.
    """
    query_tokens = tf.feature_column.categorical_column_with_vocabulary_file(
        key="query_tokens", vocabulary_file=_VOCAB_PATH)
    return {
        "query_tokens": tf.feature_column.embedding_column(
            query_tokens, _EMBEDDING_DIMENSION),
    }


def example_feature_columns():
    """Builds the per-document (example) feature columns, keyed by feature name.

    The only example feature is "document_tokens": tokens are looked up in
    the vocabulary file at `_VOCAB_PATH` and embedded into
    `_EMBEDDING_DIMENSION` dimensions.
    """
    document_tokens = tf.feature_column.categorical_column_with_vocabulary_file(
        key="document_tokens", vocabulary_file=_VOCAB_PATH)
    return {
        "document_tokens": tf.feature_column.embedding_column(
            document_tokens, _EMBEDDING_DIMENSION),
    }

In [4]:
def input_fn(path, num_epochs=None):
    """Reads ELWC TFRecords and returns `(features, label)` for the estimator.

    Args:
        path: File pattern handed to `tfr.data.build_ranking_dataset`.
        num_epochs: Forwarded unchanged to the dataset builder.

    Returns:
        A `(features, label)` pair: the next batch's feature dict with the
        label entry removed, and that label squeezed on axis 2 and cast to
        float32.
    """
    context_spec = tf.feature_column.make_parse_example_spec(
        context_feature_columns().values())

    # The label is parsed together with the document features; documents
    # without a label default to _PADDING_LABEL.
    relevance_column = tf.feature_column.numeric_column(
        _LABEL_FEATURE, dtype=tf.int64, default_value=_PADDING_LABEL)
    per_example_columns = [*example_feature_columns().values(), relevance_column]
    example_spec = tf.feature_column.make_parse_example_spec(per_example_columns)

    ranking_dataset = tfr.data.build_ranking_dataset(
        file_pattern=path,
        data_format=tfr.data.ELWC,
        batch_size=_BATCH_SIZE,
        list_size=_LIST_SIZE,
        context_feature_spec=context_spec,
        example_feature_spec=example_spec,
        reader=tf.data.TFRecordDataset,
        shuffle=False,
        num_epochs=num_epochs)
    features = tf.compat.v1.data.make_one_shot_iterator(ranking_dataset).get_next()

    raw_label = features.pop(_LABEL_FEATURE)
    label = tf.cast(tf.squeeze(raw_label, axis=2), tf.float32)
    return features, label

In [5]:
def make_transform_fn():
    """Returns the transform function that encodes raw features for scoring."""

    def _transform_fn(features, mode):
        """Encodes `features` into `(context_features, example_features)`.

        Delegates to `tfr.feature.encode_listwise_features`, using the
        feature columns defined in this notebook.
        """
        encoded = tfr.feature.encode_listwise_features(
            features=features,
            context_feature_columns=context_feature_columns(),
            example_feature_columns=example_feature_columns(),
            mode=mode,
            scope="transform_layer")
        context_part, example_part = encoded
        return context_part, example_part

    return _transform_fn

In [6]:
def make_score_fn():
    """Creates the scoring function handed to the ranking model builder."""

    def _score_fn(context_features, group_features, mode, params, config):
        """Scores a group of documents with a feed-forward network.

        `params` and `config` are part of the expected signature but are not
        read here. Returns logits with `_GROUP_SIZE` units.
        """
        training = mode == tf.estimator.ModeKeys.TRAIN

        def _batch_norm(tensor):
            # Same settings for the input normalization and every hidden layer.
            return tf.compat.v1.layers.batch_normalization(
                tensor, training=training, momentum=0.99)

        with tf.compat.v1.name_scope("input_layer"):
            # Flatten each feature and concatenate context first, then
            # documents, each in sorted feature-name order.
            flattened = []
            for name in sorted(context_feature_columns()):
                flattened.append(
                    tf.compat.v1.layers.flatten(context_features[name]))
            for name in sorted(example_feature_columns()):
                flattened.append(
                    tf.compat.v1.layers.flatten(group_features[name]))
            net = tf.concat(flattened, 1)

        net = _batch_norm(net)

        # Hidden stack: dense -> batch norm -> ReLU -> dropout per width.
        for width in map(int, _HIDDEN_LAYER_DIMS):
            net = tf.compat.v1.layers.dense(net, units=width)
            net = _batch_norm(net)
            net = tf.nn.relu(net)
            net = tf.compat.v1.layers.dropout(
                inputs=net, rate=_DROPOUT_RATE, training=training)

        return tf.compat.v1.layers.dense(net, units=_GROUP_SIZE)

    return _score_fn

In [7]:
def eval_metric_fns():
    """Builds the evaluation metrics as a dict of name -> metric function.

    NDCG is reported at cutoffs 1, 3, 5 and 10 under the keys
    "metric/ndcg@<cutoff>".

    Further metrics can be added as sketched below. Care must be taken when
    handling padded lists.

        def _auc(labels, predictions, features):
            is_label_valid = tf.reshape(tf.greater_equal(labels, 0.), [-1, 1])
            clean_labels = tf.boolean_mask(
                tf.reshape(labels, [-1, 1]), is_label_valid)
            clean_pred = tf.boolean_mask(
                tf.reshape(predictions, [-1, 1]), is_label_valid)
            return tf.metrics.auc(clean_labels, tf.sigmoid(clean_pred), ...)
        metric_fns["auc"] = _auc

    Returns:
        A dict mapping each metric name to a metric function.
    """
    metric_fns = {
        f"metric/ndcg@{topn}": tfr.metrics.make_ranking_metric_fn(
            tfr.metrics.RankingMetricKey.NDCG, topn=topn)
        for topn in (1, 3, 5, 10)
    }
    return metric_fns

## Prepare dataset

In [8]:
def _bytes_feature(value):
    """Splits `value` on whitespace and returns the tokens as a bytes_list Feature.

    Args:
        value: Text to tokenize, given as a `str`, a `bytes` object, or an
            eager tensor holding a string.

    Returns:
        A `tf.train.Feature` whose `bytes_list` holds one entry per token.
    """
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy() # BytesList won't unpack a string from an EagerTensor.
    tokens = value.split()
    # Tokens from `bytes` input (including the `.numpy()` result above) are
    # already bytes and have no `.encode()`; only `str` tokens need encoding.
    encoded_tokens = [
        token.encode() if isinstance(token, str) else token for token in tokens
    ]
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=encoded_tokens))

def _float_feature(value):
    """Wraps a single float / double in a float_list Feature."""
    float_list = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=float_list)

def _int64_feature(value):
    """Wraps a single bool / enum / int / uint in an int64_list Feature."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)

In [9]:
def create_tfexample(feature0, feature1):
    """Packs one document into a `tf.train.Example`.

    Args:
        feature0: Document text; stored under 'document_tokens' via
            `_bytes_feature`.
        feature1: Relevance label; stored under 'relevance' via
            `_int64_feature`.

    Returns:
        The populated `tf.train.Example` message.
    """
    # Map each feature name to its tf.train.Example-compatible value.
    features = tf.train.Features(feature={
        'document_tokens': _bytes_feature(feature0),
        'relevance': _int64_feature(feature1),
    })
    return tf.train.Example(features=features)

In [10]:
def create_tfquery(feature0):
    """Packs one query into a `tf.train.Example`.

    Args:
        feature0: Query text; stored under 'query_tokens' via
            `_bytes_feature`.

    Returns:
        The populated `tf.train.Example` message.
    """
    features = tf.train.Features(feature={
        'query_tokens': _bytes_feature(feature0),
    })
    return tf.train.Example(features=features)

In [11]:
def parse_elwc(elwc,num_example):
    """Parses one serialized ELWC with `tfr.data.parse_from_example_list`.

    Args:
        elwc: Serialized `ExampleListWithContext`; wrapped in a one-element
            list before parsing.
        num_example: Passed as `list_size` to the parser.

    Returns:
        The parser's result, requested with the extra "_list_size_" and
        "_mask_" features.
    """
    context_spec = {"query_tokens": tf.io.RaggedFeature(dtype=tf.string)}
    example_spec = {
        "document_tokens": tf.io.RaggedFeature(dtype=tf.string),
        "relevance": tf.io.FixedLenFeature(
            shape=[], dtype=tf.int64, default_value=0),
    }
    return tfr.data.parse_from_example_list(
        [elwc],
        list_size=num_example,
        context_feature_spec=context_spec,
        example_feature_spec=example_spec,
        size_feature_name="_list_size_",
        mask_feature_name="_mask_")

In [47]:
def create_serialized_elwc(tf_query, tf_examples):
    """Combines a query and its documents into one serialized ELWC.

    Args:
        tf_query: Message copied into the ELWC's `context` field.
        tf_examples: Iterable of messages, each copied into a new entry of
            the ELWC's `examples` field, in iteration order.

    Returns:
        The `ExampleListWithContext` serialized with `SerializeToString()`.
    """
    elwc = input_pb2.ExampleListWithContext()
    elwc.context.CopyFrom(tf_query)

    for document in tf_examples:
        elwc.examples.add().CopyFrom(document)

    return elwc.SerializeToString()

### sample test

In [None]:
# Toy vocabulary for the sample data, written one token per line.
# Note: this writes "vocab.txt" in the current working directory, which is not
# the same file as _VOCAB_PATH ("/tmp/vocab.txt") used by the feature columns.
vocab_list = ["this","is","a","relevant","answer","irrelevant","data","query"]
with open("vocab.txt", "w") as file1:
    for token in vocab_list:
        file1.write(token+"\n")

In [None]:
# Four toy documents, each built from a (text, relevance label) pair.
tf_example = create_tfexample("this is a relevant answer",1)
tf_example2 = create_tfexample("irrelevant data",0)
tf_example3 = create_tfexample("relevant data",1)
tf_example4 = create_tfexample("irrelevant data",0)
# Display the last message to inspect the proto layout.
tf_example4

In [13]:
# The document list for the sample query, in the order it will be serialized.
EXAMPLES = [tf_example, tf_example2, tf_example3, tf_example4]

In [None]:
# The sample query (context) message; displayed to inspect the proto layout.
tf_query = create_tfquery("this is a query")
tf_query

In [None]:
# Assemble the ExampleListWithContext by hand: the query goes into `context`,
# and each document is copied into a new entry of `examples`.
# (Same steps as create_serialized_elwc, but keeps the message for printing.)
ELWC = input_pb2.ExampleListWithContext()
ELWC.context.CopyFrom(tf_query)

for example in EXAMPLES:
    example_features = ELWC.examples.add()
    example_features.CopyFrom(example)

print(ELWC)

In [16]:
# Serialize the hand-built ELWC and show the raw bytes.
serialized_elwc = ELWC.SerializeToString()
print(serialized_elwc)

b"\nL\nJ\n4\n\x0fdocument_tokens\x12!\n\x1f\n\x04this\n\x02is\n\x01a\n\x08relevant\n\x06answer\n\x12\n\trelevance\x12\x05\x1a\x03\n\x01\x01\n?\n=\n\x12\n\trelevance\x12\x05\x1a\x03\n\x01\x00\n'\n\x0fdocument_tokens\x12\x14\n\x12\n\nirrelevant\n\x04data\n=\n;\n\x12\n\trelevance\x12\x05\x1a\x03\n\x01\x01\n%\n\x0fdocument_tokens\x12\x12\n\x10\n\x08relevant\n\x04data\n?\n=\n\x12\n\trelevance\x12\x05\x1a\x03\n\x01\x00\n'\n\x0fdocument_tokens\x12\x14\n\x12\n\nirrelevant\n\x04data\x12*\n(\n&\n\x0cquery_tokens\x12\x16\n\x14\n\x04this\n\x02is\n\x01a\n\x05query"


In [48]:
# Rebuild the same serialized ELWC through the helper (rebinds serialized_elwc).
serialized_elwc = create_serialized_elwc(tf_query, EXAMPLES)

In [51]:
# Two more ELWCs built from the same query and documents.
serialized_elwc2 = create_serialized_elwc(tf_query, EXAMPLES)
serialized_elwc3 = create_serialized_elwc(tf_query, EXAMPLES)

In [53]:
# Records that will be written to the sample training file.
serialized_elwc_list = [serialized_elwc, serialized_elwc2, serialized_elwc3]

In [17]:
# Round-trip check: parse the serialized ELWC back, one list slot per document.
tf_data = parse_elwc(serialized_elwc, len(EXAMPLES))

In [None]:
# Write the single sample ELWC to 'test.tfrecords' in the current working
# directory (a different file from _TEST_DATA_PATH, "/tmp/test.tfrecords").
with tf.io.TFRecordWriter('test.tfrecords') as writer:
    writer.write(serialized_elwc)

In [54]:
# Write the three sample ELWCs to 'train.tfrecords' in the current working
# directory (a different file from _TRAIN_DATA_PATH, "/tmp/train.tfrecords").
with tf.io.TFRecordWriter('train.tfrecords') as writer:
    for objs in serialized_elwc_list:
        writer.write(objs)

In [55]:
# Read the sample file just written (the local one, not _TRAIN_DATA_PATH).
raw_dataset = tf.data.TFRecordDataset("train.tfrecords")
raw_dataset

<TFRecordDatasetV2 element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>

In [None]:
# Print up to 10 raw serialized records from the sample file.
for raw_record in raw_dataset.take(10):
    print(repr(raw_record))

In [37]:
# Open the training file that input_fn actually reads (_TRAIN_DATA_PATH).
train_data = tf.data.TFRecordDataset(_TRAIN_DATA_PATH)
train_data

<TFRecordDatasetV2 element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>

In [None]:
# Print the first raw serialized record of the training file for comparison
# with the sample records above.
for train_record in train_data.take(1):
    print(repr(train_record))

### stackoverflow

## Losses, Metrics and Ranking Head

In [22]:
# Define a loss function. To find a complete list of available
# loss functions or to learn how to add your own custom function
# please refer to the tensorflow_ranking.losses module.
#
# NOTE(review): the training logs below report negative loss values with this
# key, which is presumably expected for an NDCG-approximating loss — confirm
# against the tensorflow_ranking.losses documentation.

_LOSS = tfr.losses.RankingLossKey.APPROX_NDCG_LOSS
loss_fn = tfr.losses.make_loss_fn(_LOSS)

In [23]:
# Module-level Adagrad optimizer; _train_op_fn below calls its minimize().
optimizer = tf.compat.v1.train.AdagradOptimizer(
    learning_rate=_LEARNING_RATE)


def _train_op_fn(loss):
    """Builds the train op used by the ranking head.

    Args:
        loss: Loss tensor to minimize.

    Returns:
        One op grouping everything in the graph's UPDATE_OPS collection with
        the optimizer's minimize step (which also advances the global step).
    """
    pending_updates = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.UPDATE_OPS)
    global_step = tf.compat.v1.train.get_global_step()
    apply_gradients = optimizer.minimize(loss=loss, global_step=global_step)
    return tf.group([pending_updates, apply_gradients])


# Ranking head tying together the loss, the evaluation metrics and the train op.
ranking_head = tfr.head.create_ranking_head(
      loss_fn=loss_fn,
      eval_metric_fns=eval_metric_fns(),
      train_op_fn=_train_op_fn)

In [24]:
# Estimator model_fn assembled from the scoring network, the feature
# transform and the ranking head; documents are scored in groups of _GROUP_SIZE.
model_fn = tfr.model.make_groupwise_ranking_fn(
          group_score_fn=make_score_fn(),
          transform_fn=make_transform_fn(),
          group_size=_GROUP_SIZE,
          ranking_head=ranking_head)

INFO:tensorflow:Building groupwise ranking model.


## Train and evaluate

In [25]:
def train_and_eval_fn():
    """Assembles the estimator and the specs for `tf.estimator.train_and_evaluate`.

    Returns:
        A `(ranker, train_spec, eval_spec)` tuple. The estimator checkpoints
        every 1000 steps into `_MODEL_DIR`; training reads `_TRAIN_DATA_PATH`
        for up to `_NUM_TRAIN_STEPS` steps, and evaluation makes a single
        pass over `_TEST_DATA_PATH`.
    """
    def _train_input():
        return input_fn(_TRAIN_DATA_PATH)

    def _eval_input():
        return input_fn(_TEST_DATA_PATH, num_epochs=1)

    ranker = tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir=_MODEL_DIR,
        config=tf.estimator.RunConfig(save_checkpoints_steps=1000))

    train_spec = tf.estimator.TrainSpec(
        input_fn=_train_input, max_steps=_NUM_TRAIN_STEPS)
    eval_spec = tf.estimator.EvalSpec(
        name="eval", input_fn=_eval_input, throttle_secs=15)

    return ranker, train_spec, eval_spec

In [26]:
! rm -rf "/tmp/ranking_model_dir"  # Clean up the model directory.
ranker, train_spec, eval_spec = train_and_eval_fn()
tf.estimator.train_and_evaluate(ranker, train_spec, eval_spec)

/bin/bash: /home/simin/miniconda3/envs/tf-rank/lib/libtinfo.so.6: no version information available (required by /bin/bash)
INFO:tensorflow:Using config: {'_model_dir': '/tmp/ranking_model_dir', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 1000, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_rep

  tf.compat.v1.layers.flatten(context_features[name])
  tf.compat.v1.layers.flatten(group_features[name])
  cur_layer = tf.compat.v1.layers.batch_normalization(
  cur_layer = tf.compat.v1.layers.dense(cur_layer, units=layer_width)
  cur_layer = tf.compat.v1.layers.batch_normalization(
  cur_layer = tf.compat.v1.layers.dropout(
  logits = tf.compat.v1.layers.dense(cur_layer, units=_GROUP_SIZE)


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.


2023-04-07 19:36:58.496604: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:36:58.497399: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:36:58.497956: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:36:58.498779: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:36:58.498814: I tensorflow/core/co

INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into /tmp/ranking_model_dir/model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
INFO:tensorflow:loss = -0.85622907, step = 0


2023-04-07 19:37:01.688919: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


INFO:tensorflow:global_step/sec: 36.5057
INFO:tensorflow:loss = -0.86425054, step = 100 (2.740 sec)
INFO:tensorflow:global_step/sec: 40.3126
INFO:tensorflow:loss = -0.832706, step = 200 (2.480 sec)
INFO:tensorflow:global_step/sec: 43.1262
INFO:tensorflow:loss = -0.83526194, step = 300 (2.319 sec)
INFO:tensorflow:global_step/sec: 42.4205
INFO:tensorflow:loss = -0.78599846, step = 400 (2.357 sec)
INFO:tensorflow:global_step/sec: 43.2698
INFO:tensorflow:loss = -0.854683, step = 500 (2.311 sec)
INFO:tensorflow:global_step/sec: 43.4441
INFO:tensorflow:loss = -0.86314833, step = 600 (2.302 sec)
INFO:tensorflow:global_step/sec: 42.6228
INFO:tensorflow:loss = -0.8332889, step = 700 (2.345 sec)
INFO:tensorflow:global_step/sec: 44.4663
INFO:tensorflow:loss = -0.85919327, step = 800 (2.249 sec)
INFO:tensorflow:global_step/sec: 44.0756
INFO:tensorflow:loss = -0.8144463, step = 900 (2.269 sec)
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 1000...
INFO:tensorflow:Saving check

2023-04-07 19:37:26.634419: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:37:26.635029: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:37:26.635536: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:37:26.636380: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:37:26.636416: I tensorflow/core/co

INFO:tensorflow:Inference Time : 1.19033s
INFO:tensorflow:Finished evaluation at 2023-04-07-19:37:27
INFO:tensorflow:Saving dict for global step 1000: global_step = 1000, labels_mean = 1.9630322, logits_mean = 1.2490524, loss = -0.77059895, metric/ndcg@1 = 0.5878571, metric/ndcg@10 = 0.7951875, metric/ndcg@3 = 0.67231107, metric/ndcg@5 = 0.72104204
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1000: /tmp/ranking_model_dir/model.ckpt-1000
INFO:tensorflow:global_step/sec: 20.6239
INFO:tensorflow:loss = -0.8284209, step = 1000 (4.849 sec)
INFO:tensorflow:global_step/sec: 40.1428
INFO:tensorflow:loss = -0.87761724, step = 1100 (2.491 sec)
INFO:tensorflow:global_step/sec: 43.0684
INFO:tensorflow:loss = -0.8619217, step = 1200 (2.322 sec)
INFO:tensorflow:global_step/sec: 42.7552
INFO:tensorflow:loss = -0.8232709, step = 1300 (2.339 sec)
INFO:tensorflow:global_step/sec: 42.3003
INFO:tensorflow:loss = -0.845335, step = 1400 (2.364 sec)
INFO:tensorflow:global_step/sec: 43.353

2023-04-07 19:37:52.683988: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:37:52.684547: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:37:52.685388: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:37:52.686351: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:37:52.686393: I tensorflow/core/co

INFO:tensorflow:Inference Time : 0.92012s
INFO:tensorflow:Finished evaluation at 2023-04-07-19:37:53
INFO:tensorflow:Saving dict for global step 2000: global_step = 2000, labels_mean = 1.9630322, logits_mean = 2.127494, loss = -0.805644, metric/ndcg@1 = 0.665, metric/ndcg@10 = 0.8230577, metric/ndcg@3 = 0.7217461, metric/ndcg@5 = 0.76058114
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 2000: /tmp/ranking_model_dir/model.ckpt-2000
INFO:tensorflow:global_step/sec: 22.6789
INFO:tensorflow:loss = -0.8421985, step = 2000 (4.409 sec)
INFO:tensorflow:global_step/sec: 40.7648
INFO:tensorflow:loss = -0.853926, step = 2100 (2.453 sec)
INFO:tensorflow:global_step/sec: 42.5663
INFO:tensorflow:loss = -0.85652995, step = 2200 (2.349 sec)
INFO:tensorflow:global_step/sec: 42.0595
INFO:tensorflow:loss = -0.8402942, step = 2300 (2.378 sec)
INFO:tensorflow:global_step/sec: 42.0409
INFO:tensorflow:loss = -0.8823969, step = 2400 (2.379 sec)
INFO:tensorflow:global_step/sec: 43.3201
INFO:t

2023-04-07 19:38:18.403126: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:38:18.403773: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:38:18.404280: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:38:18.405043: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:38:18.405078: I tensorflow/core/co

INFO:tensorflow:Inference Time : 0.90692s
INFO:tensorflow:Finished evaluation at 2023-04-07-19:38:19
INFO:tensorflow:Saving dict for global step 3000: global_step = 3000, labels_mean = 1.9630322, logits_mean = 2.6156185, loss = -0.8224302, metric/ndcg@1 = 0.69714284, metric/ndcg@10 = 0.8326173, metric/ndcg@3 = 0.7329309, metric/ndcg@5 = 0.769486
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 3000: /tmp/ranking_model_dir/model.ckpt-3000
INFO:tensorflow:global_step/sec: 22.8967
INFO:tensorflow:loss = -0.8494569, step = 3000 (4.368 sec)
INFO:tensorflow:global_step/sec: 40.9177
INFO:tensorflow:loss = -0.8190213, step = 3100 (2.444 sec)
INFO:tensorflow:global_step/sec: 42.4934
INFO:tensorflow:loss = -0.8363153, step = 3200 (2.353 sec)
INFO:tensorflow:global_step/sec: 42.6793
INFO:tensorflow:loss = -0.83196455, step = 3300 (2.343 sec)
INFO:tensorflow:global_step/sec: 43.2026
INFO:tensorflow:loss = -0.8564136, step = 3400 (2.315 sec)
INFO:tensorflow:global_step/sec: 41.9313


2023-04-07 19:38:44.445214: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:38:44.446039: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:38:44.446613: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:38:44.447382: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:38:44.447416: I tensorflow/core/co

INFO:tensorflow:Inference Time : 0.93187s
INFO:tensorflow:Finished evaluation at 2023-04-07-19:38:45
INFO:tensorflow:Saving dict for global step 4000: global_step = 4000, labels_mean = 1.9630322, logits_mean = 2.9765208, loss = -0.82578903, metric/ndcg@1 = 0.68357146, metric/ndcg@10 = 0.83014214, metric/ndcg@3 = 0.73244303, metric/ndcg@5 = 0.7694423
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 4000: /tmp/ranking_model_dir/model.ckpt-4000
INFO:tensorflow:global_step/sec: 21.7926
INFO:tensorflow:loss = -0.85554034, step = 4000 (4.589 sec)
INFO:tensorflow:global_step/sec: 39.7767
INFO:tensorflow:loss = -0.8523695, step = 4100 (2.514 sec)
INFO:tensorflow:global_step/sec: 40.668
INFO:tensorflow:loss = -0.8554511, step = 4200 (2.459 sec)
INFO:tensorflow:global_step/sec: 40.6598
INFO:tensorflow:loss = -0.8843602, step = 4300 (2.459 sec)
INFO:tensorflow:global_step/sec: 41.4199
INFO:tensorflow:loss = -0.86835945, step = 4400 (2.414 sec)
INFO:tensorflow:global_step/sec: 40.8

2023-04-07 19:39:11.090744: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:39:11.091560: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:39:11.092133: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:39:11.092809: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 19:39:11.092843: I tensorflow/core/co

INFO:tensorflow:Inference Time : 0.90976s
INFO:tensorflow:Finished evaluation at 2023-04-07-19:39:11
INFO:tensorflow:Saving dict for global step 5000: global_step = 5000, labels_mean = 1.9630322, logits_mean = 3.1429155, loss = -0.8247399, metric/ndcg@1 = 0.6685715, metric/ndcg@10 = 0.8292692, metric/ndcg@3 = 0.7333425, metric/ndcg@5 = 0.76657265
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 5000: /tmp/ranking_model_dir/model.ckpt-5000
INFO:tensorflow:Loss for final step: -0.87629384.


({'labels_mean': 1.9630322,
  'logits_mean': 3.1429155,
  'loss': -0.8247399,
  'metric/ndcg@1': 0.6685715,
  'metric/ndcg@10': 0.8292692,
  'metric/ndcg@3': 0.7333425,
  'metric/ndcg@5': 0.76657265,
  'global_step': 5000},
 [])

## Inference and Predictions

We show how to generate predictions over the features of a dataset. We assume that the label is not present and needs to be inferred using the ranking model.

Similar to the `input_fn` used for training and evaluation, `predict_input_fn` reads data stored as TFRecords in ELWC format and generates features from it. We set the number of epochs to 1, so that the generator stops iterating when it reaches the end of the dataset. The datapoints are also not shuffled while reading, so that the behavior of the `predict()` function is deterministic.

In [32]:
def predict_input_fn(path):
    """Builds label-free features for prediction from ELWC TFRecords.

    Args:
        path: File pattern handed to `tfr.data.build_ranking_dataset`.

    Returns:
        The feature dict of the next batch, read unshuffled for one epoch.
    """
    context_spec = tf.feature_column.make_parse_example_spec(
        context_feature_columns().values())
    # No label column here: only the document features are parsed.
    example_spec = tf.feature_column.make_parse_example_spec(
        list(example_feature_columns().values()))

    dataset_options = dict(
        file_pattern=path,
        data_format=tfr.data.ELWC,
        batch_size=_BATCH_SIZE,
        list_size=_LIST_SIZE,
        context_feature_spec=context_spec,
        example_feature_spec=example_spec,
        reader=tf.data.TFRecordDataset,
        shuffle=False,
        num_epochs=1)
    prediction_dataset = tfr.data.build_ranking_dataset(**dataset_options)
    return tf.compat.v1.data.make_one_shot_iterator(prediction_dataset).get_next()

We generate predictions on the test dataset, where we only consider context and example features and predict the labels. `predict_input_fn` feeds the model one batch of datapoints at a time. Batching allows us to iterate over large datasets which cannot be loaded in memory.

In [17]:
# Score the evaluation file; the path comes from the shared config constant
# instead of a duplicated string literal.
predictions = ranker.predict(input_fn=lambda: predict_input_fn(_TEST_DATA_PATH))

In [18]:
# Nothing has been computed yet: `predictions` is a lazy generator.
predictions

<generator object Estimator.predict at 0x7f4939cc5c40>

`ranker.predict` returns a generator, which we can iterate over to obtain predictions until the generator is exhausted.

In [19]:
# Pull the first item from the generator; this is what triggers graph
# construction and the checkpoint restore seen in the log below.
x = next(predictions)

INFO:tensorflow:vocabulary_size = 30522 in query_tokens is inferred from the number of elements in the vocabulary_file /tmp/vocab.txt.
INFO:tensorflow:vocabulary_size = 30522 in document_tokens is inferred from the number of elements in the vocabulary_file /tmp/vocab.txt.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:vocabulary_size = 30522 in query_tokens is inferred from the number of elements in the vocabulary_file /tmp/vocab.txt.
INFO:tensorflow:vocabulary_size = 30522 in document_tokens is inferred from the number of elements in the vocabulary_file /tmp/vocab.txt.
INFO:tensorflow:vocabulary_size = 30522 in query_tokens is inferred from the number of elements in the vocabulary_file /tmp/vocab.txt.
INFO:tensorflow:vocabulary_size = 30522 in document_tokens is inferred from the number of elements in the vocabulary_file /tmp/vocab.txt.
INFO:tensorflow:Done calling model_fn.


  tf.compat.v1.layers.flatten(context_features[name])
  tf.compat.v1.layers.flatten(group_features[name])
  cur_layer = tf.compat.v1.layers.batch_normalization(
  cur_layer = tf.compat.v1.layers.dense(cur_layer, units=layer_width)
  cur_layer = tf.compat.v1.layers.batch_normalization(
  cur_layer = tf.compat.v1.layers.dropout(
  logits = tf.compat.v1.layers.dense(cur_layer, units=_GROUP_SIZE)


INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/ranking_model_dir/model.ckpt-15000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


2023-04-07 18:36:08.220005: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 18:36:08.220645: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 18:36:08.221137: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 18:36:08.222343: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-07 18:36:08.222379: I tensorflow/core/co

In [20]:
# One score per list slot (50 values, matching _LIST_SIZE). The run of
# identical trailing values presumably corresponds to padded slots — verify.
x

array([ 4.6875005 , -3.5406964 ,  4.5525036 ,  2.8311815 , -0.0627908 ,
        2.1847188 , -1.1926783 , -0.5721942 ,  4.5686088 ,  4.643297  ,
        2.4712627 ,  3.3735383 , -6.5405955 ,  4.679387  ,  4.195493  ,
        4.3931046 ,  4.6696076 ,  4.586497  ,  3.0385761 ,  2.1049716 ,
        4.4866858 ,  4.3540955 ,  2.8567202 ,  1.7653334 ,  2.465593  ,
        1.7653961 ,  4.6871815 ,  4.621498  , -0.04419075,  4.322982  ,
        4.322982  ,  4.322982  ,  4.322982  ,  4.322982  ,  4.322982  ,
        4.322982  ,  4.322982  ,  4.322982  ,  4.322982  ,  4.322982  ,
        4.322982  ,  4.322982  ,  4.322982  ,  4.322982  ,  4.322982  ,
        4.322982  ,  4.322982  ,  4.322982  ,  4.322982  ,  4.322982  ],
      dtype=float32)

In [21]:
# Advance the generator to the next item (rebinds x).
x = next(predictions)

In [22]:
# Scores for the second item pulled from the generator.
x

array([  4.6372685 ,   0.21779408,   1.5418385 ,   3.3559597 ,
         4.663595  , -13.910055  ,   2.180758  ,   4.4911757 ,
         4.4917107 ,  -6.385597  ,   4.5188103 ,  -4.326332  ,
         4.189927  ,   4.3294077 ,   4.2710466 ,   4.5378838 ,
         4.5378838 ,   4.5378838 ,   4.5378838 ,   4.5378838 ,
         4.5378838 ,   4.5378838 ,   4.5378838 ,   4.5378838 ,
         4.5378838 ,   4.5378838 ,   4.5378838 ,   4.5378838 ,
         4.5378838 ,   4.5378838 ,   4.5378838 ,   4.5378838 ,
         4.5378838 ,   4.5378838 ,   4.5378838 ,   4.5378838 ,
         4.5378838 ,   4.5378838 ,   4.5378838 ,   4.5378838 ,
         4.5378838 ,   4.5378838 ,   4.5378838 ,   4.5378838 ,
         4.5378838 ,   4.5378838 ,   4.5378838 ,   4.5378838 ,
         4.5378838 ,   4.5378838 ], dtype=float32)

In [23]:
# Advance the generator once more (rebinds x).
x = next(predictions)

In [24]:
# Scores for the third item pulled from the generator.
x

array([ 3.8983476 , -2.6677127 ,  3.4520075 ,  0.21621923,  4.6609907 ,
       -3.2321227 , -6.6224174 ,  2.70285   , -2.7546382 ,  4.6620975 ,
        1.0986121 ,  4.1087656 ,  4.1087656 ,  4.1087656 ,  4.1087656 ,
        4.1087656 ,  4.1087656 ,  4.1087656 ,  4.1087656 ,  4.1087656 ,
        4.1087656 ,  4.1087656 ,  4.1087656 ,  4.1087656 ,  4.1087656 ,
        4.1087656 ,  4.1087656 ,  4.1087656 ,  4.1087656 ,  4.1087656 ,
        4.1087656 ,  4.1087656 ,  4.1087656 ,  4.1087656 ,  4.1087656 ,
        4.1087656 ,  4.1087656 ,  4.1087656 ,  4.1087656 ,  4.1087656 ,
        4.1087656 ,  4.1087656 ,  4.1087656 ,  4.1087656 ,  4.1087656 ,
        4.1087656 ,  4.1087656 ,  4.1087656 ,  4.1087656 ,  4.1087656 ],
      dtype=float32)