In [37]:
import itertools

import click
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_ranking as tfr
from sklearn.metrics import f1_score

import logging
logger = logging.getLogger(__name__)


tf.enable_eager_execution()
tf.executing_eagerly()

# Store the paths to files containing training and test instances.
# As noted above, we will assume the data is in the LibSVM format
# and that the content of each file is sorted by query ID.

_TRAIN_DATA_PATH = ''
_TEST_DATA_PATH = ''

# Define a loss function. To find a complete list of available
# loss functions or to learn how to add your own custom function
# please refer to the tensorflow_ranking.losses module.
_LOSS = "pairwise_logistic_loss"
# _LOSS = "sigmoid_cross_entropy_loss"

# In the TF-Ranking framework, a training instance is represented
# by a Tensor that contains features from a list of documents
# associated with a single query. For simplicity, we fix the shape
# of these Tensors to a maximum list size and call it "list_size,"
# the maximum number of documents per query in the dataset.
# In this demo, we take the following approach:
#   * If a query has fewer documents, its Tensor will be padded
#     appropriately.
#   * If a query has more documents, we shuffle its list of
#     documents and trim the list down to the prescribed list_size.
_LIST_SIZE = 11

# The total number of features per query-document pair.
# We set this number to the number of features in the MSLR-Web30K
# dataset.

# Parameters to the scoring function.
_BATCH_SIZE = 100
_HIDDEN_LAYER_DIMS = ["20", "10"]


# _OUT_DIR = "../models/tfranking/"

def input_fn(path):
    train_dataset = tf.data.Dataset.from_generator(
        tfr.data.libsvm_generator(path, _NUM_FEATURES, _LIST_SIZE),
        output_types=(
            {str(k): tf.float32 for k in range(1, _NUM_FEATURES + 1)},
            tf.float32
        ),
        output_shapes=(
            {str(k): tf.TensorShape([_LIST_SIZE, 1])
             for k in range(1, _NUM_FEATURES + 1)},
            tf.TensorShape([_LIST_SIZE])
        )
    )

    train_dataset = train_dataset.batch(_BATCH_SIZE)
    return train_dataset.make_one_shot_iterator().get_next()


def example_feature_columns():
    """Returns the example feature columns."""
    feature_names = [
        "%d" % (i + 1) for i in range(0, _NUM_FEATURES)
    ]
    return {
        name: tf.feature_column.numeric_column(
            name, shape=(1,), default_value=0.0) for name in feature_names
    }


def make_score_fn():
    """Returns a scoring function to build `EstimatorSpec`."""

    def _score_fn(context_features, group_features, mode, params, config):
        """Defines the network to score a documents."""
        del params
        del config
        # Define input layer.
        example_input = [
            tf.layers.flatten(group_features[name])
            for name in sorted(example_feature_columns())
        ]
        input_layer = tf.concat(example_input, 1)

        cur_layer = input_layer
        for i, layer_width in enumerate(int(d) for d in _HIDDEN_LAYER_DIMS):
            cur_layer = tf.layers.dense(
                cur_layer,
                units=layer_width,
                activation="tanh")

        logits = tf.layers.dense(cur_layer, units=1)
        return logits

    return _score_fn


def eval_metric_fns():
    """Returns a dict from name to metric functions.

    This can be customized as follows. Care must be taken when handling padded
    lists.

    def _auc(labels, predictions, features):
    is_label_valid = tf_reshape(tf.greater_equal(labels, 0.), [-1, 1])
    clean_labels = tf.boolean_mask(tf.reshape(labels, [-1, 1], is_label_valid)
    clean_pred = tf.boolean_maks(tf.reshape(predictions, [-1, 1], is_label_valid)
    return tf.metrics.auc(clean_labels, tf.sigmoid(clean_pred), ...)
    metric_fns["auc"] = _auc

    Returns:
    A dict mapping from metric name to a metric function with above signature.
    """
    metric_fns = {}
    metric_fns.update({
        "metric/ndcg@%d" % topn: tfr.metrics.make_ranking_metric_fn(
            tfr.metrics.RankingMetricKey.NDCG, topn=topn)
        for topn in [1, 3, 5, 10]
    })

    return metric_fns


def get_estimator(hparams):
    """Create a ranking estimator.

    Args:
    hparams: (tf.contrib.training.HParams) a hyperparameters object.

    Returns:
    tf.learn `Estimator`.
    """

    def _train_op_fn(loss):
        """Defines train op used in ranking head."""
        return tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.train.get_global_step(),
            learning_rate=hparams.learning_rate,
            optimizer="Adagrad")

    ranking_head = tfr.head.create_ranking_head(
        loss_fn=tfr.losses.make_loss_fn(_LOSS),
        eval_metric_fns=eval_metric_fns(),
        train_op_fn=_train_op_fn)

    return tf.estimator.Estimator(
        model_fn=tfr.model.make_groupwise_ranking_fn(
            group_score_fn=make_score_fn(),
            group_size=1,
            transform_fn=None,
            ranking_head=ranking_head),
        params=hparams)


def ltr_to_submission(df, features, ranker, path):
    features = features + ['sid']

    preds = ranker.predict(input_fn=lambda: input_fn(path))
    import itertools
    import numpy as np
    # Not sure how to get all preds because it runs infinit
    # So I take all till list size
    preds_slice = itertools.islice(preds, len(df))
    count = 0
    a = np.zeros((len(df), _LIST_SIZE))

    for i in preds_slice:
        a[count] = i
        count += 1

    test_X = df[features]

    test_X = test_X.assign(yhat=a[:, 0])

    df_end = pd.DataFrame(columns=['yhat'], index=df.sid.unique())

    df_end = test_X.sort_values(['sid', 'yhat'], ascending=False).groupby('sid').first()[[
        'yhat', 'transport_mode'
    ]]

    from sklearn.metrics import f1_score
    score = f1_score(df.groupby("sid").first()['click_mode'], df_end.transport_mode, average='weighted')
    print('F1 Score is: {}'.format(score))

    return df_end


In [38]:
df_train_train = pd.read_pickle("../data/processed/ranking/train_all_row_sample_50.pickle")
df_train_test = pd.read_pickle("../data/processed/ranking/train_all_row_sample_50.pickle")

_TRAIN_DATA_PATH="../data/processed/ranking/train_all_row_sample_50.libsvm"
_TEST_DATA_PATH="../data/processed/ranking/train_all_row_sample_50.libsvm"

with open('../data/processed/ranking/features_tfranking.txt') as f:
    features = f.read().splitlines()

_NUM_FEATURES = len(features)

hparams = tf.contrib.training.HParams(learning_rate=0.001)
ranker = get_estimator(hparams)

ranker.train(input_fn=lambda: input_fn(_TRAIN_DATA_PATH), steps=100)

df_preds = ltr_to_submission(df_train_test, features, ranker, _TEST_DATA_PATH)


INFO:tensorflow:Building groupwise ranking model.
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpt2boxnv0', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7ff05551ada0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpt2boxnv0/model.ckpt.
INFO:tensorflow:loss = 0.6843738, step = 1
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpt2boxnv0/model.ckpt.
INFO:tensorflow:Loss for final step: 0.6843738.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpt2boxnv0/model.ckpt-1
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
F1 Score is: 0.51


  'recall', 'true', average, warn_for)


In [39]:
i = 0
while i < 100:
    ranker.train(input_fn=lambda: input_fn(_TRAIN_DATA_PATH), steps=100000)
    i +=1

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpt2boxnv0/model.ckpt-1
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpt2boxnv0/model.ckpt.
INFO:tensorflow:loss = 0.6690021, step = 2
INFO:tensorflow:Saving checkpoints for 2 into /tmp/tmpt2boxnv0/model.ckpt.
INFO:tensorflow:Loss for final step: 0.6690021.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpt2boxnv0/model.ckpt-2
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 2 into /tmp/tmpt2boxnv0/model.ckpt.
INFO:tensorflow:loss = 0.6682758, step = 3
INFO:tensorflow:Saving 

KeyboardInterrupt: 

In [10]:
features = features + ['sid']

In [19]:
path=_TEST_DATA_PATH

In [20]:
df=df_train_test.copy()

In [21]:
features

['transport_mode', 'distance_plan', 'eta', 'price', 'sid']

In [49]:
preds = ranker.predict(input_fn=lambda: input_fn(path))


In [50]:
a = []
for i in preds:
    a.append(i)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpt2boxnv0/model.ckpt-11
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [63]:
a

[array([-0.96864575, -0.96864575, -0.21231593, -0.21231593, -0.96864575,
        -0.62884283, -0.00105851, -0.00105851, -0.00105851, -0.00105851,
        -0.00105851], dtype=float32),
 array([-0.96864575, -0.96864575, -0.21231593, -0.00105851, -0.00105851,
        -0.00105851, -0.00105851, -0.00105851, -0.00105851, -0.00105851,
        -0.00105851], dtype=float32),
 array([-0.39646304, -0.96864575, -0.21231593, -0.39646304, -0.96864575,
        -0.39646304, -0.00105851, -0.00105851, -0.00105851, -0.00105851,
        -0.00105851], dtype=float32),
 array([-0.96864575, -0.21231593, -0.99811614, -0.96864575, -0.96864575,
        -0.00105851, -0.00105851, -0.00105851, -0.00105851, -0.00105851,
        -0.00105851], dtype=float32),
 array([-0.38047838, -0.96864575, -0.39646304, -0.39646304, -0.96864575,
        -0.96864575, -0.00105851, -0.00105851, -0.00105851, -0.00105851,
        -0.00105851], dtype=float32),
 array([-0.39646304, -0.96864575, -0.96864575, -0.96864575, -0.39646304,
       

In [69]:
a=np.array(a)

In [74]:
a.argmin(axis=0)

array([9, 0, 3, 3, 0, 4, 9, 0, 0, 0, 0])

In [78]:
features=features+['sid']

In [80]:
test_X = df[features]


In [82]:
test_X.head(10)

Unnamed: 0,transport_mode,distance_plan,eta,price,sid
1432355,4,47429,4604,14100.0,10
1432357,3,47429,4604,700.0,10
1432360,1,48995,7396,3200.0,10
1432359,11,47796,7234,3000.0,10
1432358,8,49758,6878,5500.0,10
1432356,2,49067,6345,3100.0,10
1435574,4,7667,1229,2100.0,21
1435572,2,6157,1289,300.0,21
1435573,3,7667,929,700.0,21
1460646,5,2714,2434,700.0,25


In [97]:
df_count_sid = pd.DataFrame(test_X.groupby('sid').count()['transport_mode'])
df_count_sid.columns=['count_sid']
df_count_sid = df_count_sid.reset_index()

In [98]:
df_count_sid

Unnamed: 0,sid,count_sid
0,10,6
1,21,3
2,25,6
3,34,5
4,35,6
5,36,6
6,44,5
7,68,4
8,69,4
9,79,7


In [100]:
# Attach yhats for all sid 
# We need to count how many sids we have and attach the first n from preds
df_X_count = pd.merge(test_X, df_count_sid)    

In [105]:
df_X_count.head(10)

Unnamed: 0,transport_mode,distance_plan,eta,price,sid,count_sid
0,4,47429,4604,14100.0,10,6
1,3,47429,4604,700.0,10,6
2,1,48995,7396,3200.0,10,6
3,11,47796,7234,3000.0,10,6
4,8,49758,6878,5500.0,10,6
5,2,49067,6345,3100.0,10,6
6,4,7667,1229,2100.0,21,3
7,2,6157,1289,300.0,21,3
8,3,7667,929,700.0,21,3
9,5,2714,2434,700.0,25,6


In [104]:
a.shape

(10, 11)

In [167]:
df_preds = pd.DataFrame(a)

In [168]:
df_preds

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,-0.968646,-0.968646,-0.212316,-0.212316,-0.968646,-0.628843,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059
1,-0.968646,-0.968646,-0.212316,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059
2,-0.396463,-0.968646,-0.212316,-0.396463,-0.968646,-0.396463,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059
3,-0.968646,-0.212316,-0.998116,-0.968646,-0.968646,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059
4,-0.380478,-0.968646,-0.396463,-0.396463,-0.968646,-0.968646,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059
5,-0.396463,-0.968646,-0.968646,-0.968646,-0.396463,-0.396463,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059
6,-0.968646,-0.968646,-0.968646,-0.212316,-0.212316,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059
7,-0.212316,-0.968646,-0.968646,-0.968646,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059
8,-0.968646,-0.212316,-0.968646,-0.968646,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059
9,-0.968646,-0.968646,-0.396463,-0.380478,-0.968646,-0.968646,-0.380478,-0.001059,-0.001059,-0.001059,-0.001059


In [188]:
df_preds = pd.DataFrame(a)
df_preds = df_preds.assign(sid = df_X_count.sid.unique())
df_preds = df_preds.assign(count_sid = df_X_count.groupby('sid').first()['count_sid'].values)

In [189]:
df_preds

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,sid,count_sid
0,-0.968646,-0.968646,-0.212316,-0.212316,-0.968646,-0.628843,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,10,6
1,-0.968646,-0.968646,-0.212316,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,21,3
2,-0.396463,-0.968646,-0.212316,-0.396463,-0.968646,-0.396463,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,25,6
3,-0.968646,-0.212316,-0.998116,-0.968646,-0.968646,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,34,5
4,-0.380478,-0.968646,-0.396463,-0.396463,-0.968646,-0.968646,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,35,6
5,-0.396463,-0.968646,-0.968646,-0.968646,-0.396463,-0.396463,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,36,6
6,-0.968646,-0.968646,-0.968646,-0.212316,-0.212316,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,44,5
7,-0.212316,-0.968646,-0.968646,-0.968646,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,68,4
8,-0.968646,-0.212316,-0.968646,-0.968646,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,69,4
9,-0.968646,-0.968646,-0.396463,-0.380478,-0.968646,-0.968646,-0.380478,-0.001059,-0.001059,-0.001059,-0.001059,79,7


sid
10    6
21    3
25    6
34    5
35    6
36    6
44    5
68    4
69    4
79    7
Name: count_sid, dtype: int64

In [174]:
df_X_count.count_sid.unique()

array([6, 3, 5, 4, 7])

In [170]:
df_preds

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,sid
0,-0.968646,-0.968646,-0.212316,-0.212316,-0.968646,-0.628843,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,10
1,-0.968646,-0.968646,-0.212316,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,21
2,-0.396463,-0.968646,-0.212316,-0.396463,-0.968646,-0.396463,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,25
3,-0.968646,-0.212316,-0.998116,-0.968646,-0.968646,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,34
4,-0.380478,-0.968646,-0.396463,-0.396463,-0.968646,-0.968646,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,35
5,-0.396463,-0.968646,-0.968646,-0.968646,-0.396463,-0.396463,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,36
6,-0.968646,-0.968646,-0.968646,-0.212316,-0.212316,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,44
7,-0.212316,-0.968646,-0.968646,-0.968646,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,68
8,-0.968646,-0.212316,-0.968646,-0.968646,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,69
9,-0.968646,-0.968646,-0.396463,-0.380478,-0.968646,-0.968646,-0.380478,-0.001059,-0.001059,-0.001059,-0.001059,79


In [171]:
df_preds = pd.merge(df_preds, df_X_count[['sid', 'count_sid']], left_on='sid', right_on='sid')

In [172]:
df_preds

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,sid,count_sid
0,-0.968646,-0.968646,-0.212316,-0.212316,-0.968646,-0.628843,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,10,6
1,-0.968646,-0.968646,-0.212316,-0.212316,-0.968646,-0.628843,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,10,6
2,-0.968646,-0.968646,-0.212316,-0.212316,-0.968646,-0.628843,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,10,6
3,-0.968646,-0.968646,-0.212316,-0.212316,-0.968646,-0.628843,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,10,6
4,-0.968646,-0.968646,-0.212316,-0.212316,-0.968646,-0.628843,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,10,6
5,-0.968646,-0.968646,-0.212316,-0.212316,-0.968646,-0.628843,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,10,6
6,-0.968646,-0.968646,-0.212316,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,21,3
7,-0.968646,-0.968646,-0.212316,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,21,3
8,-0.968646,-0.968646,-0.212316,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,21,3
9,-0.396463,-0.968646,-0.212316,-0.396463,-0.968646,-0.396463,-0.001059,-0.001059,-0.001059,-0.001059,-0.001059,25,6


In [154]:
df_stacked = pd.DataFrame(df_preds.iloc[:,0:10].stack())

In [162]:
pd.DataFrame(pd.DataFrame(df_stacked.to_records()))

Unnamed: 0,level_0,level_1,0
0,0,0,-0.968646
1,0,1,-0.968646
2,0,2,-0.212316
3,0,3,-0.212316
4,0,4,-0.968646
5,0,5,-0.628843
6,0,6,-0.001059
7,0,7,-0.001059
8,0,8,-0.001059
9,0,9,-0.001059


In [166]:
pd.merge(pd.DataFrame(pd.DataFrame(df_stacked.to_records())),
         df_preds[['count_sid', 'sid']],
         left_on='level_0',
         right_index=True)

Unnamed: 0,level_0,level_1,0,count_sid,sid
0,0,0,-0.968646,6,10
1,0,1,-0.968646,6,10
2,0,2,-0.212316,6,10
3,0,3,-0.212316,6,10
4,0,4,-0.968646,6,10
5,0,5,-0.628843,6,10
6,0,6,-0.001059,6,10
7,0,7,-0.001059,6,10
8,0,8,-0.001059,6,10
9,0,9,-0.001059,6,10


In [None]:
df_X_count.assign(yhat = None)

for sid in enumerate(df_X_count.sid.unique(), range(0, len(df_X_count))):
    df_sid = df_X_count[df_X_count == sid]
    

In [None]:
test_X = df[features]

test_X = test_X.assign(yhat=a[:, 0])

df_end = pd.DataFrame(columns=['yhat'], index=df.sid.unique())

df_end = test_X.sort_values(['sid', 'yhat'], ascending=False).groupby('sid').first()[[
    'yhat', 'transport_mode'
]]

from sklearn.metrics import f1_score
score = f1_score(df.groupby("sid").first()['click_mode'], df_end.transport_mode, average='weighted')
print('F1 Score is: {}'.format(score))

return df_end

# 100 k

In [41]:
df_train_train = pd.read_pickle("../data/processed/ranking/train_all_row_sample_100k.pickle")
df_train_test = pd.read_pickle("../data/processed/ranking/train_all_row_sample_100k.pickle")

_TRAIN_DATA_PATH="../data/processed/ranking/train_all_row_sample_100k.libsvm"
_TEST_DATA_PATH="../data/processed/ranking/train_all_row_sample_100k.libsvm"

with open('../data/processed/ranking/features_tfranking.txt') as f:
    features = f.read().splitlines()

_NUM_FEATURES = len(features)

hparams = tf.contrib.training.HParams(learning_rate=0.001)
ranker = get_estimator(hparams)

ranker.train(input_fn=lambda: input_fn(_TRAIN_DATA_PATH), steps=100000)

df_preds = ltr_to_submission(df_train_test, features, ranker, _TEST_DATA_PATH)


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp6u9m8hpf', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fd5f40de8d0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create Checkpo

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmp6u9m8hpf/model.ckpt.
INFO:tensorflow:loss = 68.177574, step = 1
INFO:tensorflow:global_step/sec: 14.8009
INFO:tensorflow:loss = 64.45798, step = 101 (6.758 sec)
INFO:tensorflow:global_step/sec: 13.6414
INFO:tensorflow:loss = 64.24874, step = 201 (7.330 sec)
INFO:tensorflow:global_step/sec: 14.9547
INFO:tensorflow:loss = 65.40012, step = 301 (6.687 sec)
INFO:tensorflow:global_step/sec: 14.3119
INFO:tensorflow:loss = 63.03412, step = 401 (6.987 sec)
INFO:tensorflow:global_step/sec: 10.8212
INFO:tensorflow:loss = 62.565254, step = 501 (9.241 sec)
INFO:tensorflow:global_step/sec: 11.9035
INFO:tensorflow:loss = 61.585995, step = 601 (8.401 sec)
INFO:tensorflow:global_step/sec: 12.6482
INFO:tensorflow:loss = 63.88342, step = 701 (7.907 sec)
INFO:tensorflow:global_step/sec: 9.53363
INFO:tensorflow:loss = 63.386856, step =

# 1k 

In [42]:
df_train_train = pd.read_pickle("../data/processed/ranking/train_all_row_sample_1k.pickle")
df_train_test = pd.read_pickle("../data/processed/ranking/train_all_row_sample_1k.pickle")

_TRAIN_DATA_PATH="../data/processed/ranking/train_all_row_sample_1k.libsvm"
_TEST_DATA_PATH="../data/processed/ranking/train_all_row_sample_1k.libsvm"

with open('../data/processed/ranking/features_tfranking.txt') as f:
    features = f.read().splitlines()

_NUM_FEATURES = len(features)

hparams = tf.contrib.training.HParams(learning_rate=0.001)
ranker = get_estimator(hparams)

ranker.train(input_fn=lambda: input_fn(_TRAIN_DATA_PATH), steps=100000)

df_preds = ltr_to_submission(df_train_test, features, ranker, _TEST_DATA_PATH)


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpw1w9j_7b', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fd5f807d160>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create Checkpo

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpw1w9j_7b/model.ckpt.
INFO:tensorflow:loss = 68.24118, step = 1
INFO:tensorflow:Saving checkpoints for 10 into /tmp/tmpw1w9j_7b/model.ckpt.
INFO:tensorflow:Loss for final step: 67.33364.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpw1w9j_7b/model.ckpt-10
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
F1 Score is: 0.32420710379700923


In [47]:
i = 0
while i < 100:
    ranker.train(input_fn=lambda: input_fn(_TRAIN_DATA_PATH), steps=100000)
    i +=1

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpw1w9j_7b/model.ckpt-40
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 40 into /tmp/tmpw1w9j_7b/model.ckpt.
INFO:tensorflow:loss = 66.30565, step = 41
INFO:tensorflow:Saving checkpoints for 50 into /tmp/tmpw1w9j_7b/model.ckpt.
Instructions for updating:
Use standard file APIs to delete files with this prefix.
INFO:tensorflow:Loss for final step: 66.46154.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpw1w9j_7b/model.ckpt-50
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done r

KeyboardInterrupt: 

# With more features 

In [14]:
df_train_train = pd.read_pickle("../data/processed/ranking/train_all_row_sample_50.pickle")
df_train_test = pd.read_pickle("../data/processed/ranking/train_all_row_sample_50.pickle")

_TRAIN_DATA_PATH="../data/processed/ranking/train_all_row_sample_50_af.libsvm"
_TEST_DATA_PATH="../data/processed/ranking/train_all_row_sample_50_af.libsvm"

with open('../data/processed/ranking/features_tfranking_all.txt') as f:
    features = f.read().splitlines()

_NUM_FEATURES = len(features)

hparams = tf.contrib.training.HParams(learning_rate=0.1)
ranker = get_estimator(hparams)

ranker.train(input_fn=lambda: input_fn(_TRAIN_DATA_PATH), steps=100)

df_preds = ltr_to_submission(df_train_test, features, ranker, _TEST_DATA_PATH)


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpx8tmx8ej', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f4389a3f048>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create Checkpo

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:loss = 58.15442, step = 1
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:Loss for final step: 58.15442.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpx8tmx8ej/model.ckpt-1
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
F1 Score is: 0.7


  'recall', 'true', average, warn_for)


In [15]:
i = 0
while i < 100:
    ranker.train(input_fn=lambda: input_fn(_TRAIN_DATA_PATH), steps=100)
    i +=1

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpx8tmx8ej/model.ckpt-1
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:loss = 64.30162, step = 2
INFO:tensorflow:Saving checkpoints for 2 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:Loss for final step: 64.30162.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpx8tmx8ej/model.ckpt-2
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 2 into /tmp/tmpx8tmx8ej/mode

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpx8tmx8ej/model.ckpt-15
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 15 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:loss = 42.969257, step = 16
INFO:tensorflow:Saving checkpoints for 16 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:Loss for final step: 42.969257.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpx8tmx8ej/model.ckpt-16
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 16 into /tmp/tmpx8tmx

INFO:tensorflow:Loss for final step: 42.439896.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpx8tmx8ej/model.ckpt-29
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 29 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:loss = 42.420555, step = 30
INFO:tensorflow:Saving checkpoints for 30 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:Loss for final step: 42.420555.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpx8tmx8ej/model.ckpt-30
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorf

INFO:tensorflow:Saving checkpoints for 43 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:Loss for final step: 42.25838.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpx8tmx8ej/model.ckpt-43
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 43 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:loss = 42.250237, step = 44
INFO:tensorflow:Saving checkpoints for 44 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:Loss for final step: 42.250237.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpx8tmx8ej/model.ckpt-44
INFO:tensorflow:Runn

INFO:tensorflow:loss = 42.1722, step = 57
INFO:tensorflow:Saving checkpoints for 57 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:Loss for final step: 42.1722.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpx8tmx8ej/model.ckpt-57
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 57 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:loss = 42.167747, step = 58
INFO:tensorflow:Saving checkpoints for 58 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:Loss for final step: 42.167747.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpx8

INFO:tensorflow:Saving checkpoints for 70 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:loss = 42.12197, step = 71
INFO:tensorflow:Saving checkpoints for 71 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:Loss for final step: 42.12197.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpx8tmx8ej/model.ckpt-71
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 71 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:loss = 42.119175, step = 72
INFO:tensorflow:Saving checkpoints for 72 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:Loss for final step: 42.119175.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensor

INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 84 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:loss = 42.089134, step = 85
INFO:tensorflow:Saving checkpoints for 85 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:Loss for final step: 42.089134.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpx8tmx8ej/model.ckpt-85
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 85 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:loss = 42.08721, step = 86
INFO:tensorflow:Saving checkpoints for 86 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:Loss for final step: 42.08721.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensor

INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 98 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:loss = 42.066006, step = 99
INFO:tensorflow:Saving checkpoints for 99 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:Loss for final step: 42.066006.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpx8tmx8ej/model.ckpt-99
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 99 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:loss = 42.06461, step = 100
INFO:tensorflow:Saving checkpoints for 100 into /tmp/tmpx8tmx8ej/model.ckpt.
INFO:tensorflow:Loss for final step: 42.06461.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tens

In [10]:

df_preds = ltr_to_submission(df_train_test, features, ranker, _TEST_DATA_PATH)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Use groupwise dnn v2.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpcfv9fxvt/model.ckpt-101
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
F1 Score is: 0.51
