In [1]:
from __future__ import print_function

import sys
import numpy as np
from sklearn.preprocessing import QuantileTransformer
from sklearn.model_selection import train_test_split

sys.path.append('..')
import tensorflow as tf
from supplementary_code_direct_ranker.DirectRanker import directRanker
from supplementary_code_direct_ranker.helpers import readData, nDCGScorer_cls, MAP_cls

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  return f(*args, **kwds)


In [2]:
# Load the OHSUMED train and test splits; both calls use the same reader settings.
x_train, y_train, q_train = readData(
    data_path="OHSUMED_TRAIN.txt",
    binary=False,
    at=10,
    number_features=25,
    bin_cutoff=1.5,
    cut_zeros=False,
)
x_test, y_test, q_test = readData(
    data_path="OHSUMED_TEST.txt",
    binary=False,
    at=10,
    number_features=25,
    bin_cutoff=1.5,
    cut_zeros=False,
)

In [3]:
def lambda_cost(nn, y0):
    """Logistic cost on the ranker output, averaged over all elements.

    Evaluates ``mean(log(1 + exp(nn)) - nn)``. The expression is rewritten as
    the mathematically identical ``mean(softplus(-nn))`` so that ``exp`` is
    never applied directly to a large positive input, which would overflow
    to ``inf``.

    Args:
        nn: Tensor holding the network output.
        y0: Unused. Kept so the function keeps its two-argument signature
            (presumably the form directRanker expects for ``cost`` -- TODO
            confirm against the DirectRanker implementation).

    Returns:
        Scalar tensor with the mean cost.
    """
    # log(1 + exp(z)) - z == log(1 + exp(-z)) == softplus(-z)
    return tf.reduce_mean(tf.nn.softplus(-nn))


# Build the directRanker model; fitting and prediction happen in the following cells.
# Debugging option: add `cost=lambda_cost` to the arguments below to use the
# custom cost defined above instead of the ranker's default.
dr = directRanker(
    # network architecture
    hidden_layers=[100, 50, 5],
    feature_activation=tf.nn.tanh,
    ranking_activation=tf.nn.tanh,
    feature_bias=True,
    # training schedule
    max_steps=10000,
    print_step=500,
    start_batch_size=3,
    end_batch_size=5,
    start_qids=20,
    end_qids=100,
)

In [4]:
# Fit the ranker on the training features and labels, with ranking=True.
dr.fit(x_train, y_train, ranking=True)

INFO:tensorflow:Scale of 0 disables regularizer.
step: 0, value: 1.1983919143676758, samples: 3, queries: 20
step: 500, value: 0.8622041940689087, samples: 3, queries: 21
step: 1000, value: 0.824661910533905, samples: 3, queries: 23
step: 1500, value: 0.7897867560386658, samples: 3, queries: 25
step: 2000, value: 0.8631294369697571, samples: 3, queries: 27
step: 2500, value: 0.8872365355491638, samples: 3, queries: 29
step: 3000, value: 0.7855363488197327, samples: 3, queries: 32
step: 3500, value: 0.8341662287712097, samples: 3, queries: 35
step: 4000, value: 0.7485103607177734, samples: 3, queries: 38
step: 4500, value: 0.6176104545593262, samples: 3, queries: 41
step: 5000, value: 0.6630043387413025, samples: 3, queries: 44
step: 5500, value: 0.6010100841522217, samples: 3, queries: 48
step: 6000, value: 0.5183827877044678, samples: 4, queries: 52
step: 6500, value: 0.5566627383232117, samples: 4, queries: 56
step: 7000, value: 0.5442885160446167, samples: 4, queries: 61
step: 7500,

In [25]:
# For every test query collect two label orderings:
#   prediction -- labels ordered by the model's predicted score, highest first
#   ideal_rank -- labels sorted in descending order (the ideal ranking for the query)
prediction = []
ideal_rank = []
for query_features, query_labels in zip(x_test, y_test):
    scores = dr.predict_proba(query_features)

    # argsort is ascending, so reverse it to put the highest score first
    descending_order = np.argsort(np.concatenate(scores))[::-1]

    prediction.append(query_labels[descending_order])       # ranking according to the model
    ideal_rank.append(sorted(query_labels, reverse=True))   # ideal ranking for the query

In [30]:
# Collapse each query's ranked label array to one dimension.
prediction = [ranked_labels.flatten() for ranked_labels in prediction]