<a href="https://colab.research.google.com/github/Abhisekh97/RankNet_tf_keras/blob/main/RankNet_tensorflow_keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, activations, losses, Model, Input
from tensorflow.nn import leaky_relu
import numpy as np
from itertools import combinations
from tensorflow.keras.utils import plot_model, Progbar
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split


##  Model Architecture

In [None]:
class RankNet(Model):
    """Pairwise RankNet model with a shared scoring tower.

    Both members of a document pair are passed through the *same* Dense
    layers (shared weights), giving one scalar score per document
    (``oi`` and ``oj``). The model output is ``sigmoid(oi - oj)``.
    """

    def __init__(self):
        # The base constructor takes no extra positional argument; passing
        # `self` a second time (as the previous code did) is not a valid
        # constructor argument.
        super().__init__()
        # Hidden layers of the scoring tower, applied in order to both inputs.
        self.dense = [
            layers.Dense(16, activation=leaky_relu),
            layers.Dense(8, activation=leaky_relu),
        ]
        # Scalar score head.
        self.o = layers.Dense(1, activation='linear')
        self.oi_minus_oj = layers.Subtract()
        # Built once here so that call() does not create a new layer object
        # on every invocation.
        self.sigmoid = layers.Activation('sigmoid')

    def call(self, input):
        """Return ``sigmoid(score(xi) - score(xj))`` for a pair of inputs.

        Args:
            input: a two-element sequence ``(xi, xj)`` holding the features
                of the first and second document of each pair.
        """
        # NOTE: the parameter name `input` shadows the builtin; it is kept
        # unchanged so existing callers are unaffected.
        xi, xj = input
        densei, densej = xi, xj
        for dense in self.dense:
            # The same layer object processes both inputs -> shared weights.
            densei = dense(densei)
            densej = dense(densej)

        oi = self.o(densei)
        oj = self.o(densej)
        oij = self.oi_minus_oj([oi, oj])
        return self.sigmoid(oij)

    def build_graph(self):
        """Wrap ``call`` in a functional Model over two 10-feature inputs.

        Intended for inspection, e.g. ``build_graph().summary()``.
        """
        # shape must be a tuple: `(10)` is just the int 10, `(10,)` is a tuple.
        x = [Input(shape=(10,)), Input(shape=(10,))]
        return Model(inputs=x, outputs=self.call(x))

# Instantiate the model and print the layer summary of its functional graph
# (built by build_graph() from two 10-feature inputs).
model = RankNet()
model.build_graph().summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 10)]                 0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, 10)]                 0         []                            
                                                                                                  
 dense (Dense)               (None, 16)                   176       ['input_1[0][0]',             
                                                                     'input_2[0][0]']             
                                                                                                  
 dense_1 (Dense)             (None, 8)                    136       ['dense[0][0]',           

In [None]:
# Simulated query-id column: query ids 1..nb_query, each repeated once per
# document. The number of documents per query is ceil(|N(0, 15)| + 2), drawn
# independently for every query.
nb_query = 20
query = np.repeat(
    np.arange(1, nb_query + 1),
    [int(np.ceil(np.abs(np.random.normal(0, scale=15)) + 2)) for _ in range(nb_query)],
)
query


array([ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,
        3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
        4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
        5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  6,
        6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
        6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
        7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,
        8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,
        9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
        9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
        9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11,
       11, 11, 11, 11, 11

In [None]:
# One random 10-dimensional feature vector per entry of `query`.
doc_features = np.random.random(size=(query.shape[0], 10))
doc_features.shape

In [None]:
# One random integer relevance grade in 0..4 per entry of `query`,
# stored as float32.
doc_scores = np.random.randint(0, 5, size=query.shape[0]).astype(np.float32)
doc_scores.shape

(399,)

In [None]:
# Display the generated relevance grades.
doc_scores

array([4., 2., 3., 0., 0., 4., 3., 3., 1., 4., 2., 3., 2., 0., 3., 1., 2.,
       1., 0., 1., 2., 4., 4., 0., 1., 1., 0., 3., 4., 2., 0., 2., 4., 0.,
       3., 3., 2., 1., 4., 1., 4., 0., 1., 0., 2., 3., 1., 3., 4., 0., 1.,
       3., 1., 1., 3., 1., 3., 0., 2., 3., 0., 3., 1., 0., 0., 3., 3., 2.,
       0., 2., 0., 4., 1., 0., 4., 2., 1., 2., 3., 0., 0., 0., 0., 3., 3.,
       2., 1., 2., 4., 0., 4., 3., 0., 1., 2., 3., 3., 3., 3., 0., 3., 1.,
       4., 0., 2., 2., 1., 3., 2., 2., 4., 3., 0., 4., 1., 1., 0., 3., 4.,
       2., 3., 0., 0., 1., 4., 1., 0., 4., 2., 0., 4., 2., 3., 3., 3., 0.,
       1., 4., 4., 0., 1., 1., 1., 4., 2., 0., 0., 2., 3., 2., 0., 0., 3.,
       4., 4., 3., 0., 1., 0., 0., 3., 0., 4., 2., 3., 4., 2., 2., 3., 0.,
       4., 1., 3., 1., 4., 1., 4., 1., 3., 1., 2., 1., 3., 0., 1., 0., 4.,
       3., 0., 1., 3., 4., 1., 3., 0., 3., 2., 0., 1., 1., 1., 2., 4., 3.,
       3., 0., 1., 3., 2., 3., 4., 0., 2., 0., 4., 2., 0., 2., 4., 4., 1.,
       4., 0., 4., 1., 2.

The `doc_features` array above has one row per entry of `query`,
so every query–document pair gets its own 10-dimensional feature vector.
The target (`doc_scores`) is a relevance score from 0 to 4, ordered from bad to good: 0 means very poor relevance and 4 means highly relevant. In a practical scenario these labels would be assigned by human annotators, or derived from click-stream data collected through a user interface.

In [None]:
# Build every within-query document pair (i, j) and its target probability:
# 1 when document i has the higher score, 0 when it has the lower score,
# and 0.5 when both scores are equal.
xi, xj, pij, pair_id, pair_query_id = [], [], [], [], []

for q in np.unique(query):
    # Row indices of the documents that belong to query q.
    query_idx = np.flatnonzero(query == q)
    for pair_idx in combinations(query_idx, 2):
        i, j = pair_idx

        # Bookkeeping: which query and which two rows this pair came from.
        pair_query_id.append(q)
        pair_id.append(pair_idx)

        xi.append(doc_features[i])
        xj.append(doc_features[j])

        if doc_scores[i] > doc_scores[j]:
            _pij = 1
        elif doc_scores[i] == doc_scores[j]:
            _pij = 0.5
        else:
            _pij = 0
        pij.append(_pij)


In [None]:
# Convert the accumulated Python lists to NumPy arrays
# (pair_id is intentionally left as a list of index tuples).
xi, xj, pij, pair_query_id = map(np.array, (xi, xj, pij, pair_query_id))

In [None]:
# Hold out 20% of the pairs for testing. The split is stratified on the
# query id of each pair, and all four sequences are split with the same
# partition so rows stay aligned.
# NOTE(review): no random_state is passed, so the split differs between runs.
xi_train, xi_test, xj_train, xj_test, pij_train, pij_test, pair_id_train, pair_id_test = train_test_split(
    xi, xj, pij, pair_id, test_size=0.2, stratify=pair_query_id)

In [None]:
# Fresh model instance used for training (separate from `model` above,
# which was only used to print the summary).
ranknet = RankNet()

In [None]:
# Adam optimizer with binary cross-entropy between the model's sigmoid output
# and the pairwise target pij (which takes the values 0, 0.5 and 1).
ranknet.compile(optimizer = 'adam', loss = 'binary_crossentropy')

In [None]:
# Train for 50 epochs with one pair per gradient step (batch_size=1);
# the held-out pairs are passed as validation data.
ranknet.fit([xi_train, xj_train], pij_train , epochs=50, batch_size=1, validation_data=([xi_test, xj_test], pij_test))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7f1c8bdd5ed0>