## Final Report 
####  Corse Name: Machine Learning and Information Retrieval System
####  Student ID: 201811552
####  Name: Shingo Watanabe


###  Final Report  Overall
I implemented Ranking method. In the process of researching Ranknet, I found out that there are three types of methods: pointwise, pairwise, and listnet methods. This time I created a ranking model using the pairwise method. I used Relu for the activation function and Adam for the optimization function. To create the model, I used the ideas in the following references.


#### References
1. 'From RankNet to LambdaRank to LambdaMART' https://www.microsoft.com/en-us/research/uploads/prod/2016/02/MSR-TR-2010-82.pdf
- 'PyTorchを用いたRankNetの実装' https://www.szdrblog.info/entry/2018/12/23/234612
- 'PytorchによるRankNet' https://ryo59.github.io/ranknet.html
- 'RankNetを実装してランキング学習' https://qiita.com/kzkadc/items/c358338f0d8bd764f514

### 1 Import necessary modules and classes

In [1]:
import numpy  as np
import torch
from ptranking.data.data_utils import LTRDataset, SPLIT_TYPE
import torch.nn as nn
from ptranking.ltr_adhoc.eval.parameter import ScoringFunctionParameter
from ptranking.ltr_adhoc.eval.eval_utils import ndcg_at_ks, ndcg_at_k
from ptranking.metric.adhoc_metric import torch_nDCG_at_k, torch_nDCG_at_ks
import torch.nn.functional as F
import optuna

### 2 Define Neural Network: RankNet

- init : Inherit and initialize the RankNet class. 
- forward: Repeat the calculation of the input and application of the activation function in the forward direction.
- loss: Computation of loss functions using pairwise methods.
- predict: Function for testing.

In [2]:
class RankNet(nn.Module):
    def __init__(self, trial, num_layer, input_dim, h_dim, lr_rate):
        super(RankNet, self).__init__()
        self.activation = get_activation(trial)
        # first layer
        self.fc = nn.ModuleList([nn.Linear(input_dim, h_dim[0])])
        # after first layer
        for i in range(1, num_layer):
            self.fc.append(nn.Linear(h_dim[i-1], h_dim[i]))
        self.dropout = nn.Dropout(lr_rate)

        self.fc_last = nn.Linear(h_dim[i], 1)

    def forward(self, x):
        for i, l in enumerate(self.fc):
            x = self.dropout(x)
            x = l(x)
            x = self.activation(x)
        x = self.fc_last(x)
        return x

    def loss(self, torch_batch_rankings, torch_batch_std_labels):

        # Make a pair from the model predictions
        batch_pred = self.forward(torch_batch_rankings)  # batch_pred = [40,1]
        batch_pred_dim = torch.squeeze(batch_pred, 1) # batch_pred_dim = [40]
        batch_pred_diffs = batch_pred - torch.unsqueeze(batch_pred_dim, 0)  # batch_pred_diffs=[40, 40]

        # Make a pair from the relevance of the label
        batch_std = torch_batch_std_labels # batch_std = [40]
        batch_std_diffs = torch.unsqueeze(batch_std, 1) - torch.unsqueeze(batch_std, 0) # batch_std_diffs=[40, 40]

        batch_Sij = torch.clamp(batch_std_diffs, -1, 1)

        sigma = 1.0
        batch_loss_1st = 0.5 * sigma * batch_pred_diffs * (1.0 - batch_Sij)
        batch_loss_2nd = torch.log(torch.exp(-sigma * batch_pred_diffs) + 1.0)

        # Calculate loss outside diagonal
        diagona = 1 - torch.eye(batch_loss_1st.shape[0])
        batch_loss = (batch_loss_1st + batch_loss_2nd) * diagona
        combination = (batch_loss_1st.shape[0] * (batch_loss_1st.shape[0] - 1)) / 2

        batch_loss_triu = (torch.sum(batch_loss) / 2) / combination

        return batch_loss_triu

    def predict(self, x):
        return self.forward(x)

### 3 Define optimization and activation function

- get_optimizer: Optimization function is Adam.
- get_activation: Activation function is Relu.

In [3]:
def get_optimizer(trial, model):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)

    return optimizer

def get_activation(trial):
    activation = F.relu
    return activation

### 4 Training

In [4]:
def train(model, train_data, optimizer):
    model.train()
    for qid, torch_batch_rankings, torch_batch_std_labels in train_data:
        data, target = torch_batch_rankings, torch_batch_std_labels
        optimizer.zero_grad()
        loss = model.loss(data, target)
        loss.backward()
        optimizer.step()

### 5 Testing

In [5]:
def test(model, test_data):
    # Testing
    ks=[1, 5, 10]
    sum_ndcg_at_ks = torch.zeros(len(ks))
    cnt = torch.zeros(1)
    for qid, batch_ranking, batch_labels in test_data:
        if torch.sum(batch_labels) <=0: # filter dumb queries
            continue

        batch_rele_preds = model.predict(batch_ranking)
        pred_ar = batch_rele_preds.squeeze(1)
        # _, order = torch.sort(batch_labels, descending=True)
        # y_pred_sorted = batch_labels[0][order]

        _, batch_sorted_inds = torch.sort(batch_rele_preds, dim=1, descending=True)
        # print(batch_sorted_inds[0].T)
        # exit()
        batch_sys_sorted_labels = torch.gather(batch_labels, dim=1, index=batch_sorted_inds[0].T)
        # print(batch_sys_sorted_labels)
        # exit()
        batch_ideal_sorted_labels, _ = torch.sort(batch_labels, dim=1, descending=True)
        # print(batch_ideal_sorted_labels)

        batch_ndcg_at_ks = torch_nDCG_at_ks(
            batch_sys_sorted_labels=batch_sys_sorted_labels,
            batch_ideal_sorted_labels=batch_ideal_sorted_labels,
            ks=ks
        )

        # default batch_size=1 due to testing data
        sum_ndcg_at_ks = torch.add(sum_ndcg_at_ks, torch.squeeze(batch_ndcg_at_ks, dim=0))
        cnt += 1

    avg_ndcg_at_ks = sum_ndcg_at_ks/cnt
    print('ndcg =', avg_ndcg_at_ks)
    return avg_ndcg_at_ks[2]

### 6 Run some functions
- objective: Pass data to train and test and run some function.

In [6]:
def objective(trial):
    train_file = './vali_as_train.txt'
    test_file = './test.txt'

    train_data = LTRDataset(
                            data_id='MQ2007_Super',
                            file=train_file,
                            split_type=SPLIT_TYPE.Train,
                            batch_size=1,
                            shuffle=True,
                            presort=True,
                            data_dict=None,
                            eval_dict=None,
                            buffer=False
                        )

    test_data = LTRDataset(
                            data_id='MQ2007_Super',
                            file=test_file,
                            split_type=SPLIT_TYPE.Test,
                            shuffle=False,
                            data_dict=None,
                            batch_size=1,
                            buffer=False
                        )
    num_layer = trial.suggest_int('num_layer', 3, 7)

    h_dim = [int(trial.suggest_discrete_uniform("h_dim_"+str(i), 16, 128, 16)) for i in range(num_layer)]

    lr_rate = trial.suggest_uniform("dropout_l", 0.2, 0.5)

    ranknet = RankNet(trial, num_layer, 46, h_dim, lr_rate)
    optimizer = get_optimizer(trial, ranknet)
    for step in range(EPOCH):
        train(ranknet, train_data, optimizer)
        ndcg = test(ranknet, test_data)

    return ndcg

In [7]:
EPOCH = 1

TRIAL_SIZE = 2
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=TRIAL_SIZE)

[32m[I 2021-01-04 11:44:49,905][0m A new study created in memory with name: no-name-9543953d-4c42-4ca3-b247-7279d5857fe7[0m
[32m[I 2021-01-04 11:44:52,545][0m Trial 0 finished with value: 0.28270575404167175 and parameters: {'num_layer': 6, 'h_dim_0': 112.0, 'h_dim_1': 48.0, 'h_dim_2': 112.0, 'h_dim_3': 16.0, 'h_dim_4': 128.0, 'h_dim_5': 64.0, 'dropout_l': 0.35003328805771716}. Best is trial 0 with value: 0.28270575404167175.[0m


ndcg = tensor([0.1276, 0.2234, 0.2827])


[32m[I 2021-01-04 11:44:55,514][0m Trial 1 finished with value: 0.28270575404167175 and parameters: {'num_layer': 7, 'h_dim_0': 96.0, 'h_dim_1': 96.0, 'h_dim_2': 112.0, 'h_dim_3': 128.0, 'h_dim_4': 16.0, 'h_dim_5': 16.0, 'h_dim_6': 112.0, 'dropout_l': 0.41825214020750034}. Best is trial 0 with value: 0.28270575404167175.[0m


ndcg = tensor([0.1276, 0.2234, 0.2827])
