In [1]:
import sys

sys.path.insert(0, '/home/jianx/search-exposure/')
import torch
from annoy import AnnoyIndex
import forward_ranker.load_data as load_data
import forward_ranker.train as train
from forward_ranker.utils import print_message
from forward_ranker.test import get_ndcg_precision_rr
obj_reader = load_data.obj_reader
obj_writer = load_data.obj_writer
import pickle
import numpy as np
import random
import math

In [2]:
# CSV file read by load_ground_truth, which parses the first three
# comma-separated fields of every line as integers: qid, pid, rank.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
GROUND_TRUTH_PATH = "/datadrive/jianx/data/results/all_search_rankings_100_100_flat.csv"

In [3]:
def load_ground_truth(path=GROUND_TRUTH_PATH):
    """Load ground-truth rankings from a CSV file into a nested relevance dict.

    Every non-blank line must start with three comma-separated integer
    fields, read in this order: ``qid``, ``pid``, ``rank``. Any further
    fields on the line are ignored. Blank lines (for example a trailing
    empty line at the end of the file) are skipped.

    Args:
        path: Path of the CSV file to read. Defaults to ``GROUND_TRUTH_PATH``.

    Returns:
        dict: ``{pid: {qid: 101 - rank}}``. A later line with the same
        ``(pid, qid)`` pair overwrites the earlier value.

    Raises:
        ValueError: If one of the first three fields of a non-blank line is
            not an integer.
        IndexError: If a non-blank line has fewer than three fields.
    """
    all_results = {}
    with open(path, "r") as f:
        for line in f:
            # Skip empty / whitespace-only lines instead of failing in int("").
            if not line.strip():
                continue
            # Split once per line; int() tolerates the trailing newline.
            fields = line.split(",")
            qid, pid, rank = int(fields[0]), int(fields[1]), int(fields[2])
            # Lower rank -> larger score.
            # NOTE(review): 101 presumably assumes ranks in 1..100 — confirm.
            all_results.setdefault(pid, {})[qid] = 101 - rank
    return all_results

In [4]:
# Ground-truth scores as built by load_ground_truth: ratings[pid][qid] = 101 - rank.
# Used below as the rating_dict argument of calculate_metrics.
ratings = load_ground_truth(GROUND_TRUTH_PATH)

In [34]:
# Predicted rankings of the three models, loaded with load_data.obj_reader.
# Each object is later passed to calculate_metrics as result_dict, i.e. it is
# used as a mapping id -> dict whose key iteration order is taken as the ranking.
# NOTE(review): the file names suggest pickle files — only load these from a
# trusted location, since unpickling can execute arbitrary code.
baseline_reverse = obj_reader("/datadrive/ruohan/reverse_ranker/bug_fixed/forward_baseline_rank_test.pickle")
trained_reverse = obj_reader("/datadrive/ruohan/reverse_ranker/bug_fixed/pred_rank_test.pickle")
transformed_reverse = obj_reader("/datadrive/ruohan/reverse_ranker/transformation/pred_rank_test.pickle")

In [20]:
def get_reverse_ndcg_precision_rr(true_dict, test_dict, rank):
    """Score one predicted ranking against its ground-truth relevance scores.

    The predicted ranking is the key iteration order of ``test_dict``; its
    values are not used. A predicted key counts as a hit when it is also a
    key of ``true_dict``, and its gain is then ``true_dict[key]``.

    Args:
        true_dict: Mapping ``key -> relevance score`` (ground truth).
        test_dict: Mapping whose key order is the predicted ranking.
        rank: Cut-off ``k`` for NDCG@k and Precision@k.

    Returns:
        tuple: ``(ndcg, precision, rr)`` where

        * ``ndcg`` is DCG / ideal DCG, both truncated to
          ``min(rank, len(test_dict))`` positions with discount
          ``log2(2 + i)``; it is ``0`` when the ideal DCG is zero.
        * ``precision`` is the number of hits in the truncated list divided
          by the *requested* ``rank`` (not the truncated length).
        * ``rr`` is the reciprocal of the 1-based position of the first hit
          anywhere in the predicted list (not limited to ``rank``), or NaN
          when there is no hit.

    Raises:
        ZeroDivisionError: If ``rank`` is 0.
    """
    ranked_keys = [entry[0] for entry in test_dict.items()]
    cutoff = min(rank, len(ranked_keys))

    # Reciprocal rank looks at the whole prediction list, not only the top-k.
    reciprocal_rank = float("NaN")
    for position, key in enumerate(ranked_keys, start=1):
        if key in true_dict:
            reciprocal_rank = 1 / position
            break

    # One pass over the top-k collects both the hit count and the DCG.
    # Gains are accumulated with += in position order to keep float results stable.
    hits = 0
    dcg = 0
    for position in range(cutoff):
        key = ranked_keys[position]
        if key in true_dict:
            hits += 1
            gain = true_dict[key]
        else:
            gain = 0
        dcg += gain / math.log2(2 + position)

    # Ideal ordering: the ground-truth scores from largest to smallest,
    # padded with zero gain when there are fewer than `cutoff` of them.
    best_gains = sorted(true_dict.values(), reverse=True)
    ideal_dcg = 0
    for position in range(cutoff):
        gain = best_gains[position] if position < len(best_gains) else 0
        ideal_dcg += gain / math.log2(2 + position)

    ndcg = dcg / ideal_dcg if ideal_dcg != 0 else 0
    return ndcg, hits / rank, reciprocal_rank

def calculate_metrics(rating_dict, result_dict, rank=10):
    """Print mean NDCG@rank, Precision@rank and RR over all scored entries.

    Every key of ``result_dict`` that also appears in ``rating_dict`` is
    scored with ``get_reverse_ndcg_precision_rr``; keys without ground truth
    are skipped. The three metrics are averaged with ``np.nanmean`` (so NaN
    reciprocal ranks are ignored) and printed on a single line.

    Args:
        rating_dict: Mapping ``key -> ground-truth relevance dict``.
        result_dict: Mapping ``key -> predicted ranking dict``.
        rank: Cut-off used for NDCG and precision. Defaults to 10.

    Returns:
        None. The result is only printed.
    """
    ndcg_scores, precision_scores, rr_scores = [], [], []
    for key in list(result_dict.keys()):
        # Entries without ground truth cannot be scored.
        if key not in rating_dict:
            continue
        scores = get_reverse_ndcg_precision_rr(rating_dict[key], result_dict[key], rank)
        ndcg_scores.append(scores[0])
        precision_scores.append(scores[1])
        rr_scores.append(scores[2])

    mean_ndcg = np.nanmean(ndcg_scores)
    mean_precision = np.nanmean(precision_scores)
    mean_rr = np.nanmean(rr_scores)

    ndcg_text = "NDCG@{}: {:.4f}".format(rank, mean_ndcg)
    precision_text = "Precision@{}: {:.4f}".format(rank, mean_precision)
    rr_text = "RR: {:.4f}".format(mean_rr)
    print(ndcg_text, precision_text, rr_text)

In [33]:
# Report the metrics of each model at cut-off 10 (the default) and at 100.
for _model_label, _model_results in (
    ("Baseline forward embedding model", baseline_reverse),
    ("Append embedding model", trained_reverse),
    ("Transformation model", transformed_reverse),
):
    print(_model_label)
    calculate_metrics(ratings, _model_results)
    calculate_metrics(ratings, _model_results, 100)

Baseline forward embedding model
NDCG@10: 0.6106 Precision@10: 0.3182 RR: 0.7823
NDCG@100: 0.6830 Precision@100: 0.0567 RR: 0.7823
Append embedding model
NDCG@10: 0.7169 Precision@10: 0.3716 RR: 0.8510
NDCG@100: 0.7771 Precision@100: 0.0625 RR: 0.8510
Transformation model
NDCG@10: 0.2047 Precision@10: 0.1030 RR: 0.3943
NDCG@100: 0.2693 Precision@100: 0.0243 RR: 0.3943


In [25]:
# NOTE(review): new_baseline_reverse and new_trained_reverse are not assigned in
# any cell visible in this notebook; unless they are defined elsewhere, this
# cell raises NameError on a fresh kernel. TODO: add the cell that loads them.
# Same report as for the other models: cut-off 10 (default), then 100.
calculate_metrics(ratings, new_baseline_reverse)
calculate_metrics(ratings, new_baseline_reverse,100)

calculate_metrics(ratings, new_trained_reverse)
calculate_metrics(ratings, new_trained_reverse, 100)

NDCG@10: 0.6125 Precision@10: 0.3235 RR: 0.7946
NDCG@100: 0.6826 Precision@100: 0.0607 RR: 0.7946
NDCG@10: 0.7182 Precision@10: 0.3770 RR: 0.8552
NDCG@100: 0.7785 Precision@100: 0.0669 RR: 0.8552
