In [2]:
import sys

sys.path.insert(0, '/home/jianx/search-exposure/')
import torch
from annoy import AnnoyIndex
import forward_ranker.load_data as load_data
import forward_ranker.train as train
from forward_ranker.utils import print_message
from forward_ranker.test import get_ndcg_precision_rr
obj_reader = load_data.obj_reader
obj_writer = load_data.obj_writer
import pickle
import numpy as np
import random
import math

In [2]:
# Comma-separated ground-truth ranking file. load_ground_truth() parses the
# first three fields of every row as integers, in the order: qid, pid, rank.
GROUND_TRUTH_PATH = "/datadrive/jianx/data/results/all_search_rankings_100_100_flat.csv"

In [3]:
def load_ground_truth(path=GROUND_TRUTH_PATH, max_rank=100):
    """Load ground-truth rankings into a nested ``{pid: {qid: relevance}}`` dict.

    Every non-blank line of the file is split on commas and its first three
    fields are parsed as integers ``qid, pid, rank``. The rank is turned into
    a relevance grade ``max_rank + 1 - rank`` so that a smaller rank yields a
    larger grade.

    Args:
        path: Path of the comma-separated ground-truth file.
        max_rank: Rank that should map to grade 1. The default of 100
            reproduces the original ``101 - rank`` grading.

    Returns:
        dict mapping each pid to a dict ``{qid: relevance}``. If the same
        (pid, qid) pair occurs more than once, the last row wins.

    Raises:
        ValueError: If one of the first three fields of a non-blank line is
            not an integer.
        IndexError: If a non-blank line has fewer than three fields.
    """
    all_results = {}
    with open(path, "r") as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of crashing inside int().
            if not line.strip():
                continue
            # Split once rather than once per field.
            fields = line.split(",")
            qid = int(fields[0])
            pid = int(fields[1])
            rank = int(fields[2])
            all_results.setdefault(pid, {})[qid] = max_rank + 1 - rank
    return all_results

In [4]:
# ratings: {pid: {qid: relevance}} built by load_ground_truth(), where the
# relevance of a query for a passage is 101 - rank.
ratings = load_ground_truth(GROUND_TRUTH_PATH)

In [5]:
# Full query log: ranking results of the forward baseline, the "append" model
# and the "residual" model, read with obj_reader. Each object is later passed
# to calculate_metrics() as `result_dict`, i.e. it is indexed by pid and every
# entry is iterated with .items().
# NOTE(review): presumably pickled dicts for the test split (per file names) - confirm.
baseline_full = obj_reader("/datadrive/ruohan/reverse_ranker/residual/loss15_layer3/forward_baseline_rank_test.pickle")
append_full = obj_reader("/datadrive/ruohan/reverse_ranker/append/layer3_no_split/pred_rank_test.pickle")
residual_full = obj_reader("/datadrive/ruohan/reverse_ranker/residual/loss15_layer3/pred_rank_test.pickle")

In [6]:
# Half of the query log (directories named train_query_250000).
# Same three result objects as for the full log: forward baseline, append, residual.
baseline_half = obj_reader("/datadrive/ruohan/reverse_ranker/residual/train_query_250000/forward_baseline_rank_test.pickle")
append_half = obj_reader("/datadrive/ruohan/reverse_ranker/append/train_query_250000/pred_rank_test.pickle")
residual_half = obj_reader("/datadrive/ruohan/reverse_ranker/residual/train_query_250000/pred_rank_test.pickle")

# One tenth of the query log (directories named train_query_50000).
baseline_110 = obj_reader("/datadrive/ruohan/reverse_ranker/residual/train_query_50000/forward_baseline_rank_test.pickle")
append_110 = obj_reader("/datadrive/ruohan/reverse_ranker/append/train_query_50000/pred_rank_test.pickle")
residual_110 = obj_reader("/datadrive/ruohan/reverse_ranker/residual/train_query_50000/pred_rank_test.pickle")

# One tenth of the query log, sampled from query results
# (directories named train_query_50000_morepos).
# NOTE(review): "morepos" presumably means more positives per query - confirm.
baseline_q = obj_reader("/datadrive/ruohan/reverse_ranker/residual/train_query_50000_morepos/forward_baseline_rank_test.pickle")
append_q = obj_reader("/datadrive/ruohan/reverse_ranker/append/train_query_50000_morepos/pred_rank_test.pickle")
residual_q = obj_reader("/datadrive/ruohan/reverse_ranker/residual/train_query_50000_morepos/pred_rank_test.pickle")

In [7]:
def get_reverse_ndcg_precision_rr(true_dict, test_dict, rank):
    """Score one ranked result list against graded ground truth.

    Args:
        true_dict: Mapping ``id -> relevance grade`` of the relevant items.
        test_dict: Mapping whose key order is taken as the predicted ranking
            (first key = rank 1). Its values are not used.
        rank: Cut-off ``k`` for NDCG@k and Precision@k; must be positive.

    Returns:
        Tuple ``(ndcg, precision, rr)``:

        * ``ndcg`` - DCG of the first ``k`` predictions divided by the DCG of
          the ``k`` highest grades in ``true_dict``; 0 when the ideal DCG is 0.
        * ``precision`` - number of relevant ids among the first ``k``
          predictions divided by ``k`` (not by the number of predictions).
        * ``rr`` - reciprocal rank of the first relevant id anywhere in the
          predicted list (not cut at ``k``); NaN when there is none.

    Raises:
        ValueError: If ``rank`` is not positive.
    """
    if rank <= 0:
        raise ValueError("rank must be a positive integer")

    ranked_ids = list(test_dict)
    # Fewer than `rank` predictions may be available.
    cutoff = min(rank, len(ranked_ids))

    # Reciprocal rank looks at the whole list, not only the first `rank` ids.
    rr = float("NaN")
    for position, item_id in enumerate(ranked_ids, start=1):
        if item_id in true_dict:
            rr = 1 / position
            break

    # Hits and discounted gain over the first `cutoff` predictions.
    num_positive = 0
    cumulative_gain = 0
    for i in range(cutoff):
        item_id = ranked_ids[i]
        if item_id in true_dict:
            num_positive += 1
            cumulative_gain += true_dict[item_id] / math.log2(2 + i)

    # The ideal ranking is built over the requested cut-off `rank`, not over
    # the number of predictions: otherwise a short result list is normalised
    # against a shortened ideal and receives an inflated NDCG.
    ideal_grades = sorted(true_dict.values(), reverse=True)[:rank]
    ideal_gain = 0
    for i, grade in enumerate(ideal_grades):
        ideal_gain += grade / math.log2(2 + i)

    ndcg = 0
    if ideal_gain != 0:
        ndcg = cumulative_gain / ideal_gain
    return ndcg, num_positive / rank, rr

def calculate_metrics(rating_dict, result_dict, rank=10):
    """Print NDCG@rank, Precision@rank and RR averaged over the scored ids.

    Only ids present in both ``result_dict`` and ``rating_dict`` are scored,
    each with ``get_reverse_ndcg_precision_rr``. The averages ignore NaN
    entries (``np.nanmean``). Nothing is returned; the summary line is printed.

    Args:
        rating_dict: Ground truth, ``id -> {item: relevance}``.
        result_dict: Predictions, ``id -> ranked mapping of items``.
        rank: Cut-off passed through to the per-id scoring function.
    """
    per_id_scores = [
        get_reverse_ndcg_precision_rr(rating_dict[key], result_dict[key], rank)
        for key in result_dict
        if key in rating_dict
    ]
    ndcg_values = [scores[0] for scores in per_id_scores]
    precision_values = [scores[1] for scores in per_id_scores]
    rr_values = [scores[2] for scores in per_id_scores]

    mean_ndcg = np.nanmean(ndcg_values)
    mean_precision = np.nanmean(precision_values)
    mean_rr = np.nanmean(rr_values)

    summary_parts = (
        "NDCG@{}: {:.4f}".format(rank, mean_ndcg),
        "Precision@{}: {:.4f}".format(rank, mean_precision),
        "RR: {:.4f}".format(mean_rr),
    )
    print(*summary_parts)

In [8]:
print("With full query log")
# (heading, ranking results) for each model trained on the full query log.
for model_label, model_results in (
    ("Baseline forward embedding model", baseline_full),
    ("Append embedding model", append_full),
    ("Residual embedding model", residual_full),
):
    print(model_label)
    # Report at cut-off 10 first, then at cut-off 100.
    for cutoff in (10, 100):
        calculate_metrics(ratings, model_results, cutoff)

With full query log
Baseline forward embedding model
NDCG@10: 0.6218 Precision@10: 0.3244 RR: 0.7892
NDCG@100: 0.6915 Precision@100: 0.0605 RR: 0.7892
Append embedding model
NDCG@10: 0.8110 Precision@10: 0.4149 RR: 0.9073
NDCG@100: 0.8533 Precision@100: 0.0693 RR: 0.9073
Residual embedding model
NDCG@10: 0.8204 Precision@10: 0.4183 RR: 0.9096
NDCG@100: 0.8597 Precision@100: 0.0694 RR: 0.9096


In [9]:
print("With half query log")
# (heading, ranking results) for each model trained on half of the query log.
for model_label, model_results in (
    ("Baseline forward embedding model", baseline_half),
    ("Append embedding model", append_half),
    ("Residual embedding model", residual_half),
):
    print(model_label)
    # Report at cut-off 10 first, then at cut-off 100.
    for cutoff in (10, 100):
        calculate_metrics(ratings, model_results, cutoff)

With half query log
Baseline forward embedding model
NDCG@10: 0.6242 Precision@10: 0.3243 RR: 0.7910
NDCG@100: 0.6945 Precision@100: 0.0606 RR: 0.7910
Append embedding model
NDCG@10: 0.8018 Precision@10: 0.4104 RR: 0.9015
NDCG@100: 0.8462 Precision@100: 0.0689 RR: 0.9015
Residual embedding model
NDCG@10: 0.7732 Precision@10: 0.3964 RR: 0.8780
NDCG@100: 0.8228 Precision@100: 0.0681 RR: 0.8780


In [10]:
print("With 1/10 query log")
# (heading, ranking results) for each model trained on one tenth of the query log.
for model_label, model_results in (
    ("Baseline forward embedding model", baseline_110),
    ("Append embedding model", append_110),
    ("Residual embedding model", residual_110),
):
    print(model_label)
    # Report at cut-off 10 first, then at cut-off 100.
    for cutoff in (10, 100):
        calculate_metrics(ratings, model_results, cutoff)

With 1/10 query log
Baseline forward embedding model
NDCG@10: 0.6210 Precision@10: 0.3256 RR: 0.7913
NDCG@100: 0.6912 Precision@100: 0.0609 RR: 0.7913
Append embedding model
NDCG@10: 0.6991 Precision@10: 0.3680 RR: 0.8201
NDCG@100: 0.7634 Precision@100: 0.0672 RR: 0.8201
Residual embedding model
NDCG@10: 0.6936 Precision@10: 0.3603 RR: 0.8293
NDCG@100: 0.7593 Precision@100: 0.0657 RR: 0.8293


In [12]:
print("Sample from query results")
# (heading, ranking results) for each model trained on the query-sampled subset.
for model_label, model_results in (
    ("Baseline forward embedding model", baseline_q),
    ("Append embedding model", append_q),
    ("Residual embedding model", residual_q),
):
    print(model_label)
    # Report at cut-off 10 first, then at cut-off 100.
    for cutoff in (10, 100):
        calculate_metrics(ratings, model_results, cutoff)

Sample from query results
Baseline forward embedding model
NDCG@10: 0.6223 Precision@10: 0.3219 RR: 0.7903
NDCG@100: 0.6915 Precision@100: 0.0590 RR: 0.7903
Append embedding model
NDCG@10: 0.7656 Precision@10: 0.3923 RR: 0.8743
NDCG@100: 0.8169 Precision@100: 0.0665 RR: 0.8743
Residual embedding model
NDCG@10: 0.7236 Precision@10: 0.3710 RR: 0.8471
NDCG@100: 0.7831 Precision@100: 0.0650 RR: 0.8471


In [None]:
# Same NDCG / Precision / RR report (cut-offs 10 and 100) for two more result objects.
# NOTE(review): this cell has no execution count (In [None]) and neither
# `baseline_reverse_residual` nor `trained_reverse_residual` is assigned in any
# cell visible above it - load them first, otherwise this raises NameError on
# a fresh kernel.
print("Residual baseline forward embedding model")
calculate_metrics(ratings, baseline_reverse_residual)
calculate_metrics(ratings, baseline_reverse_residual, 100)
print("Residual embedding model")
calculate_metrics(ratings, trained_reverse_residual)
calculate_metrics(ratings, trained_reverse_residual, 100)

In [None]:
# Report metrics at cut-offs 10 and 100 for a baseline and a trained result object.
# NOTE(review): this cell has no execution count (In [None]) and neither
# `new_baseline_reverse` nor `new_trained_reverse` is assigned in any cell
# visible above it - load them first, otherwise this raises NameError on a
# fresh kernel.
calculate_metrics(ratings, new_baseline_reverse)
calculate_metrics(ratings, new_baseline_reverse,100)

calculate_metrics(ratings, new_trained_reverse)
calculate_metrics(ratings, new_trained_reverse, 100)

In [48]:
import random

# Toy mapping: key -> list of (qid, rank) pairs.
train_results = {
    1: [(1, 2), (3, 4)],
    2: [(5, 6), (3, 4)],
    3: [(7, 8), (3, 4)],
}

# Draw two (key, pairs) entries without replacement and rebuild a dict from them.
sampled_entries = random.sample(list(train_results.items()), 2)
train_set = dict(sampled_entries)

# Flatten the sample: one printed line per (key, qid, rank) triple.
for key in train_set:
    value = train_set[key]
    for qid, rank in value:
        print(key, qid, rank)

2 5 6
2 3 4
1 1 2
1 3 4


In [3]:
# Read the embedding objects and the id-mapping objects from disk with obj_reader.
# NOTE(review): file names suggest ANCE passage / training-query embeddings and
# the id maps used for the annoy index - confirm contents and array shapes.
print_message("Loading embeddings.")
passage_embeddings = obj_reader("/home/jianx/results/passage_0__emb_p__data_obj_0.pb")
query_train_embeddings = obj_reader("/home/jianx/results/query_0__emb_p__data_obj_0.pb")
query_train_mapping = obj_reader("/datadrive/jianx/data/annoy/100_ance_query_train_map.dict")
pid_mapping = obj_reader("/datadrive/jianx/data/annoy/100_ance_passage_map.dict")

[Sep 08, 23:19:04] Loading embeddings.


In [7]:
# Quick check of a FAISS inner-product index on a small slice of the passages.
import faiss
# Let FAISS use 16 OpenMP threads.
faiss.omp_set_num_threads(16)
# The second axis of passage_embeddings is used as the vector dimension.
dim = passage_embeddings.shape[1]
cpu_index = faiss.IndexFlatIP(dim)
# Only the first 100 passage embeddings are indexed here.
cpu_index.add(passage_embeddings[:100])
# Request 100 neighbours for the single query vector query_train_embeddings[1];
# as the last expression of the cell, the search result is displayed.
cpu_index.search(np.array([query_train_embeddings[1]]), 100)

(array([[694.0746 , 692.02124, 691.7162 , 690.04065, 689.5751 , 688.8958 ,
         688.7236 , 688.52374, 688.50146, 688.38464, 688.3286 , 687.8984 ,
         687.7049 , 687.64087, 687.5666 , 687.0526 , 686.83325, 686.5188 ,
         686.1091 , 685.68335, 685.2858 , 684.66833, 684.6572 , 684.63184,
         684.4979 , 684.372  , 684.3336 , 684.2833 , 684.04095, 683.85657,
         683.85455, 683.2638 , 682.99414, 682.90594, 682.9051 , 682.73004,
         682.64417, 682.41626, 682.27124, 682.2625 , 681.9237 , 681.91614,
         681.91187, 681.80334, 681.4107 , 681.4092 , 681.36536, 681.33496,
         681.1986 , 680.83124, 680.7844 , 680.31506, 680.0169 , 680.0029 ,
         679.9907 , 679.9363 , 679.67365, 679.55884, 679.55743, 679.30554,
         679.23157, 679.1638 , 679.0986 , 679.08167, 678.9065 , 678.8589 ,
         678.84595, 678.84375, 678.81696, 678.7909 , 678.62354, 678.2627 ,
         677.9946 , 677.9926 , 677.82153, 677.6101 , 677.40936, 677.3904 ,
         677.2928 , 677.0