In [33]:
import pickle
import numpy as np

# Pickled dev-set result file; only its "ranking" entry is read below.
# NOTE(review): the dev and test rankings come from different run directories
# (best_residual_split_50000_morepos vs best_append_no_split) — confirm this is intended.
DEV_RANKING_PATH = "/datadrive/ruohan/evaluation/best_residual_split_50000_morepos/dev_set.result"
# Tab-separated text file parsed by load_top1000 (qid in column 0, pid in column 1).
DEV_TOP1000_PATH = "/datadrive/ruohan/data/top1000.dev"
# Pickled mapping that transform_full_retrieval applies to every retrieved pid.
PASSAGE_MAP_PATH = "/datadrive/jianx/data/annoy/100_ance_passage_map.dict"
# Pickled test-set result file; only its "ranking" entry is read below.
TEST_RANKING_PATH = "/datadrive/ruohan/evaluation/best_append_no_split/test_set.result"
# Pickled mapping passed as query_mapping when transforming the test ranking.
QUERY_TEST_REVERSE_MAP_PATH = "/home/jianx/results/test-query_qid2offset.pickle"
# Pickled object used as the test-set top-1000 lookup (indexed by qid).
TOP_DICT_PATH = "/datadrive/jianx/data/initial_ranking.dict"
# Pickled object used as rel_scores (indexed by qid) in compute_coverage_rate_test.
RATING_DICT_PATH = "/datadrive/jianx/data/rel_scores.dict"

In [2]:
def obj_reader(path):
    """Unpickle and return the object stored in the file at ``path``.

    The file is opened in binary mode and decoded with ``encoding="bytes"``.
    Unpickling can execute arbitrary code, so only use this on trusted files.
    """
    with open(path, mode='rb') as pickle_file:
        loaded = pickle.load(pickle_file, encoding="bytes")
    return loaded

In [3]:
def load_top1000(path):
    """Read a tab-separated candidate file into ``{qid: [pid, ...]}``.

    Only the first two tab-separated fields of each line are used: the query
    id and the passage id, both parsed as ``int``. Any further fields are
    ignored. Passage ids are kept in file order and duplicates are retained.

    Blank or whitespace-only lines (for example a trailing newline at the end
    of the file) are skipped instead of raising.

    Raises:
        ValueError: if either of the first two fields is not an integer.
        IndexError: if a non-blank line has fewer than two fields.
    """
    top_dict = {}
    with open(path, "r") as file:
        for line in file:
            if not line.strip():
                continue
            # Only the first two fields are needed; leave the rest unsplit.
            tokens = line.split("\t", 2)
            qid = int(tokens[0])
            pid = int(tokens[1])
            top_dict.setdefault(qid, []).append(pid)
    return top_dict

In [4]:
def transform_full_retrieval(full_retrieval, query_mapping, passage_mapping, query_offset=502939):
    """Re-key a ranking by query id and translate its passage ids.

    Args:
        full_retrieval: mapping whose keys equal ``offset + query_offset`` for
            some offset in ``query_mapping`` and whose values are iterables of
            passage ids known to ``passage_mapping``.
        query_mapping: mapping ``qid -> offset``; it is inverted (after adding
            ``query_offset``) to recover the qid for each ranking key.
        passage_mapping: mapping applied to every passage id in the ranking.
        query_offset: constant added to each query offset before the lookup.
            Defaults to 502939, the value previously hard-coded here
            (presumably the position where these queries start in a combined
            index — TODO confirm).

    Returns:
        A new dict ``{qid: [passage_mapping[pid], ...]}``. The inputs are not
        modified.

    Raises:
        KeyError: if a ranking key has no matching query, or a passage id is
            missing from ``passage_mapping``.
    """
    offset_to_qid = {offset + query_offset: qid for qid, offset in query_mapping.items()}
    return {
        offset_to_qid[key]: [passage_mapping[pid] for pid in pids]
        for key, pids in full_retrieval.items()
    }

In [38]:
def compute_coverage_rate(full_retrieval, top1000, topk):
    """Measure how much of each query's top-k ranking appears in ``top1000``.

    For every ``qid`` in ``full_retrieval`` the first ``topk`` passage ids are
    intersected with ``top1000[qid]``; the overlap size is divided by ``topk``
    (always ``topk``, even if fewer passage ids are available). The mean over
    all queries is printed.

    Returns:
        The list of per-query coverage rates, in ``full_retrieval`` order.
    """
    coverage = []
    for qid, ranked in full_retrieval.items():
        head = set(ranked[:topk])
        candidates = set(top1000[qid])
        overlap = head & candidates
        coverage.append(len(overlap) / topk)
    mean_coverage = np.mean(coverage)
    print("Average Coverage Rate: {}".format(mean_coverage))
    return coverage

In [48]:
def compute_coverage_rate_test(full_retrieval, top1000, topk, rel_scores, inter_option):
    """Compute top-k coverage and rated-passage counts per query.

    For every ``qid`` in ``full_retrieval``:

    * coverage is the size of the overlap between the first ``topk`` passage
      ids and ``top1000[qid]``, divided by ``topk``;
    * the relevant count is the number of passage ids that are members of
      ``rel_scores[qid]``. Membership alone is checked; any score stored
      there is not inspected. The passage ids counted are the overlap when
      ``inter_option`` is truthy, otherwise the whole top-k set.

    Summary statistics are printed. With no queries the function prints a
    short notice and returns two empty lists instead of dividing by zero.

    Returns:
        ``(results, results_n_rel)``: per-query coverage rates and per-query
        relevant counts, both in ``full_retrieval`` order.

    Raises:
        KeyError: if a qid is missing from ``top1000`` or ``rel_scores``.
    """
    results = []
    results_n_rel = []
    avg_rel_n = 0
    for qid, pids in full_retrieval.items():
        rated = rel_scores[qid]
        avg_rel_n += len(rated)
        full_retrieval_set = set(pids[:topk])
        inter_set = full_retrieval_set & set(top1000[qid])
        results.append(len(inter_set) / topk)
        # Count rated passages in either the overlap or the whole top-k set.
        candidates = inter_set if inter_option else full_retrieval_set
        results_n_rel.append(sum(1 for pid in candidates if pid in rated))
    if not results:
        # Nothing to average; avoid ZeroDivisionError and the mean of an empty list.
        print("No queries to evaluate.")
        return results, results_n_rel
    print("Average # of Relevant Passages: {}".format(avg_rel_n / len(results)))
    print("Average Coverage Rate: {} Average Relevant Passages: {}".format(np.mean(results), np.mean(results_n_rel)))
    return results, results_n_rel

In [16]:
# Load the pickled passage map used to translate retrieved passage ids.
passage_mapping = obj_reader(PASSAGE_MAP_PATH)
# Only the "ranking" entry of the dev result file is used.
full_retrieval_rank = obj_reader(DEV_RANKING_PATH)["ranking"]
# NOTE(review): this path is an inline literal, unlike the other inputs, which
# are named constants in the first cell.
query_mapping = obj_reader("/datadrive/data/preprocessed_data_with_dev/qid2offset.pickle")
# Candidate passage ids per query, parsed from the tab-separated dev file.
top_1000_results = load_top1000(DEV_TOP1000_PATH)
# Re-key the dev ranking through query_mapping and map its pids through passage_mapping.
rank_true_pid = transform_full_retrieval(full_retrieval_rank, query_mapping, passage_mapping)

In [29]:
# Only the "ranking" entry of the test result file is used.
test_rank = obj_reader(TEST_RANKING_PATH)["ranking"]
query_test_mapping = obj_reader(QUERY_TEST_REVERSE_MAP_PATH)
# Unlike the dev candidates, the test candidates are loaded from a pickle.
test_top1000 = obj_reader(TOP_DICT_PATH)
# Reuses passage_mapping loaded in the dev-set loading cell, so that cell must run first.
rank_true_pid_test = transform_full_retrieval(test_rank, query_test_mapping, passage_mapping)

In [34]:
# Pickled object passed as rel_scores to compute_coverage_rate_test, which indexes it by qid.
test_rel_scores = obj_reader(RATING_DICT_PATH)

## Dev Set

In [39]:
# Per-query dev coverage of the top-100 ranking against the top-1000 candidates.
results_100 = compute_coverage_rate(rank_true_pid, top_1000_results, 100)

Average Coverage Rate: 0.32498997134670493


In [40]:
# Per-query dev coverage of the top-10 ranking against the top-1000 candidates.
results_10 = compute_coverage_rate(rank_true_pid, top_1000_results, 10)

Average Coverage Rate: 0.6120630372492837


In [59]:
# Per-query dev coverage of the top-3 ranking. Stored under its own name so the
# top-10 results held in `results_10` are not overwritten or mislabelled.
results_3 = compute_coverage_rate(rank_true_pid, top_1000_results, 3)

Average Coverage Rate: 0.72774594078319


## Test Set

In [49]:
# inter_option=False: rated passages are counted over the whole top-k set.
# Each name holds the (results, results_n_rel) tuple returned by the function.
results_100_test = compute_coverage_rate_test(rank_true_pid_test, test_top1000, 100, test_rel_scores, False)
results_10_test = compute_coverage_rate_test(rank_true_pid_test, test_top1000, 10, test_rel_scores, False)

Average # of Relevant Passages: 95.3953488372093
Average Coverage Rate: 0.35441860465116276 Average Relevant Passages: 25.41860465116279
Average # of Relevant Passages: 95.3953488372093
Average Coverage Rate: 0.6860465116279071 Average Relevant Passages: 6.558139534883721


In [50]:
# inter_option=True: rated passages are counted only within the overlap with the top-1000 candidates.
# NOTE(review): this rebinds results_100_test / results_10_test, replacing the
# tuples assigned by the inter_option=False cell.
results_100_test = compute_coverage_rate_test(rank_true_pid_test, test_top1000, 100, test_rel_scores, True)
results_10_test = compute_coverage_rate_test(rank_true_pid_test, test_top1000, 10, test_rel_scores, True)

Average # of Relevant Passages: 95.3953488372093
Average Coverage Rate: 0.35441860465116276 Average Relevant Passages: 20.511627906976745
Average # of Relevant Passages: 95.3953488372093
Average Coverage Rate: 0.6860465116279071 Average Relevant Passages: 5.953488372093023
