In [1]:
import math
import numpy as np

def generate_tweetid_gain(file_name):
    """Load a qrels file into a nested ``{query_id: {doc_id: gain}}`` dict.

    Every non-blank line is split on whitespace. Field 0 is used as the
    query id, field 2 as the document id and field 3 as the integer gain
    (field 1 is ignored). Only documents whose gain is > 0 are stored, so
    the stored values can later be sorted to build the IDCG ground truth.
    A query whose documents all have non-positive gain still gets an
    (empty) entry.

    Args:
        file_name: Path of the qrels file.

    Returns:
        dict mapping query id (str) to a dict of doc id (str) -> gain (int).

    Raises:
        IndexError: If a non-blank line has fewer than four fields.
        ValueError: If the gain field is not an integer literal.
    """
    qrels_dict = {}
    with open(file_name, 'r', errors='ignore') as f:
        for line in f:
            # split() without an argument tolerates tabs and repeated spaces
            # and returns [] for blank lines, which are skipped instead of
            # creating a bogus '' query and crashing on the missing fields.
            ele = line.split()
            if not ele:
                continue
            gains = qrels_dict.setdefault(ele[0], {})
            gain = int(ele[3])
            if gain > 0:
                gains[ele[2]] = gain
    return qrels_dict

def read_tweetid_test(file_name):
    """Load a ranked result file into ``{query_id: [doc_id, ...]}``.

    Expected input, one whitespace-separated pair per line::

        query_id doc_id
        query_id doc_id
        ...

    Document ids are appended in file order, so the file order is the
    ranking order used by the metric functions. Blank lines are skipped.

    Args:
        file_name: Path of the result file.

    Returns:
        dict mapping query id (str) to the list of doc ids (str) in the
        order they appear in the file.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
    """
    test_dict = {}
    with open(file_name, 'r', errors='ignore') as f:
        for line in f:
            # Whitespace split: handles tabs / repeated spaces and gives []
            # for blank lines, which would otherwise raise IndexError.
            ele = line.split()
            if not ele:
                continue
            test_dict.setdefault(ele[0], []).append(ele[1])
    return test_dict

# qrels_dict is the ground truth; test_dict holds the retrieval results
def MAP_eval(qrels_dict, test_dict, k = 100):
    """Mean average precision over every query in ``qrels_dict``.

    For each query only the first ``min(#relevant, #retrieved, k)`` ranked
    documents are inspected. Precision is taken at every rank in that window
    that holds a relevant document, and the query's AP is the mean of those
    precisions (i.e. it is normalised by the number of relevant documents
    found inside the window).

    NOTE(review): textbook AP normalises by the total number of relevant
    documents; the normalisation above is this file's existing definition
    and is kept so previously reported scores stay comparable.

    A query scores AP = 0 (instead of NaN / KeyError) when it is missing from
    ``test_dict`` or when no relevant document falls inside its window.

    Args:
        qrels_dict: ``{query_id: {doc_id: gain}}`` ground truth; every doc id
            present for a query is treated as relevant.
        test_dict: ``{query_id: [doc_id, ...]}`` ranked results, best first.
        k: Maximum rank depth to inspect.

    Returns:
        The mean of the per-query AP values, or 0.0 if ``qrels_dict`` is empty.
    """
    ap_list = []
    for quel, gains in qrels_dict.items():
        ranked = test_dict.get(quel, [])
        # Clamp at 0 so a non-positive k cannot turn into a negative slice.
        window = max(0, min(len(gains), len(ranked), k))
        hits = 0
        precisions = []
        for rank, doc in enumerate(ranked[:window], start=1):
            # Dict membership is O(1); the running hit counter replaces
            # recounting the whole prefix at every rank.
            if doc in gains:
                hits += 1
                precisions.append(hits / rank)
        if precisions:
            ap_list.append(np.sum(np.array(precisions)) / len(precisions))
        else:
            # No relevant document in the window: 0 rather than 0/0 = NaN.
            ap_list.append(0.0)
    if not ap_list:
        return 0.0
    return np.mean(np.array(ap_list))

def MRR_eval(qrels_dict, test_dict, k = 100):
    """Mean reciprocal rank over every query in ``qrels_dict``.

    For each query the ranked list is scanned from the top, at most ``k``
    deep. The reciprocal rank is ``1 / rank`` (1-based) of the first document
    that appears in the query's qrels entry, or 0 when no such document is
    found within the top ``k`` or the query is missing from ``test_dict``.

    Args:
        qrels_dict: ``{query_id: {doc_id: gain}}`` ground truth; every doc id
            present for a query is treated as relevant.
        test_dict: ``{query_id: [doc_id, ...]}`` ranked results, best first.
        k: Maximum rank depth to inspect.

    Returns:
        The mean of the per-query reciprocal ranks, or 0.0 if ``qrels_dict``
        is empty.
    """
    rr_list = []
    for quel, gains in qrels_dict.items():
        ranked = test_dict.get(quel, [])
        rr = 0.0
        # Walk the ranking rather than calling list.index() for each relevant
        # document: index() raises ValueError for documents never retrieved.
        for rank, doc in enumerate(ranked[:max(k, 0)], start=1):
            if doc in gains:
                rr = 1.0 / rank
                break
        rr_list.append(rr)
    if not rr_list:
        return 0.0
    return np.mean(np.array(rr_list))

def NDCG_eval(qrels_dict, test_dict, k = 100):
    """Mean NDCG@k over every query in ``qrels_dict``.

    DCG sums ``gain / log2(rank + 1)`` over the top ``k`` ranked documents
    (fewer if the result list is shorter); documents absent from the query's
    qrels entry contribute gain 0. IDCG applies the same discount to the
    query's gains sorted in descending order, cut to the same depth.

    A query scores 0 (instead of NaN / inf / KeyError) when its IDCG is 0 —
    for example it has no relevant documents or no results — or when it is
    missing from ``test_dict``.

    Args:
        qrels_dict: ``{query_id: {doc_id: gain}}`` ground truth.
        test_dict: ``{query_id: [doc_id, ...]}`` ranked results, best first.
        k: Maximum rank depth to inspect.

    Returns:
        The mean of the per-query NDCG values, or 0.0 if ``qrels_dict`` is
        empty.
    """
    ndcg_list = []
    for quel, gains in qrels_dict.items():
        ranked = test_dict.get(quel, [])[:max(k, 0)]
        # There may be fewer than k results; use the actual depth.
        depth = len(ranked)
        # Ranks start at 1, so the smallest discount is log2(2) = 1 (never 0).
        discounts = np.log2(np.arange(1, depth + 1) + 1)

        rel = np.array([gains.get(doc, 0) for doc in ranked], dtype=float)
        dcg = np.sum(rel / discounts)

        # Sort ALL gains first and truncate afterwards: truncating the qrels
        # in insertion order before sorting can drop the highest gains and
        # underestimate IDCG. Sorting the values also supports any gain scale,
        # not only the grades 1 and 2.
        ideal = np.array(sorted(gains.values(), reverse=True)[:depth], dtype=float)
        idcg = np.sum(ideal / discounts[:len(ideal)])

        ndcg_list.append(dcg / idcg if idcg > 0 else 0.0)
    if not ndcg_list:
        return 0.0
    return np.mean(np.array(ndcg_list))


In [2]:
def evaluation(file_qrels_path = 'qrels.txt', file_test_path = 'result.txt', k = 100):
    """Load the qrels and result files and print MAP, MRR and NDCG.

    Called without arguments it behaves as before: it reads 'qrels.txt' and
    'result.txt' from the working directory and evaluates at depth 100.

    Args:
        file_qrels_path: Query relevance file, parsed by
            ``generate_tweetid_gain``.
        file_test_path: Ranked result file, parsed by ``read_tweetid_test``
            (see that function for the expected format).
        k: Rank cutoff passed to each metric.

    Returns:
        None. The three scores are printed, one per line.
    """
    # qrels_dict = {query_id: {doc_id: gain, doc_id: gain, ...}, ...}
    qrels_dict = generate_tweetid_gain(file_qrels_path)
    # test_dict = {query_id: [doc_id, doc_id, ...], ...}
    test_dict = read_tweetid_test(file_test_path)

    MAP = MAP_eval(qrels_dict, test_dict, k)
    print('MAP', ' = ', MAP, sep='')

    MRR = MRR_eval(qrels_dict, test_dict, k)
    print('MRR', ' = ', MRR, sep='')

    NDCG = NDCG_eval(qrels_dict, test_dict, k)
    print('NDCG', ' = ', NDCG, sep='')


In [3]:
# Run the evaluation only when this code executes as the main module,
# not when it is imported from elsewhere.
if __name__ == '__main__':
    evaluation()


MAP = 0.8701836509684747
MRR = 0.79737012987013
NDCG = 0.8666625607201335
