In [1]:
import requests
import re
from elasticsearch import Elasticsearch
from elasticsearch.helpers import parallel_bulk
from json import JSONDecodeError
from collections import defaultdict
import json
import numpy as np

from util.es import ES
from smart_dataset.evaluation.dbpedia.evaluate import load_type_hierarchy, evaluate, get_type_path

In [2]:
# Identifiers of the index configurations compared in this notebook.
# NOTE(review): 'EC'/'TC' and 'BM25'/'LM' are opaque labels passed to the
# project's ES wrapper; their exact meaning is defined there - confirm.
base_models = ['EC', 'TC']
base_similarities = ['BM25', 'LM']

# One ES wrapper per (model, similarity) pair, model-major order:
# EC/BM25, EC/LM, TC/BM25, TC/LM.
es_list = [
    ES(model, similarity)
    for model in base_models
    for similarity in base_similarities
]

In [3]:
# Sanity check: print how many documents each index currently holds.
# Caveat: the count might be wrong if this runs right after reindexing.
print('Num indexed documents: ')
for es in es_list:
    # The cat-count call is asked for JSON; the first row carries the count
    # as a string under 'count'.
    count_rows = es.es.cat.count(es.get_index(), params={"format": "json"})
    num = int(count_rows[0]['count'])
    print(f'{es.model} - {es.similarity}:\t{num}')

Num indexed documents: 
EC - BM25:	3498975
EC - LM:	3498975
TC - BM25:	462
TC - LM:	462


In [4]:
#test = ES('EC', 'BM25')
#res = test.es.search(index=test.get_index(), q = 'This is a big test', explain=True)

In [5]:
def get_ground_truth(dataset, type_hierarchy):
    """Collect the gold type labels for every usable 'resource' query.

    Args:
        dataset: iterable of query dicts, each providing the keys 'id',
            'category' and 'type' (an iterable of type labels).
        type_hierarchy: container of known type labels; only labels found in
            it (via ``in``) are kept.

    Returns:
        dict mapping query id -> {'category': 'resource', 'type': [labels]}.
        Queries whose category is not 'resource', or that have no label in
        ``type_hierarchy``, are left out.
    """
    gold = {}
    for entry in dataset:
        query_id = entry['id']
        if entry['category'] == 'resource':
            # Drop labels the hierarchy does not know about.
            known_types = list(filter(lambda label: label in type_hierarchy, entry['type']))
            if known_types:
                gold[query_id] = dict(category='resource', type=known_types)

    return gold

In [6]:
def format_outputs(es, dataset, type_hierarchy, k):
    """Build the system-output dict for the evaluator from baseline scores.

    Args:
        es: object exposing ``generate_baseline_scores(name, k)``; the result
            is used as a mapping from query id to an iterable of
            ``(type, score)`` pairs.
        dataset: iterable of query dicts with keys 'id', 'category', 'type'.
        type_hierarchy: container of known type labels.
        k: forwarded unchanged to ``generate_baseline_scores``
            (presumably the retrieval cut-off - confirm against ES).

    Returns:
        dict mapping query id -> {'category': 'resource', 'type': [labels]},
        with one entry for every 'resource' query that has at least one gold
        label in ``type_hierarchy``. Predicted labels keep the order in which
        the scores list them; labels outside the hierarchy are dropped, and a
        query without any scores gets an empty list.
    """
    # Scores are always requested for the 'train' set, whatever `dataset` is.
    results = es.generate_baseline_scores('train', k)
    system_output = {}
    for query in dataset:
        ID = query['id']
        if query['category'] != 'resource':
            continue

        # Eligibility is decided by membership alone, which is the same rule
        # get_ground_truth applies, so both dicts end up with identical keys.
        # (Testing the truthiness of the label itself would drop a query
        # whose only in-hierarchy label is falsy, e.g. an empty string.)
        if not any(t in type_hierarchy for t in query['type']):
            continue

        # Keep only predicted labels the hierarchy knows; the score is unused.
        system_output_type = [t for t, _score in results[ID] if t in type_hierarchy] if ID in results else []
        # Disabled alternative: expand the top-ranked type into its full path.
        #system_output_type = get_type_path(system_output_type[0], type_hierarchy) if system_output_type else []

        system_output[ID] = {
            'category': 'resource',
            'type': system_output_type
        }

    return system_output

In [7]:
# Load the training queries. The encoding is pinned to UTF-8 so decoding does
# not depend on the platform's locale default.
with open('./data/train_set_fixed.json', 'r', encoding='utf-8') as f:
    train_set = json.load(f)

# Load the type hierarchy (and its maximum depth) used by the evaluator, then
# derive the gold labels for the queries that can be evaluated against it.
type_hierarchy, max_depth = load_type_hierarchy('./smart_dataset/evaluation/dbpedia/dbpedia_types.tsv')
ground_truth = get_ground_truth(train_set, type_hierarchy)

Loading type hierarchy from ./smart_dataset/evaluation/dbpedia/dbpedia_types.tsv... 760 types loaded (max depth: 7)


In [8]:
# Evaluate every index configuration at each cut-off k and print the report.
for es in es_list:
    # 'TC' configurations are evaluated at k=100 only; every other model
    # sweeps the full list of cut-offs.
    cutoffs = [100] if es.model == 'TC' else [5, 10, 20, 50, 100]
    for k in cutoffs:
        system_output = format_outputs(es, train_set, type_hierarchy, k)
        print('\nEvaluation for {} - {} - {}:'.format(es.model, es.similarity, k))
        evaluate(system_output, ground_truth, type_hierarchy, max_depth)

File 'top100_EC_BM25_train' not found.
Retrieving from index.

Evaluation for EC - BM25 - 5:


Evaluation results:
-------------------
Category prediction (based on 7662 questions)
  Accuracy: 1.000
Type ranking (based on 7662 questions)
  NDCG@5:  0.224
  NDCG@10: 0.251

Evaluation for EC - BM25 - 10:


Evaluation results:
-------------------
Category prediction (based on 7662 questions)
  Accuracy: 1.000
Type ranking (based on 7662 questions)
  NDCG@5:  0.210
  NDCG@10: 0.245

Evaluation for EC - BM25 - 20:


Evaluation results:
-------------------
Category prediction (based on 7662 questions)
  Accuracy: 1.000
Type ranking (based on 7662 questions)
  NDCG@5:  0.203
  NDCG@10: 0.236

Evaluation for EC - BM25 - 50:


Evaluation results:
-------------------
Category prediction (based on 7662 questions)
  Accuracy: 1.000
Type ranking (based on 7662 questions)
  NDCG@5:  0.201
  NDCG@10: 0.227

Evaluation for EC - BM25 - 100:


Evaluation results:
-------------------
Category prediction 