## Imports

In [1]:
import json
import elasticsearch
import string
import re
import math
import os
import numpy as np
from gensim.test.utils import datapath
from gensim.models import KeyedVectors
from gensim.models import Word2Vec
from gensim.scripts.glove2word2vec import glove2word2vec
from sklearn.metrics.pairwise import cosine_similarity
from collections import Counter
from elasticsearch import Elasticsearch

In [2]:
QUERY_TRAIN_FILEPATH = 'datasets\DBpedia\smarttask_dbpedia_train.json'
QUERY_TEST_FILEPATH = 'datasets\DBpedia\smarttask_dbpedia_test_questions.json'
INDEX_NAME = 'smart'

In [3]:
stop_words = set(['a', 'an', 'and', 'are', 'as', 'at', 'be', 'but', 'by', 'for', 'if', 'in', 'into', 'is', 'it', 'no', 'not', 'of', 'on', 'or', 'such', 'that', 'the', 'their', 'then', 'there', 'these', 'they', 'this', 'to', 'was', 'will', 'with'])
print(stop_words) # Default in ElasticSearch

{'to', 'an', 'a', 'as', 'but', 'are', 'then', 'is', 'with', 'the', 'at', 'these', 'for', 'not', 'and', 'this', 'was', 'will', 'no', 'that', 'they', 'or', 'in', 'of', 'be', 'by', 'into', 'if', 'it', 'on', 'there', 'their', 'such'}


In [4]:
def preprocess(text):
    text = text.strip().lower()
    text = text.replace('_', ' ').replace('-', ' ')
    text = text.translate(str.maketrans('', '', string.punctuation)) # Remove punctuation stuff.
    text = re.sub('\s\s+', ' ', text).split(' ') # Replace consequtive whitespace with a single space.
    return ' '.join([v for v in text if not v in stop_words]).strip()

## Word2Vec - Convert GloVe to Gensim

In [28]:
EMBEDDING_FILE = datapath(os.getcwd()+'/datasets/gensim/gensim.6B.100d.txt')
def convertGloveToGensim(target, output):
    _ = glove2word2vec(datapath(os.getcwd()+target), datapath(os.getcwd()+output))

#convertGloveToGensim('/datasets/glove/glove.6B.50d.txt', '/datasets/gensim/gensim.6B.50d.txt')
#convertGloveToGensim('/datasets/glove/glove.6B.100d.txt', '/datasets/gensim/gensim.6B.100d.txt')
#convertGloveToGensim('/datasets/glove/glove.6B.200d.txt', '/datasets/gensim/gensim.6B.200d.txt')
#convertGloveToGensim('/datasets/glove/glove.6B.300d.txt', '/datasets/gensim/gensim.6B.300d.txt')

## Elastic search

In [6]:
es = Elasticsearch()
es.info()

{'name': 'ULTIMECIA',
 'cluster_name': 'elasticsearch',
 'cluster_uuid': 'MHYEAbcOS_i6ybp0d4NE2A',
 'version': {'number': '7.9.1',
  'build_flavor': 'default',
  'build_type': 'zip',
  'build_hash': '083627f112ba94dffc1232e8b42b73492789ef91',
  'build_date': '2020-09-01T21:22:21.964974Z',
  'build_snapshot': False,
  'lucene_version': '8.6.2',
  'minimum_wire_compatibility_version': '6.8.0',
  'minimum_index_compatibility_version': '6.0.0-beta1'},
 'tagline': 'You Know, for Search'}

In [7]:
es.indices.refresh(INDEX_NAME)
count = es.cat.count(INDEX_NAME, params={"format": "json"})
print('Docs:', int(count[0]['count']))

Docs: 4926217


## Load Queries

In [8]:
def load_train_queries(filepath):
    """
    Load training queries from a file. 
    Returns a dictoinary with queryID as key and corresponding query, category and type.
    """
    query_dicts = {}
    with open(filepath, "r") as f:
      queries = f.read()
    
    for query in json.loads(queries):
        try:
            qID, qText, qCat, qType = query["id"].lower(), query["question"].lower(), query["category"].lower(), ' '.join(query["type"]).lower()
            if not 'dbo:' in qType: # Skip queries without a dbo: type.
                continue
            query_dicts[qID] = {"query": preprocess(qText), "category": qCat, "type": qType.replace('dbo:', '')}
        except Exception as e:
            # print("Query: {}\n\tThrew an exception: {}\n".format(query, e))
            continue
    return query_dicts

def load_test_queries(filepath):
    """
    Load test queries from a file.
    Returns a dictionary with queryID as key, and corresponding query as a string.
    """
    query_dicts = {}
    with open(filepath, "r") as f:
      queries = f.read()
    
    for query in json.loads(queries):
        try:
            query_dicts[query["id"].lower()] = {"query": preprocess(query["question"].lower())}
        except Exception as e:
            # print("Query: {}\n\tThrew an exception: {}\n".format(query, e))
            continue
    return query_dicts

In [9]:
training_queries = load_train_queries(QUERY_TRAIN_FILEPATH)
test_queries = load_test_queries(QUERY_TEST_FILEPATH)

print("# training queries:", len(training_queries), "\n\tExample key'dbpedia_17655' returns:", training_queries['dbpedia_17655'])
print("# test queries:", len(test_queries), "\n\tExample key'dbpedia_21099' contain:", test_queries['dbpedia_21099'])

# training queries: 9557 
	Example key'dbpedia_17655' returns: {'query': 'what town birthplace joseph greenberg', 'category': 'resource', 'type': 'city settlement populatedplace place location'}
# test queries: 4369 
	Example key'dbpedia_21099' contain: {'query': 'under which president did some politicians live kensington'}


## Load evaluation types

In [10]:
def loadDBPediaTypes():
    kv = {}
    max_depth = 0
    with open('./evaluation/dbpedia/dbpedia_types.tsv', 'r') as f:
        for i, line in enumerate(f):
            if i == 0: # Skip header
                continue
            line = line.strip().lower().split('\t')
            if len(line) != 3:
                continue
            type_name, depth, parent_type = line[0].split(':')[-1], int(line[1]), line[-1].split(':')[-1]
            if (len(type_name) == 0) or (len(parent_type) == 0):
                continue
            kv[type_name] = {'depth':depth, 'parent':parent_type}
            max_depth = max(depth, max_depth)
    return kv, max_depth

def getTypeHierarchy(kv, items, target):
    if not target in kv:
        return
    typeName, typeDepth = target, kv[target]['depth']
    items.append(typeName)
    getTypeHierarchy(kv, items, kv[target]['parent'])

def buildDBPediaTypeHierarchy(kv, target):
    items = [] # List of types, representing the hierarchy of the types related to the target.
    getTypeHierarchy(kv, items, target)
    return items[::-1] # Reverse the order to return the correct hierarchy where the first item = top level.

In [11]:
type_hierarchy, max_depth = loadDBPediaTypes()
print(list(type_hierarchy.keys())[:4], 'Max Depth', max_depth)

['basketballleague', 'naturalevent', 'province', 'lunarcrater'] Max Depth 7


In [12]:
buildDBPediaTypeHierarchy(type_hierarchy, 'comic') # Example hierarchy

['work', 'writtenwork', 'comic']

## Baseline Retrieval
Implements Okapi BM25, uses the Elastic search inbuilt implementation

In [13]:
def internal_BM25(query, k = 100, field = 'abstract', index = INDEX_NAME):
    """
    Perform baseline retrieval on a index using the inbuilt BM25 index

    Arguments:
        index: string
        query: string, space separated terms
        k: integer
    
    Returns:
        List of k first entity IDs(string)
    """
    hits = es.search(index=index, body={'query': {'match': {field: query}}}, _source=False, size=k).get('hits', {}).get('hits', {})
    hits_ids = [obj['_id'] for obj in hits]
    hits_types = [es.get(index=index, id=doc)["_source"].get("instance", "thing") for doc in hits_ids]
    return Counter([obj for obj in hits_types if len(obj) > 0]).most_common()
    
def internal_BM25_score(query, k = 100, field = 'abstract', index = INDEX_NAME):
    """
    Perform baseline retrieval on a index using the inbuilt BM25 index

    Arguments:
        index: string
        query: string, space separated terms
        k: integer
    
    Returns:
        List of k first entity IDs(string), and corresponding score(double)
    """
    hits = es.search(index=index, body={'query': {'match': {field: query}}}, _source=False, size=k).get('hits', {}).get('hits', {})
    hits.sort(key = lambda x: x['_score'], reverse=True)
    return {obj['_id']:obj['_score'] for obj in hits}

In [14]:
internal_BM25("civil rights")

[('thing', 49),
 ('person', 22),
 ('organisation', 9),
 ('officeholder', 4),
 ('governmentagency', 3),
 ('politician', 2),
 ('academicjournal', 2),
 ('politicalparty', 2),
 ('non profitorganisation', 2),
 ('company', 1),
 ('museum', 1),
 ('saint', 1),
 ('writer', 1),
 ('ambassador', 1)]

In [15]:
internal_BM25_score("civil rights", k=5)

{'leadership conference on civil and human rights': 14.680483,
 'civil rights commission puerto rico': 14.557869,
 'lawyers committee for civil rights under law': 14.492786,
 'chicano movement': 14.249865,
 'civil rights act': 14.124385}

In [16]:
def analyze_query(es, query, index=INDEX_NAME, field = 'abstract'):
    """Analyzes a query with respect to the relevant index. 
    
    Arguments:
        es: Elasticsearch object instance.
        query: String of query terms.
        field: The field with respect to which the query is analyzed. 
        index: Name of the index with respect to which the query is analyzed.  
    
    Returns:
        A list of query terms that exist in the specified field among the documents in the index. 
    """
    tokens = es.indices.analyze(index=index, body={'text': query})['tokens']
    query_terms = []
    for t in sorted(tokens, key=lambda x: x['position']):
        ## Use a boolean query to find at least one document that contains the term.
        hits = es.search(index=index, body={'query': {'match': {field: t['token']}}}, 
                                   _source=False, size=1).get('hits', {}).get('hits', {})
        doc_id = hits[0]['_id'] if len(hits) > 0 else None
        if doc_id is None:
            continue
        query_terms.append(t['token'])
    return query_terms

def evaluate_baseline(es, amount=0, index=INDEX_NAME):
    """
    Evaluate the BM25 baseline on our train queries.
    """
    progress, N = 0, len(training_queries)
    results = {}
    for qId, queryObject in training_queries.items():
        query = analyze_query(es, queryObject['query'], index)
        hits = es.search(index=index, _source=True, size=10,
            body={"query": {"bool": {"must": {"match": {"abstract": ' '.join(query)}}, "must_not": {"match": {"instance": "thing"}}}}}
        )['hits']['hits']
        hits_types = [obj['_source']['instance'] for obj in hits]
        results[qId] = {
            'type': hits_types,
            'category': 'resource',
            'match': max([(1 if (t in queryObject['type']) else 0) for t in hits_types]) # Yes / No was there an explicit match?
        }

        progress += 1
        if (progress % 50) == 0:
            print('Progress - {}/{} queries handled.'.format(progress, N))

        if amount and (progress >= amount):
            break

    return results

def tokens_to_vec(tokens, model):
    """
    Convert a list of tokens to some word 2 vec representation which 
    conforms to our model.
    """
    size = model.wv.vectors.shape[1]
    if len(tokens) == 0:
        return np.zeros(size)        
    embeddings = []
    for v in tokens:
        embeddings.append((model.wv.word_vec(v) if (v in model.wv.vocab) else np.random.rand(size)))
    return np.mean(embeddings, axis=0)

def evaluate_advanced(es, model, k=1000, amount=0, index=INDEX_NAME):
    """
    Evaluate our advanced method, re-rank the documents using word2vec.
    We are using pre-trained embeddings. Convert each query and related doc to word2vec format,
    compare the similarity and re-rank the entries.
    """
    progress, N = 0, len(training_queries)
    results = {}
    for qId, queryObject in training_queries.items():
        query = analyze_query(es, queryObject['query'], index)
        hits = es.search(index=index, _source=True, size=k, 
            body={"query": {"bool": {"must": {"match": {"abstract": ' '.join(query)}}, "must_not": {"match": {"instance": "thing"}}}}}
        )['hits']['hits']
        queryEmbedding = tokens_to_vec(query, model).reshape(1, -1)
        rerank = []
        for obj in hits:
            docEmbedding = tokens_to_vec(obj['_source']['abstract'].split(), model).reshape(1, -1)
            sim = cosine_similarity(queryEmbedding, docEmbedding).item()
            rerank.append((obj['_source']['instance'], sim))
            
        rerank.sort(key=lambda x:x[-1], reverse=True) # Re-rank the initial hits using our word2vec mdl.
        results[qId] = {
            'type': [v for v,_ in rerank[:10]],
            'category': 'resource',
            'match': max([(1 if (t in queryObject['type']) else 0) for t,_ in rerank[:10]]) # Yes / No was there an explicit match?
        }

        progress += 1
        if (progress % 50) == 0:
            print('Progress - {}/{} queries handled.'.format(progress, N))

        if amount and (progress >= amount):
            break

    return results

In [17]:
def dcg(gains, k=5):
    """
    Computes DCG for a given ranking.
    Traditional DCG formula: DCG_k = sum_{i=1}^k gain_i / log_2(i+1).
    """
    dcg = 0
    for i in range(0, min(k, len(gains))):
        dcg += gains[i] / math.log(i + 2, 2)
    return dcg

def ndcg(gains, ideal_gains, k=5):
    """Computes NDCG given gains for a ranking as well as the ideal gains."""
    try:
        return dcg(gains, k) / dcg(ideal_gains, k)
    except:
        return 0

def get_type_path(type, type_hierarchy):
    """
    Gets the type's path in the hierarchy (excluding the root type, like owl:Thing).
    The path for each type is computed only once then cached in type_hierarchy,
    to save computation.
    """
    if not type in type_hierarchy:
        type_hierarchy[type] = {'depth':1, 'parent':''}

    if 'path' not in type_hierarchy[type]:
        type_path = []
        current_type = type
        while current_type in type_hierarchy:
            type_path.append(current_type)
            current_type = type_hierarchy[current_type]['parent']
        type_hierarchy[type]['path'] = type_path
    return type_hierarchy[type]['path']

def get_type_distance(type1, type2, type_hierarchy):
    """
    Computes the distance between two types in the hierarchy.
    Distance is defined to be the number of steps between them in the hierarchy,
    if they lie on the same path (which is 0 if the two types match), and
    infinity otherwise.
    """
    type1_path = get_type_path(type1, type_hierarchy)
    type2_path = get_type_path(type2, type_hierarchy)
    distance = math.inf
    if type1 in type2_path:
        distance = type2_path.index(type1)
    if type2 in type1_path:
        distance = min(type1_path.index(type2), distance)
    return distance

def get_most_specific_types(types, type_hierarchy):
    """Filters a set of input types to most specific types w.r.t the type
    hierarchy; i.e., super-types are removed."""
    filtered_types = set(types)
    for type in types:
        type_path = get_type_path(type, type_hierarchy)
        for supertype in type_path[1:]:
            if supertype in filtered_types:
                filtered_types.remove(supertype)
    return filtered_types

def get_expanded_types(types, type_hierarchy):
    """Expands a set of types with both more specific and more generic types
    (i.e., all super-types and sub-types)."""
    expanded_types = set()
    for type in types:
        # Adding all supertypes.
        expanded_types.update(get_type_path(type, type_hierarchy))
        # Adding all subtypes (NOTE: this bit could be done more efficiently).
        for type2 in type_hierarchy:
            if type_hierarchy[type2]['depth'] <= type_hierarchy[type]['depth']:
                continue
            type2_path = get_type_path(type2, type_hierarchy)
            if type in type2_path:
                expanded_types.update(type2_path)
    return expanded_types

def compute_type_gains(predicted_types, gold_types, type_hierarchy, max_depth):
    """Computes gains for a ranked list of type predictions.

    Following the definition of Linear gain in (Balog and Neumayer, CIKM'12),
    the gain for a given predicted type is 0 if it is not on the same path with
    any of the gold types, and otherwise it's $1-d(t,t_q)/h$ where $d(t,t_q)$ is
    the distance between the predicted type and the closest matching gold type
    in the type hierarchy and h is the maximum depth of the type hierarchy.

    Args:
        predicted_types: Ranked list of predicted types.
        gold_types: List/set of gold types (i.e., perfect answers).
        type_hierarchy: Dict with type hierarchy.
        max_depth: Maximum depth of the type hierarchy.

    Returns:
        List with gain values corresponding to each item in predicted_types.
    """
    gains = []
    expanded_gold_types = get_expanded_types(gold_types, type_hierarchy)
    for predicted_type in predicted_types:
        if predicted_type in expanded_gold_types:
            # Since not all gold types may lie on the same branch, we take the
            # closest gold type for determining distance.
            min_distance = math.inf
            for gold_type in gold_types:
                min_distance = min(get_type_distance(predicted_type, gold_type,
                                                     type_hierarchy),
                                   min_distance)
            gains.append(1 - min_distance / max_depth)
        else:
            gains.append(0)
    return gains

def evaluate(result):
	accuracy, ndcg_5, ndcg_10 = [], [], []
	for qId, obj in training_queries.items():
		if qId not in result:
			continue
		
		qTypes = obj['type'].split(' ')
		if len(qTypes) == 0:
			continue
			
		predicted_category = result[qId].get('category', None)
		predicted_type = result[qId].get('type', [None])
		accuracy.append(result[qId].get('match', 0))

		# Filters obj types to most specific ones in the hierarchy.
		obj_types = get_most_specific_types(qTypes, type_hierarchy)
		gains = compute_type_gains(predicted_type, obj_types, type_hierarchy, max_depth)
		ideal_gains = sorted(
				compute_type_gains(
				get_expanded_types(obj_types, type_hierarchy), obj_types,
				type_hierarchy, max_depth), reverse=True)

		ndcg_5.append(ndcg(gains, ideal_gains, k=5))
		ndcg_10.append(ndcg(gains, ideal_gains, k=10))

	print('Evaluation results:')
	print('-------------------')
	print('Category prediction (based on {} questions)'.format(len(accuracy)))
	print('  Accuracy: {:5.3f}'.format(sum(accuracy) / len(accuracy)))
	print('Type ranking (based on {} questions)'.format(len(ndcg_5)))
	print('  NDCG@5:  {:5.3f}'.format(sum(ndcg_5) / len(ndcg_5)))
	print('  NDCG@10: {:5.3f}'.format(sum(ndcg_10) / len(ndcg_10)))

def write_result_to_file(res, file):
    with open('./results/{}.csv'.format(file), 'w') as f:
        for qId, obj in res.items():
            f.write('{},{},{}\n'.format(qId, obj['match'], ' '.join(obj['type'])))

def read_result_from_file(file):
    result = {}
    with open('./results/{}.csv'.format(file), 'r') as f:
        for line in f:
            line = line.strip().split(',')
            if len(line) != 3:
                continue
            result[line[0]] = {
                'type': [v for v in line[-1].split(' ') if len(v) > 0],
                'category': 'resource',
                'match': int(line[1])
            }
    return result

### Evaluate baseline

In [21]:
res_baseline = evaluate_baseline(es, amount=1000)
write_result_to_file(res_baseline, 'baseline')

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Progress - 150/9557 queries handled.
Progress - 200/9557 queries handled.
Progress - 250/9557 queries handled.
Progress - 300/9557 queries handled.
Progress - 350/9557 queries handled.
Progress - 400/9557 queries handled.
Progress - 450/9557 queries handled.
Progress - 500/9557 queries handled.
Progress - 550/9557 queries handled.
Progress - 600/9557 queries handled.
Progress - 650/9557 queries handled.
Progress - 700/9557 queries handled.
Progress - 750/9557 queries handled.
Progress - 800/9557 queries handled.
Progress - 850/9557 queries handled.
Progress - 900/9557 queries handled.
Progress - 950/9557 queries handled.
Progress - 1000/9557 queries handled.


In [22]:
res_baseline = read_result_from_file('baseline')
evaluate(res_baseline)

Evaluation results:
-------------------
Category prediction (based on 1000 questions)
  Accuracy: 0.474
Type ranking (based on 1000 questions)
  NDCG@5:  0.309
  NDCG@10: 0.395


### Evaluate advanced

In [23]:
model = KeyedVectors.load_word2vec_format(EMBEDDING_FILE)

In [24]:
res_adv = evaluate_advanced(es, model, k=1000, amount=1000)
evaluate(res_adv)

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Progress - 150/9557 queries handled.
Progress - 200/9557 queries handled.
Progress - 250/9557 queries handled.
Progress - 300/9557 queries handled.
Progress - 350/9557 queries handled.
Progress - 400/9557 queries handled.
Progress - 450/9557 queries handled.
Progress - 500/9557 queries handled.
Progress - 550/9557 queries handled.
Progress - 600/9557 queries handled.
Progress - 650/9557 queries handled.
Progress - 700/9557 queries handled.
Progress - 750/9557 queries handled.
Progress - 800/9557 queries handled.
Progress - 850/9557 queries handled.
Progress - 900/9557 queries handled.
Progress - 950/9557 queries handled.
Progress - 1000/9557 queries handled.
Evaluation results:
-------------------
Category prediction (based on 1000 questions)
  Accuracy: 0.517
Type ranking (based on 1000 questions)
  NDCG@5:  0.397
  NDCG@10: 0.501


## Different model dimensionality
Run on 100queries, different dimensionality on model

In [31]:
res_baseline = evaluate_baseline(es, amount=100)
evaluate(res_baseline)

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Evaluation results:
-------------------
Category prediction (based on 100 questions)
  Accuracy: 0.410
Type ranking (based on 100 questions)
  NDCG@5:  0.309
  NDCG@10: 0.381


In [32]:

model = KeyedVectors.load_word2vec_format(datapath(os.getcwd()+'/datasets/gensim/gensim.6B.50d.txt'))
res_adv = evaluate_advanced(es, model, k=100, amount=100)
evaluate(res_adv)

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Evaluation results:
-------------------
Category prediction (based on 100 questions)
  Accuracy: 0.470
Type ranking (based on 100 questions)
  NDCG@5:  0.331
  NDCG@10: 0.430


In [33]:
model = KeyedVectors.load_word2vec_format(datapath(os.getcwd()+'/datasets/gensim/gensim.6B.100d.txt'))
res_adv = evaluate_advanced(es, model, k=100, amount=100)
evaluate(res_adv)

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Evaluation results:
-------------------
Category prediction (based on 100 questions)
  Accuracy: 0.470
Type ranking (based on 100 questions)
  NDCG@5:  0.341
  NDCG@10: 0.441


In [34]:
model = KeyedVectors.load_word2vec_format(datapath(os.getcwd()+'/datasets/gensim/gensim.6B.200d.txt'))
res_adv = evaluate_advanced(es, model, k=100, amount=100)
evaluate(res_adv)

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Evaluation results:
-------------------
Category prediction (based on 100 questions)
  Accuracy: 0.460
Type ranking (based on 100 questions)
  NDCG@5:  0.325
  NDCG@10: 0.428


In [35]:
model = KeyedVectors.load_word2vec_format(datapath(os.getcwd()+'/datasets/gensim/gensim.6B.300d.txt'))
res_adv = evaluate_advanced(es, model, k=100, amount=100)
evaluate(res_adv)

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Evaluation results:
-------------------
Category prediction (based on 100 questions)
  Accuracy: 0.480
Type ranking (based on 100 questions)
  NDCG@5:  0.315
  NDCG@10: 0.428


## Different amount of collected documents

In [38]:
model = KeyedVectors.load_word2vec_format(datapath(os.getcwd()+'/datasets/gensim/gensim.6B.100d.txt'))

res_baseline = evaluate_baseline(es, amount=100)
evaluate(res_baseline)

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Evaluation results:
-------------------
Category prediction (based on 100 questions)
  Accuracy: 0.410
Type ranking (based on 100 questions)
  NDCG@5:  0.309
  NDCG@10: 0.381


In [37]:
res_adv = evaluate_advanced(es, model, k=1, amount=100)
evaluate(res_adv)

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Evaluation results:
-------------------
Category prediction (based on 100 questions)
  Accuracy: 0.190
Type ranking (based on 100 questions)
  NDCG@5:  0.114
  NDCG@10: 0.100


In [39]:
res_adv = evaluate_advanced(es, model, k=5, amount=100)
evaluate(res_adv)

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Evaluation results:
-------------------
Category prediction (based on 100 questions)
  Accuracy: 0.320
Type ranking (based on 100 questions)
  NDCG@5:  0.304
  NDCG@10: 0.260


In [40]:
res_adv = evaluate_advanced(es, model, k=10, amount=100)
evaluate(res_adv)

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Evaluation results:
-------------------
Category prediction (based on 100 questions)
  Accuracy: 0.410
Type ranking (based on 100 questions)
  NDCG@5:  0.285
  NDCG@10: 0.371


In [41]:
res_adv = evaluate_advanced(es, model, k=25, amount=100)
evaluate(res_adv)

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Evaluation results:
-------------------
Category prediction (based on 100 questions)
  Accuracy: 0.440
Type ranking (based on 100 questions)
  NDCG@5:  0.335
  NDCG@10: 0.430


In [42]:
res_adv = evaluate_advanced(es, model, k=50, amount=100)
evaluate(res_adv)

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Evaluation results:
-------------------
Category prediction (based on 100 questions)
  Accuracy: 0.450
Type ranking (based on 100 questions)
  NDCG@5:  0.340
  NDCG@10: 0.444


In [43]:
res_adv = evaluate_advanced(es, model, k=100, amount=100)
evaluate(res_adv)

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Evaluation results:
-------------------
Category prediction (based on 100 questions)
  Accuracy: 0.470
Type ranking (based on 100 questions)
  NDCG@5:  0.332
  NDCG@10: 0.430


In [44]:
res_adv = evaluate_advanced(es, model, k=250, amount=100)
evaluate(res_adv)

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Evaluation results:
-------------------
Category prediction (based on 100 questions)
  Accuracy: 0.440
Type ranking (based on 100 questions)
  NDCG@5:  0.327
  NDCG@10: 0.420


In [45]:
res_adv = evaluate_advanced(es, model, k=500, amount=100)
evaluate(res_adv)

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Evaluation results:
-------------------
Category prediction (based on 100 questions)
  Accuracy: 0.440
Type ranking (based on 100 questions)
  NDCG@5:  0.336
  NDCG@10: 0.437


In [46]:
res_adv = evaluate_advanced(es, model, k=1000, amount=100)
evaluate(res_adv)

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Evaluation results:
-------------------
Category prediction (based on 100 questions)
  Accuracy: 0.420
Type ranking (based on 100 questions)
  NDCG@5:  0.343
  NDCG@10: 0.456


In [47]:
res_adv = evaluate_advanced(es, model, k=5000, amount=100)
evaluate(res_adv)

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Evaluation results:
-------------------
Category prediction (based on 100 questions)
  Accuracy: 0.440
Type ranking (based on 100 questions)
  NDCG@5:  0.336
  NDCG@10: 0.437


In [48]:
res_adv = evaluate_advanced(es, model, k=10000, amount=100)
evaluate(res_adv)

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Evaluation results:
-------------------
Category prediction (based on 100 questions)
  Accuracy: 0.480
Type ranking (based on 100 questions)
  NDCG@5:  0.327
  NDCG@10: 0.438


In [49]:
model = KeyedVectors.load_word2vec_format(datapath(os.getcwd()+'/datasets/gensim/gensim.6B.300d.txt'))
res_adv = evaluate_advanced(es, model, k=10000, amount=100)
evaluate(res_adv)

Progress - 50/9557 queries handled.
Progress - 100/9557 queries handled.
Evaluation results:
-------------------
Category prediction (based on 100 questions)
  Accuracy: 0.480
Type ranking (based on 100 questions)
  NDCG@5:  0.339
  NDCG@10: 0.453
