# SMART
This notebook loads the trained category and type classifiers, predicts the category and answer types of the test questions, and evaluates the predictions.

In [1]:
import utils
import pickle
import numpy as np
import gensim 
import json
from evaluation.dbpedia import evaluate # evaluation script
from gensim.models import Word2Vec 
from nltk.tokenize import word_tokenize 
from nltk.corpus import stopwords 
from elasticsearch import Elasticsearch

## Classify the category

In [2]:
# Load the pickled baseline category classifier. The file is opened in a
# `with` block so the handle is closed as soon as unpickling finishes.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('category_classifier_baseline.sav', 'rb') as model_file:
    clf_category_baseline = pickle.load(model_file)

In [3]:
# Load the test questions from the ground-truth file (the same file is later
# handed to the evaluation script as the ground truth).
test = utils.load_dataset('datasets/DBpedia/test_grnd.json')

In [4]:
# Build the test inputs and labels. An empty dict is passed in the first slot
# and the first two returned values are discarded (presumably the training
# split -- TODO confirm against utils.prepare_X_y).
_, _, X_test, y_test = utils.prepare_X_y({}, test)
# Turn the test questions into feature vectors using 'category_vectorizer.sav'
# (presumably the vectorizer saved at training time -- confirm in utils).
test_vectors = utils.transform_dataset(X_test, 'category_vectorizer.sav')

In [5]:
# Predict a category for every test question, then report the share of
# predictions that equal the ground-truth label.
pred_category = clf_category_baseline.predict(test_vectors)
n_correct_category = sum(pred_category == y_test)
print("Baseline accuracy:", n_correct_category / len(pred_category))

Baseline accuracy: 0.9226380648105887


## Classify the type

In [6]:
# Load the pickled literal-type classifier. The file is opened in a `with`
# block so the handle is closed as soon as unpickling finishes.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('type_literal_classifier.sav', 'rb') as model_file:
    clf_type_literal = pickle.load(model_file)

In [7]:
# Split the test questions into three groups based on the *predicted* category
# (not the ground truth). The results are used below as mappings keyed by
# question index (`i in bool_map`, `literal_map.keys()` / `.values()`).
bool_map, literal_map, resource_map = utils.split_bool_literal_reference(X_test, pred_category)

In [8]:
# Inputs for the literal-type classifier: the values stored in literal_map,
# in the mapping's iteration order.
X_test_literal = [*literal_map.values()]
# Matching labels: the first ground-truth type of each question, looked up by
# the index that keys literal_map (same iteration order as the inputs).
y_test_literal = [test[question_idx]['type'][0] for question_idx in literal_map]

In [9]:
# Turn the literal questions into feature vectors using 'literal_vectorizer.sav'
# (presumably the vectorizer saved when the literal classifier was trained --
# confirm in utils.transform_dataset).
test_vectors_literal = utils.transform_dataset(X_test_literal, 'literal_vectorizer.sav')

In [10]:
# Predict a literal type for every question that was classified as 'literal',
# then report the share of predictions equal to the first ground-truth type.
pred_literal = clf_type_literal.predict(test_vectors_literal)
n_correct_literal = sum(pred_literal == y_test_literal)
print("Accuracy:", n_correct_literal / len(pred_literal))

Accuracy: 0.8787128712871287


## Bundle

In [11]:
# Elasticsearch client, constructed with no arguments (the client's defaults).
es = Elasticsearch()
# NOTE(review): INDEX_NAME is not passed to any predict_resource_types call
# visible in this notebook; those calls rely on the function's own default
# index_name='questions'. Keep the two values in sync if either changes.
INDEX_NAME = 'questions'

In [12]:
# English stop words from NLTK; predict_resource_types reads this set to drop
# stop words from a question before searching.
stop_words = set(stopwords.words('english'))

# Load the pickled word2vec model. The file is opened in a `with` block so
# the handle is closed as soon as unpickling finishes.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('word2vec_sg.sav', 'rb') as model_file:
    word2vec_sg = pickle.load(model_file)

In [13]:
def predict_resource_types(es, testdata, index, model, index_name='questions', size=5, threshold=0.85):
    """Predict answer types for one question via search plus embedding similarity.

    The question text is tokenized with ``word_tokenize`` and tokens found in
    the notebook-level ``stop_words`` set are dropped (the comparison is
    case-sensitive). The remaining terms are sent to Elasticsearch as a
    query string. Every single-token type attached to a returned hit becomes
    a candidate, and a candidate is kept when at least one question term has
    a similarity above ``threshold`` to the candidate name.

    Args:
        es: Client object exposing ``search(index=..., q=..., _source=..., size=...)``.
        testdata: Sequence of question dicts; ``testdata[index]['question']`` is read.
        index: Position of the question inside ``testdata``.
        model: Object exposing ``model.wv.similarity(word_a, word_b)``.
        index_name: Name of the Elasticsearch index to query.
        size: Maximum number of hits requested from Elasticsearch.
        threshold: Similarity a (term, type) pair must exceed for the type to be kept.

    Returns:
        Sorted list of predicted type labels. An empty list is returned when
        the search fails or nothing passes the threshold.
    """
    question = testdata[index]['question']
    q = " ".join(w for w in word_tokenize(question) if w not in stop_words)

    try:
        response = es.search(index=index_name, q=q, _source=True, size=size)
        hits = response.get('hits', {}).get('hits', [])
    except Exception:
        # Best effort: a failed search yields no prediction. Only Exception is
        # caught, so KeyboardInterrupt / SystemExit still stop a long run.
        return []

    # Collect candidate types; hits without a '_source' or 'type' entry are
    # skipped instead of raising KeyError.
    candidates = set()
    for hit in hits:
        for type_label in hit.get('_source', {}).get('type') or []:
            # Only single-token labels are considered.
            if len(type_label.split()) == 1:
                candidates.add(type_label)

    q_terms = q.split()  # Question terms
    pred_types = set()
    for type_label in candidates:
        # The first four characters are stripped before the lookup; the labels
        # shown in this notebook's sample output start with 'dbo:'.
        # NOTE(review): a label without such a 4-character prefix is truncated.
        type_name = type_label[4:]
        for term in q_terms:
            try:
                score = model.wv.similarity(term, type_name)
            except Exception:
                # Pairs that cannot be scored (presumably words missing from
                # the model vocabulary) are skipped.
                continue
            if score > threshold:
                pred_types.add(type_label)
                break  # one matching term is enough for this type

    return sorted(pred_types)

In [14]:
# Spot check: predicted resource types for the test question at index 12.
predict_resource_types(es, test, 12, word2vec_sg)

['dbo:Asteroid', 'dbo:CelestialBody', 'dbo:Planet']

In [15]:
# Build exactly one type prediction per test question so that type_preds[i]
# always belongs to test[i].
#
# pred_literal was produced from literal_map.values(), so it follows the key
# order of literal_map; keying it by question index removes the need for a
# running counter that silently assumes ascending index order.
literal_pred_by_index = dict(zip(literal_map.keys(), pred_literal))

type_preds = []
for i in range(len(test)):
    if i in bool_map:
        type_preds.append(['boolean'])
    elif i in literal_map:
        type_preds.append([literal_pred_by_index[i]])
    elif i in resource_map:
        type_preds.append(predict_resource_types(es, test, i, word2vec_sg))
    else:
        # No category was assigned to this question: emit an empty prediction
        # so later positions stay aligned with `test`.
        type_preds.append([])

In [16]:
# Ground-truth type list of every test question, in dataset order.
y_test_types = [question['type'] for question in test]

In [17]:
# Start from the test file and fill in the predicted category and types for
# each question, then write the result to the predictions file.
preds = utils.load_dataset('datasets/DBpedia/test.json')

# Checked in this order; when an index appears in several maps the last
# matching category is the one that is kept.
category_by_map = (
    (bool_map, 'boolean'),
    (literal_map, 'literal'),
    (resource_map, 'resource'),
)
for position, entry in enumerate(preds):
    for index_map, category in category_by_map:
        if position in index_map:
            entry['category'] = category
    entry['type'] = type_preds[position]

with open('datasets/DBpedia/preds_baseline.json', 'w') as out_file:
    json.dump(preds, out_file, indent=4)

In [18]:
# Run the evaluation script with three arguments: the type hierarchy TSV, the ground-truth file, and the predictions file written above.
%run evaluation/dbpedia/evaluate.py evaluation/dbpedia/dbpedia_types.tsv datasets/DBpedia/test_grnd.json datasets/DBpedia/preds_baseline.json

Loading type hierarchy from evaluation/dbpedia/dbpedia_types.tsv... 760 types loaded (max depth: 7)
Loading ground truth from datasets/DBpedia/test_grnd.json... 
   4367 questions loaded
Loading system predictions from datasets/DBpedia/preds_baseline.json... 
   4367 predictions loaded




Evaluation results:
-------------------
Category prediction (based on 4367 questions)
  Accuracy: 0.922
Type ranking (based on 3397 questions)
  NDCG@5:  0.547
  NDCG@10: 0.536


In [19]:
temp = utils.load_dataset('datasets/DBpedia/preds_baseline.json')

# Print average number of answer types
# Only questions predicted as 'resource' are counted.
resource_type_counts = [
    len(entry['type']) for entry in temp if entry['category'] == 'resource'
]
c = sum(resource_type_counts)  # total number of predicted types
n = len(resource_type_counts)  # number of resource questions
print("Average amount of predicted types for a question:", c/n)

Average amount of predicted types for a question: 2.993645750595711
