In [1]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import os 

import config

In [2]:
import os 
from wiki_database import WikiDatabaseSqlite
from doc_results_db import ClaimTensorDatabase

claim_data_set = 'dev'
path_wiki_pages = os.path.join(config.ROOT, config.DATA_DIR, config.WIKI_PAGES_DIR, 'wiki-pages')
path_wiki_database_dir = os.path.join(config.ROOT, config.DATA_DIR, config.DATABASE_DIR)

setup = 1

claim_tensor_db = ClaimTensorDatabase(path_wiki_pages, path_wiki_database_dir, setup)
wiki_database = WikiDatabaseSqlite(path_wiki_database_dir, path_wiki_pages)

wiki_database
- Load existing settings file
- Load title dictionary


In [3]:
from neural_network import NeuralNetwork
# load model
# === variables === #
claim_data_set = 'dev'
method_database = 'equal_class' # include_all, equal_class
setup = 1
settings_model = {}
settings_model['fraction_training'] = 0.9
settings_model['use_cuda'] = False
settings_model['seed'] = 1
settings_model['lr'] = 0.001
settings_model['momentum'] = 0.9 # 0.5
settings_model['params'] = {'batch_size': 64, 'shuffle': True}
settings_model['nr_epochs'] = 10
settings_model['log_interval'] = 10
settings_model['width'] = 2000

neural_network = NeuralNetwork(claim_data_set, method_database, setup, settings_model)
    
model_nn = neural_network.model


DataNeuralNetwork
- data already created
NeuralNetwork
- load model


In [4]:
from utils_doc_results import ClaimDatabase

claim_database = ClaimDatabase(path_dir_database = claim_tensor_db.path_dir_claim_database, 
                               path_raw_data = claim_tensor_db.path_raw_claim_data, 
                               claim_data_set = claim_tensor_db.claim_data_set)


claim database
- claim database already exists


In [15]:
from utils_db import dict_load_json, dict_save_json
from utils_doc_results import Claim
from tqdm import tqdm 
from doc_results_db import ClaimFile

class PredictLabels():
    def __init__(self, K, threshold, method, claim_tensor_db, wiki_database, neural_network):
        # --- process input --- #
        self.K = K
        self.threshold = threshold
        self.method = method
        self.claim_tensor_db = claim_tensor_db
        self.model_nn = neural_network.model
        # --- variables --- #
        self.nr_claims = self.claim_tensor_db.settings['nr_total']
        self.path_predict_label_dir = os.path.join(claim_tensor_db.path_setup_dir, 'Predictions_' + str(K) + '_' + method + '_' + neural_network.file_name)
        mkdir_if_not_exist(self.path_predict_label_dir)
        self.path_settings = os.path.join(self.path_predict_label_dir, 'settings.json')
        
        if os.path.isfile(self.path_settings):
            self.settings = dict_load_json(self.path_settings)
        else:
            self.settings = {}
            self.get_accuracy_save_results(wiki_database)
            dict_save_json(self.settings, self.path_settings)
        
    def get_accuracy_save_results(self, wiki_database):
        nr_correct = 0
        nr_documents_selected = 0
        
        for id_nr in tqdm(range(self.nr_claims)):
            path_file = os.path.join(self.claim_tensor_db.path_dict_variable_list_dir, str(id_nr) + '.json')
            dict_variables = dict_load_json(path_file)
            id = dict_variables['id']
            selected_documents_list = list(dict_variables['selected_documents'].keys())
            pred_value_list = []
            for selected_document_str in selected_documents_list:
                flag_process = 0
                if method == 'generated':
                    if int(selected_document_str) in dict_variables['ids_generated']:
                        flag_process = 1
                elif method == 'correct':
                    if int(selected_document_str) in dict_variables['ids_correct_docs']:
                        flag_process = 1
                elif method == 'selected':
                    flag_process = 1
                else:
                    raise ValueError('method not in method_list', method)
                    
                if flag_process == 1:
                    variable_list = dict_variables['selected_documents'][selected_document_str]['list_variables']
                    variable_tensor = torch.FloatTensor(variable_list)
                    pred_value_list += [self.model_nn(variable_tensor.unsqueeze(0)).item()]
                    if 'predicted_true' in dict_variables:
                        dict_variables['predicted_true'].append(selected_document_str) 
                    else:
                        dict_variables['predicted_true'] = selected_document_str
                        
            pred_value_list_sorted = [x for x,_ in sorted(zip(pred_value_list, selected_documents_list))]
            pred_id_correct_list = [x for _,x in sorted(zip(pred_value_list, selected_documents_list))]
            id_correct_list = []
            length_list = len(pred_id_correct_list)

            for i in range(length_list):
                if i<self.K:
                    id_correct_list.append(pred_id_correct_list[length_list-1-i])
                elif pred_value_list_sorted[length_list-1-i] > self.threshold:
                    id_correct_list.append(pred_id_correct_list[length_list-1-i])
                else:
                    break
                nr_documents_selected += 1

            file = ClaimFile(id = id, path_dir_files = self.claim_tensor_db.path_claims_dir)
            claim_dict = claim_database.get_claim_from_id(id)
            claim = Claim(claim_dict)
            for interpreter in claim.evidence:
                flag_correctly_predicted = True
                for proof in interpreter:
                    title_proof = proof[2]
                    if title_proof == None:
                        raise ValueError('should contain proof')
                    else:
                        id_proof = wiki_database.get_id_from_title(title_proof)
                        if str(id_proof) not in id_correct_list:
                            flag_correctly_predicted = False
                            break
                if flag_correctly_predicted == True:
                    nr_correct += 1
                    break
        
        print('accuracy', accuracy, nr_documents_selected)
        self.settings['accuracy'] = nr_correct / float(nr_claims)
        self.settings['nr_documents_selected'] = nr_documents_selected
        


In [16]:
import os 
from wiki_database import WikiDatabaseSqlite
from doc_results_db import ClaimTensorDatabase

# claim_data_set = 'dev'
path_wiki_pages = os.path.join(config.ROOT, config.DATA_DIR, config.WIKI_PAGES_DIR, 'wiki-pages')
path_wiki_database_dir = os.path.join(config.ROOT, config.DATA_DIR, config.DATABASE_DIR)

setup = 1

claim_tensor_db = ClaimTensorDatabase(path_wiki_pages, path_wiki_database_dir, setup)
wiki_database = WikiDatabaseSqlite(path_wiki_database_dir, path_wiki_pages)

method_list = ['generated', 'correct', 'selected']
method = 'generated'
K = 0
threshold = 0.2

# nr_claims = claim_tensor_db.settings['nr_total']

predict_labels_db = PredictLabels(K, threshold, method, claim_tensor_db, wiki_database, neural_network)


wiki_database
- Load existing settings file
- Load title dictionary


KeyboardInterrupt: 