-
Notifications
You must be signed in to change notification settings - Fork 0
/
Fill_in_test_data.py
69 lines (52 loc) · 2.38 KB
/
Fill_in_test_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 06 15:39:29 2014
@author: Johannes
"""
import utils
import os
import perceptron_sketch as perc
import feature_vector
import cPickle
import json
# Script: run the pretrained trigger and argument perceptrons over every test
# file, write the predicted labels into the 'predicted' field of each event
# candidate / argument, and save the filled-in JSON to the output directory.

# For classification of error types, use a single file instead:
#test_files_list =['C:/Python27/aaa_UCL/Natural Language Processing/assignment2/PMID-1653950.json']

# For running the test data:
test_path_inputs ='C:/Python27/aaa_UCL/Natural Language Processing/assignment2/bionlp2011genia-statnlp-test-clean/*.json'
test_files_output_dir = 'C:/Python27/aaa_UCL/Natural Language Processing/assignment2/predictions/'
test_files_list = utils.list_files(path=test_path_inputs)

if not os.path.exists(test_files_output_dir):
    os.makedirs(test_files_output_dir)

evaluate_test_list = test_files_list

FV_arg = feature_vector.FeatureVector('argument')
FV_trig = feature_vector.FeatureVector('trigger')

# Load weights of the pretrained perceptrons.
# NOTE(review): unpickling can execute arbitrary code; only load .data files
# that were produced locally by the training script.
with open('Perceptron_trigger.data', 'rb') as f:
    Lambda_e, misc_e = cPickle.load(f)
with open('Perceptron_argument.data', 'rb') as f:
    Lambda_a, misc_a = cPickle.load(f)

for i_f, test_file in enumerate(evaluate_test_list):
    # Single-argument print() emits the same text as the Python 2 statement
    # form `print 'Test File', i_f, 'of', n`.
    print('Test File %s of %s' % (i_f, len(evaluate_test_list)))

    # Generate predictions for the current file; p_e and p_a are the predicted
    # values, used below as indices into the label lists.
    (p_e, g_e) = perc.test_perceptron(FV_trig, Lambda_e, [test_file], mode='Trigger')
    (p_a, g_a) = perc.test_perceptron(FV_arg, Lambda_a, [test_file], mode='Argument')

    f_fill_this = utils.load_json_file(test_file)

    # Running positions in the flat prediction vectors. Predictions are
    # consumed in document order: one trigger prediction per event candidate,
    # one argument prediction per argument of that candidate.
    # NOTE(review): assumes test_perceptron emits predictions in this same
    # traversal order -- confirm against perceptron_sketch.
    counter_e = 0
    counter_a = 0
    for sentence in f_fill_this['sentences']:
        event_candidates = sentence['eventCandidates']
        for ec in event_candidates:
            ec['predicted'] = FV_trig.trigger_list[p_e[counter_e]]
            counter_e += 1
            for arg in ec['arguments']:
                arg['predicted'] = FV_arg.arguments_list[p_a[counter_a]]
                counter_a += 1

    # Sanity check: every prediction must have been consumed exactly once.
    if counter_e != len(p_e):
        print('PROBLEM: LENGTH OF PREDICTION VECTOR (trigger) DOESNT FIT!')
    if counter_a != len(p_a):
        print('PROBLEM: LENGTH OF PREDICTION VECTOR (argument) DOESNT FIT!')

    # Save the resulting dictionary to the output file.
    # Normalise separators before taking the basename: splitting on '\\' alone
    # returned the whole path whenever the listing used forward slashes only,
    # which produced a broken output path.
    output_file_name = os.path.basename(test_file.replace('\\', '/'))
    output_path = os.path.join(test_files_output_dir, output_file_name)
    with open(output_path, 'wb') as f_out:
        json.dump(f_fill_this, f_out)