# Voting Systems

In this notebook we compare different voting systems to get the final prediction. Our models were trained on the blind test data, that is not biased by the training process and use the dev-test data to test our model.

## Load Test Data and Models

In [24]:
from read_write_files import read_json,save_json
from helper_functions import get_sense_lists,align_parsers_to_gold
import numpy as np

In [25]:
sense_model_path = "data/project_files/blind/sense_model.json"
test_data_path = "data/gold_standard/dev/gold.json"

In [26]:
parsers = [
    ("oslopots","data/submissions/randomized/dev/oslopots.json"),
    ("nguyenlab","data/submissions/randomized/dev/nguyenlab.json"),
    ("steven","data/submissions/randomized/dev/steven.json"),
]

In [28]:
parser_predictions = []

for name,path in parsers:
    parser_predictions += [(name,read_json(path))]

In [29]:
total_alignment,not_mappable = align_parsers_to_gold(test_data,parser_predictions)

In [40]:
gold_senses,parser_senses,parser_names = get_sense_lists(total_alignment.values())

## Voting Systems

In [27]:
sense_model = read_json(sense_model_path)
test_data = read_json(test_data_path)

In [82]:
def voting(parser_preds,parser_names,model,voting_algorithm):
    new_senses = []
    
    parser_pred_zip = zip(*parser_preds)
    for predictions in parser_pred_zip:
        result = voting_algorithm(predictions,model)
        new_senses += [result]
        
    return new_senses

In [87]:
def best_wins_voting(predictions,model):
    probs = []
    for ind,pred in enumerate(predictions):
        sense_dic = model[ind]["sense_pred"]
        if (pred == "None") or not (sense_dic.has_key(pred)):
            probs += [0]
        else:
            probs += [model[ind]["sense_pred"][pred]["f1"]]
    return np.argmax(probs)

In [88]:
best_parser_indexes = voting(parser_senses,parser_names,sense_model,best_wins_voting)

## Exchange new Attributes in Relation File

In [97]:
def exchange_attr_values(alignment_list,best_relation_indexes):
    new_relations = []
    
    for alignments in alignment_list.values():
        print(alignments)
        new_rel = alignments["parsers"][best_relation_indexes]
        new_relations.append(rel)
        
    return new_relations

In [98]:
new_relations = exchange_attr_values(total_alignment,best_parser_indexes)

{'parsers': [{u'DocID': u'wsj_2200', u'Arg1': {u'TokenList': [37, 38, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80], 'TokenIndexSet': [37, 38, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80]}, u'Arg2': {u'TokenList': [85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100], 'TokenIndexSet': [96, 97, 98, 99, 100, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95]}, u'Parser': u'oslopots', u'Connective': {u'RawText': u'', u'TokenList': []}, u'Sense': [u'Expansion.Conjunction'], u'Type': u'Implicit', u'ID': 35708, u'Change': u'Type'}, {u'DocID': u'wsj_2200', u'Arg1': {u'CharacterSpanList': [[253, 261], [323, 498]], u'RawText': u"The bill would prevent the Resolution Trust Corp. from raising temporary working capital by having an RTC-owned bank or thrift issue debt that wouldn't be counted on the federal budget", u'TokenList': [37, 38, 52, 53, 54, 55, 

TypeError: list indices must be integers, not list

In [94]:
total_alignment.values()[0]["parsers"][1]

{u'Arg1': {u'CharacterSpanList': [[253, 261], [323, 498]],
  u'RawText': u"The bill would prevent the Resolution Trust Corp. from raising temporary working capital by having an RTC-owned bank or thrift issue debt that wouldn't be counted on the federal budget",
  'TokenIndexSet': [37,
   38,
   52,
   53,
   54,
   55,
   56,
   57,
   58,
   59,
   60,
   61,
   62,
   63,
   64,
   65,
   66,
   67,
   68,
   69,
   70,
   71,
   72,
   73,
   74,
   75,
   76,
   77,
   78,
   79,
   80],
  u'TokenList': [37,
   38,
   52,
   53,
   54,
   55,
   56,
   57,
   58,
   59,
   60,
   61,
   62,
   63,
   64,
   65,
   66,
   67,
   68,
   69,
   70,
   71,
   72,
   73,
   74,
   75,
   76,
   77,
   78,
   79,
   80]},
 u'Arg2': {u'CharacterSpanList': [[517, 629]],
  u'RawText': u'to restrict the RTC to Treasury borrowings only, unless the agency receives specific congressional authorization',
  'TokenIndexSet': [96,
   97,
   98,
   99,
   100,
   85,
   86,
   87,
   88,
   89,
   9