# Parser Gold Mapping

This notebook maps all relations to each other, taking into account that some of them were not predicted.

In [150]:
import json
import numpy as np
import conll16st.aligner as aligner
import conll16st.partial_scorer as ps
import read_write_files as rw

In [186]:
def change_sets_to_list(alignments):
    """Replace every ``TokenIndexSet`` in the aligned relations by a list.

    ``alignments`` is an iterable of ``(gold, pred)`` pairs in which either
    side may be ``None``. For each relation that is present, the
    ``TokenIndexSet`` entry of both ``Arg1`` and ``Arg2`` is overwritten in
    place with ``list(...)`` of its previous value (presumably so that the
    relations can be written out as JSON, which has no set type).

    Returns the same ``alignments`` object that was passed in.
    """
    for gold, pred in alignments:
        for relation in (gold, pred):
            # One side of a pair is None when it has no counterpart.
            if relation is None:
                continue
            for arg in ("Arg1", "Arg2"):
                span = relation[arg]
                span["TokenIndexSet"] = list(span["TokenIndexSet"])

    return alignments

In [189]:
def align_parsers_to_gold(gold_rel, parsers, partial_match_cutoff=0.7):
    """Collect, per gold relation, the relation each parser aligned to it.

    Parameters
    ----------
    gold_rel : list of dict
        Gold relations; each must carry an ``"ID"`` key.
    parsers : list of (name, relations) tuples
        One entry per parser: its name and its predicted relations.
    partial_match_cutoff : float, optional
        Forwarded as the third positional argument of
        ``aligner.align_relations`` (presumably the partial-match threshold;
        verify against the aligner). Defaults to the previously hard-coded
        ``0.7``.

    Returns
    -------
    (total_alignment, parsers_not_mappable)
        ``total_alignment`` maps each gold ``"ID"`` to a dict with the keys
        ``"gold"`` (the gold relation), ``"parsers"`` (the aligned
        predictions, appended in the order the parsers are processed) and
        ``"parser_names"`` (the names of all parsers, in input order).
        ``parsers_not_mappable`` lists the predictions whose aligned gold
        side was ``None``.
    """
    parser_names = [name for name, _ in parsers]

    # "parser_names" is set up front so that every entry has the same keys,
    # including gold relations that never show up in any alignment.
    total_alignment = {
        gold["ID"]: {
            "gold": gold,
            "parsers": [],
            "parser_names": list(parser_names),
        }
        for gold in gold_rel
    }
    parsers_not_mappable = []

    for _, parser_relations in parsers:
        # Only the relation-level alignment is used; the first two return
        # values (Arg1 / Arg2 alignments) are ignored.
        _, _, relation_alignment = aligner.align_relations(
            gold_rel,
            parser_relations,
            partial_match_cutoff)

        for gold_align, pred_align in change_sets_to_list(relation_alignment):
            if gold_align is None:
                parsers_not_mappable.append(pred_align)
            else:
                total_alignment[gold_align["ID"]]["parsers"].append(pred_align)

    return total_alignment, parsers_not_mappable
        

# Training Set Mapping

In [194]:
# Gold-standard file for this section (the "blind" directory) and the
# relations read from it; gold_list is what the parser output is aligned to.
gold_path = "data/gold_standard/blind/gold.json"
gold_list = rw.read_json(gold_path)

In [195]:
# (parser name, path of that parser's prediction file) pairs, blind directory.
parsers = [
    ("oslopots", "data/submissions/randomized/blind/oslopots.json"),
    ("nguyenlab", "data/submissions/randomized/blind/nguyenlab.json"),
    ("steven", "data/submissions/randomized/blind/steven.json"),
]

In [196]:
# Read each parser's file and keep the result paired with the parser name.
predicted_lists = []
for name, path in parsers:
    predicted_list = rw.read_json(path)
    predicted_lists.append((name, predicted_list))

In [197]:
# Align every parser's loaded predictions with the gold relations.
total_alignment, not_mappables = align_parsers_to_gold(gold_list, predicted_lists)

In [201]:
# Write the per-gold alignment entries and the unmappable predictions.
# list(...) materialises the values: on Python 3 dict.values() is a view,
# which the standard json encoder rejects (assuming save_json uses json).
# NOTE(review): save_json is neither defined nor imported in the visible
# cells -- confirm where it comes from (the file helpers are imported as rw).
save_json(list(total_alignment.values()),"data/project_files/blind/total_alignment.json")
save_json(not_mappables,"data/project_files/blind/not_mappable.json")

# Test Set Mapping

In [207]:
# Gold-standard file for this section (the "dev" directory) and the
# relations read from it; both names are rebound from the previous section.
gold_path = "data/gold_standard/dev/gold.json"
gold_list = rw.read_json(gold_path)

In [208]:
# (parser name, path of that parser's prediction file) pairs, dev directory.
parsers = [
    ("oslopots", "data/submissions/randomized/dev/oslopots.json"),
    ("nguyenlab", "data/submissions/randomized/dev/nguyenlab.json"),
    ("steven", "data/submissions/randomized/dev/steven.json"),
]

In [209]:
# Read each parser's file and keep the result paired with the parser name.
predicted_lists = []
for name, path in parsers:
    predicted_list = rw.read_json(path)
    predicted_lists.append((name, predicted_list))

In [210]:
# Align every parser's loaded predictions with the gold relations.
total_alignment, not_mappables = align_parsers_to_gold(gold_list, predicted_lists)

In [211]:
# Write the per-gold alignment entries and the unmappable predictions.
# list(...) materialises the values: on Python 3 dict.values() is a view,
# which the standard json encoder rejects (assuming save_json uses json).
# NOTE(review): save_json is neither defined nor imported in the visible
# cells -- confirm where it comes from (the file helpers are imported as rw).
save_json(list(total_alignment.values()),"data/project_files/dev/total_alignment.json")
save_json(not_mappables,"data/project_files/dev/not_mappable.json")