# Merge Assessor Annotations

Annotation occurs at the span level, and we want to map spans to the facts they overlap. 

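The result is a single JSON file (`final-annotated-facts-results.json`) that holds, for each request, the annotated spans together with the facts each span overlaps, plus a flat list of (fact, assessor, label) entries.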

In [None]:
import collections
import glob 
import json
import pandas as pd

In [None]:
# Load the per-request summary metadata. The encoding is pinned to UTF-8 (the
# JSON interchange encoding) so decoding does not depend on the platform's
# locale default.
with open("collapsed-event-days-summaries.k=512.meta.json", "r", encoding="utf-8") as in_file:
    summary_metadata = json.load(in_file)

In [None]:
# For every request, index each offset covered by a fact span to the
# (fact id, span start, span end) triple of that span. When two fact spans
# cover the same offset, the one listed later wins.
req_fact_span_map = {}

for req_key, req_entry in summary_metadata.items():
    meta = req_entry["meta"]
    offset_index = {}

    # "spans_facts" and "spans_list" are walked in lockstep: the n-th fact id
    # belongs to the n-th (start, end) pair; `end` is exclusive.
    for fact_id, (span_start, span_end) in zip(meta["spans_facts"], meta["spans_list"]):
        offset_index.update(
            (offset, (fact_id, span_start, span_end))
            for offset in range(span_start, span_end)
        )

    req_fact_span_map[req_key] = offset_index


In [None]:
# Load the merged assessor annotations. The encoding is pinned to UTF-8 (the
# JSON interchange encoding) so decoding does not depend on the platform's
# locale default.
with open("merged-annotations.json", "r", encoding="utf-8") as in_file:
    annotation_data = json.load(in_file)

In [None]:
# Lookup table from an annotator's user id to the assessor label that
# replaces it in the output.
annotators = dict(
    (
        ("redacted_user_id_01", "assr_01"),
        ("redacted_user_id_02", "assr_02"),
        ("redacted_user_id_03", "assr_03"),
        ("redacted_user_id_04", "assr_04"),
        ("redacted_user_id_05", "assr_05"),
        ("redacted_user_id_06", "assr_06"),
    )
)

In [None]:
# Enrich the annotations in place: give every annotated span the list of facts
# it overlaps, replace the annotator's user id with its assessor label, and
# attach a flat per-request list of (fact, assessor, label) triples.
for req_id, req_data in annotation_data.items():
    print(req_id)

    # offset -> (fact id, fact start, fact end) for this request
    this_fact_span_map = req_fact_span_map[req_id]

    for span in req_data["spans"]:
        # Walk the span's offsets left to right and keep each fact the first
        # time it is met. dict.fromkeys de-duplicates like a set but keeps
        # insertion order, so "facts" (and the file written from it) comes out
        # in the same order on every run instead of in arbitrary set order.
        # An offset that no fact covers contributes None, so a span that is
        # not fully covered by facts carries a single None entry.
        span_facts = dict.fromkeys(
            this_fact_span_map.get(offset, (None,))[0]
            for offset in range(span["start"], span["end"])
        )

        span["facts"] = list(span_facts)
        # NOTE: a uid missing from `annotators` becomes None. Because the uid
        # is overwritten in place, running this loop a second time on the same
        # data would turn every uid into None.
        span["uid"] = annotators.get(span["uid"])

    # One triple per (span, fact) pair; spans are taken in order of start offset.
    this_fact_list = [
        (fact, span["uid"], span["label"])
        for span in sorted(req_data["spans"], key=lambda d: d["start"])
        for fact in span["facts"]
    ]
    req_data["fact_list"] = this_fact_list

In [None]:
# Persist the enriched annotations (spans with their facts, plus the
# per-request fact list) as one JSON document.
with open("final-annotated-facts-results.json", mode="w") as results_handle:
    json.dump(obj=annotation_data, fp=results_handle)