# Processing MedCATTrainer Annotations
A short notebook demonstrating the schema of annotations downloaded from MedCATTrainer. Exports with and without text share the same format; the only difference is whether the source text is included.

In [4]:
import pandas as pd
import json

In [5]:
# Load the annotations for all projects.
# The export is a JSON object whose 'projects' key holds the list of projects.
# A context manager closes the file handle promptly, and the encoding is set
# explicitly so decoding does not depend on the platform default.
with open('example_data/MedCAT_Export_With_Text_2020-03-31_11_42_07.json', encoding='utf-8') as export_file:
    projs = json.load(export_file)['projects']

In [8]:
# Take the first project from the export; this example export contains only one.
proj = projs[0]

In [60]:
# Project-level CUI / TUI filters ('cuis', 'tuis') are top-level keys of the project dict,
# alongside its 'name', 'id' and the list of 'documents'.
proj.keys()

dict_keys(['name', 'id', 'cuis', 'tuis', 'documents'])

In [40]:
def count_flagged(annotations, flag):
    """Return how many annotation dicts have a truthy value stored under ``flag``.

    annotations: iterable of annotation dicts.
    flag: key of the status flag to test, e.g. 'correct'.
    Raises KeyError if an annotation does not carry the flag at all.
    """
    return sum(1 for anno in annotations if anno[flag])


# Annotations are found inside each document; flatten them once and reuse the list.
all_annotations = [a for d in proj["documents"] for a in d["annotations"]]

print(f'# of Documents: {len(proj["documents"])}')
print(f'# of Annotations: {len(all_annotations)}')

# Annotations that have been marked by a human annotator
print(f'# Validated Annotations: {count_flagged(all_annotations, "validated")}')

# Annotations that have been marked correct - (blue)
print(f'# Correct Annotations: {count_flagged(all_annotations, "correct")}')

# Annotations that have been marked incorrect - (red); stored under the 'deleted' key
print(f'# Incorrect Annotations: {count_flagged(all_annotations, "deleted")}')

# Annotations that have been marked terminated - (dark red); stored under the 'killed' key
print(f'# Terminated Annotations: {count_flagged(all_annotations, "killed")}')

# Annotations that have been marked alternative - (turquoise)
print(f'# Alternative Annotations: {count_flagged(all_annotations, "alternative")}')

# Annotations that have been manually created via right-click - 'Add Annotation', these will also be 'correct' == True
print(f'# Manually Created Annotations: {count_flagged(all_annotations, "manually_created")}')

# of Documents: 2
# of Annotations: 286
# Validated Annotations: 286
# Correct Annotations: 98
# Incorrect Annotations: 185
# Terminated Annotations: 0
# Alternative Annotations: 1
# Manually Created Annotations: 3


### Meta Annotations 
Each annotation carries a list of Meta Annotations; each one holds the name of the meta annotation task and the value that was selected for it.


In [None]:
## Annotations that are Correct with Meta Annotations Temporality - Current, Experiencer - Patient

In [59]:
def _meta_value(meta_anns, task_name):
    """Return the value of the first meta annotation named ``task_name``, or None if absent.

    meta_anns is a list of dicts, one per meta annotation; their order is not
    necessarily consistent, so the task is looked up by name, not by position.
    """
    return next((m['value'] for m in meta_anns if m['name'] == task_name), None)


annos = []
for doc in proj['documents']:
    for a in doc['annotations']:
        meta_anns = a['meta_anns']
        # Only annotations marked correct that carry at least one meta annotation are of interest.
        if not a['correct'] or not meta_anns:
            continue
        # An annotation missing either task yields None here and is skipped,
        # where indexing an empty match list would raise IndexError.
        is_current = _meta_value(meta_anns, 'Temporality') == 'Current'
        is_patient = _meta_value(meta_anns, 'Experiencer') == 'Patient'
        if is_current and is_patient:
            # pull out the doc_name, the text span value, and the concept
            annos.append({'doc_name': doc['name'], 'anno_value': a['value'], 'cui': a['cui']})

# make DataFrame; explicit columns keep the schema stable even when nothing matched
df = pd.DataFrame(annos, columns=['doc_name', 'anno_value', 'cui'])
df.head(5)

Unnamed: 0,doc_name,anno_value,cui
0,Subject 7,female,S-259051005
1,Subject 7,discoid lateral meniscus,S-202099003
2,Subject 7,collateral ligament,S-457008
3,Subject 7,consistent with,S-7883008
4,Subject 7,complete tear,S-263722006
