# Testing the acronym resolution filter's effect on NER performance

Tong Shu Li<br>
Created on: 2015-12-03<br>
Last updated: 2015-12-03<br>

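Here we measure how the acronym resolution filter used for the official evaluation affects NER performance: the annotated test set is parsed once without and once with the filter, and each version is scored against the gold standard, separately for chemicals and diseases.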

In [1]:
import os
import sys

In [2]:
# Put the parent directory on the module search path so that the `src`
# package imported below can be resolved (presumably the notebook sits one
# level below the repository root — confirm).
sys.path.append("..")

In [3]:
from src.data_model import parse_input
from src.lingpipe.file_util import save_file
from src.eval_perf import performance

---

### Read the PMID mappings

In [4]:
# NOTE(review): this section is meant to *read* the PMID mapping, and
# fix_pmids later indexes paper_mapping by PMID, yet the helper used here is
# named save_file and receives only a file name. Confirm that save_file
# returns the stored mapping when called this way; if file_util offers a
# dedicated loader, that is what should be called here.
paper_mapping = save_file("testset_mapping.pickle")

### Read the gold standard

In [5]:
# Absolute path of ../data/gold_standard, resolved against the current
# working directory.
loc = os.path.abspath(os.path.join("..", "data", "gold_standard"))
fname = "CDR_TestSet.txt"

# Parse the reference annotations with the acronym filter switched off, so
# the gold standard is never altered by the filter under test.
# return_format = "dict" is relied on below: the result is iterated with
# .items() as (pmid, paper) pairs.
eval_gold = parse_input(loc, fname, is_gold = True, return_format = "dict", fix_acronyms = False)

### Read the NER-processed test set

In [6]:
# Absolute path of ../data/final_eval, resolved against the current working
# directory. Note that loc and fname are reused from the gold-standard cell.
loc = os.path.abspath(os.path.join("..", "data", "final_eval"))
fname = "CDR_annotated_testset.txt"

# Parse the same annotated file twice; the two calls differ only in
# fix_acronyms, so any difference in the scores below is attributable to the
# acronym resolution filter.
no_fix = parse_input(loc, fname, is_gold = False, return_format = "dict", fix_acronyms = False)
fixed = parse_input(loc, fname, is_gold = False, return_format = "dict", fix_acronyms = True)

In [7]:
def fix_pmids(papers, mapping = None):
    """Re-key a dict of papers by their mapped PMIDs.

    Each paper's ``pmid`` attribute is overwritten in place with the mapped
    identifier, and the paper is stored under that identifier in a new dict.

    Args:
        papers: dict of {pmid: paper}. Every paper must allow assignment to
            its ``pmid`` attribute.
        mapping: optional dict-like of {pmid: replacement pmid}. When omitted,
            the notebook-level ``paper_mapping`` is used, which keeps existing
            one-argument calls working unchanged.

    Returns:
        A new dict of {replacement pmid: paper}. The input dict is left
        untouched, but the paper objects it holds are modified.

    Raises:
        KeyError: if a PMID in ``papers`` has no entry in the mapping.
    """
    if mapping is None:
        mapping = paper_mapping

    ans = {}
    for pmid, paper in papers.items():
        # Look the replacement up once and reuse it for both the attribute and
        # the new key, so the two can never disagree.
        try:
            new_pmid = mapping[pmid]
        except KeyError:
            raise KeyError("no PMID mapping for paper {!r}".format(pmid)) from None

        paper.pmid = new_pmid
        ans[new_pmid] = paper

    return ans

In [8]:
# Re-key both prediction sets by their mapped PMIDs so they can be compared
# with the gold standard.
# NOTE(review): each name is rebound to its own transformed value, so this
# cell is not safe to run twice: a second run would look up the already-mapped
# PMIDs in paper_mapping (most likely a KeyError — confirm). Re-run the
# parsing cell before re-running this one.
no_fix = fix_pmids(no_fix)
fixed = fix_pmids(fixed)

In [9]:
def get_concepts(papers, stype):
    """Collect the annotations of one semantic type as a set of tuples.

    Args:
        papers: dict of {pmid: paper}; each paper exposes ``annotations``, and
            each annotation exposes ``stype``, ``uid`` (an iterable of objects
            with a ``flat_repr`` attribute), ``start``, ``stop`` and ``text``.
        stype: either "chemical" or "disease".

    Returns:
        A set of (pmid, stype, start, stop, text, uids) tuples, where ``uids``
        is the annotation's ``flat_repr`` values sorted and joined with "|".

    Raises:
        AssertionError: if ``stype`` is not one of the two accepted values.
    """
    assert stype in ["chemical", "disease"]

    def as_record(pmid, annot):
        # Sorting makes the joined identifier string independent of the order
        # in which the identifiers are stored on the annotation.
        joined_ids = "|".join(sorted(ont_id.flat_repr for ont_id in annot.uid))
        return (pmid, stype, annot.start, annot.stop, annot.text, joined_ids)

    return {
        as_record(pmid, annot)
        for pmid, paper in papers.items()
        for annot in paper.annotations
        if stype == annot.stype
    }

In [18]:
# Gold-standard chemical annotations as a set of
# (pmid, stype, start, stop, text, uids) tuples.
eval_chem = get_concepts(eval_gold, "chemical")

In [19]:
# Number of distinct gold-standard chemical annotation tuples.
len(eval_chem)

5385

In [20]:
# Chemical annotations predicted without the acronym filter.
no_fix_chem = get_concepts(no_fix, "chemical")

In [21]:
# Number of distinct predicted chemical tuples without the acronym filter.
len(no_fix_chem)

5052

In [22]:
# Chemical annotations predicted with the acronym filter applied.
fixed_chem = get_concepts(fixed, "chemical")

In [23]:
# Number of distinct predicted chemical tuples with the acronym filter. In
# the recorded run it equals the unfiltered count while the scores below
# differ, so the filter seems to change the content of annotations rather
# than how many there are — TODO confirm.
len(fixed_chem)

5052

In [24]:
# Baseline: score the unfiltered chemical predictions (second argument)
# against the gold standard (first argument).
performance(eval_chem, no_fix_chem, human_readable = True)

# True pos: 4250
# False pos: 802
# False neg: 1135
Precision: 0.8412509897070467
Recall: 0.7892293407613742
F-score: 0.8144102711507137


In [25]:
# Score the acronym-filtered chemical predictions against the same gold
# standard; compare with the baseline cell above.
performance(eval_chem, fixed_chem, human_readable = True)

# True pos: 4460
# False pos: 592
# False neg: 925
Precision: 0.882818685669042
Recall: 0.8282265552460538
F-score: 0.8546517198428667


In [26]:
# Repeat the extraction for diseases: gold standard, unfiltered predictions
# and acronym-filtered predictions.
eval_dise = get_concepts(eval_gold, "disease")
no_fix_dise = get_concepts(no_fix, "disease")
fixed_dise = get_concepts(fixed, "disease")

In [27]:
# Baseline: score the unfiltered disease predictions against the gold
# standard.
performance(eval_dise, no_fix_dise, human_readable = True)

# True pos: 3224
# False pos: 1053
# False neg: 1200
Precision: 0.7537993920972644
Recall: 0.72875226039783
F-score: 0.7410642454890243


In [28]:
# Score the acronym-filtered disease predictions. In the recorded run these
# numbers are identical to the unfiltered disease scores, i.e. the filter
# made no measurable difference for diseases here.
performance(eval_dise, fixed_dise, human_readable = True)

# True pos: 3224
# False pos: 1053
# False neg: 1200
Precision: 0.7537993920972644
Recall: 0.72875226039783
F-score: 0.7410642454890243
