# Empirical comparison of EpiGraphDB and ASQ for evidence retrieval

In [23]:
# Load the evidence records stored in combined_score.json and report how
# many were read.
_input_path = experiment_data_dir / "evidence" / "combined_score.json"
with open(_input_path) as f:
    combined_evidence = json.load(f)
print(len(combined_evidence))

413


In [24]:
# Load the EpiGraphDB term-match records, then show the record count and the
# first record as a sample of the structure.
_input_path = analysis_dir / "terms_match_epigraphdb.json"
with open(_input_path) as f:
    terms_match_epigraphdb = json.load(f)
print(len(terms_match_epigraphdb), terms_match_epigraphdb[0], sep="\n")

413
{'doi': '10.1101/19001719', 'subject_term': 'Antihypertensive Agents', 'object_term': 'Mood Disorders', 'subject_term_match': {'efo': [], 'gwas': [], 'literature_term': [{'n': {'_name': 'Antihypertensive Agents', 'name': 'Antihypertensive Agents', '_source': ['SemMedDB_VER42_2020_R', 'bioRxiv-2020-10-06', 'medRxiv-2020-10-06'], 'id': 'C0003364', '_id': 'C0003364', 'type': ['phsu']}}]}, 'object_term_match': {'efo': [], 'gwas': [], 'literature_term': []}}


----

----

# Filter evidence

In [199]:
_threshold = 0.99


def filter_combined_evidence_asq(evidence_entry, threshold):
    def _filter_evidence(evidence_list, threshold):
        if threshold is None:
            res = (
                py_.chain(evidence_list)
                .filter(lambda e: e["mapping_score"] == 1.0)
                .value()
            )
        else:
            res = (
                py_.chain(evidence_list)
                .filter(lambda e: e["mapping_score"] >= (threshold * threshold))
                .value()
            )
        return res

    # assoc evidence
    assoc_evidence = evidence_entry["assoc_evidence"]
    assoc_evidence_filter = {
        _: _filter_evidence(assoc_evidence[_], threshold) for _ in assoc_evidence.keys()
    }
    # literature evidence
    triple_evidence = evidence_entry["triple_evidence"]
    triple_evidence_filter = {
        _: _filter_evidence(triple_evidence[_], threshold)
        for _ in triple_evidence.keys()
    }
    # print(gwas_subject_term_matches)
    # combined back
    res = {
        "doi": evidence_entry["doi"],
        "subject_term": evidence_entry["subject_term"],
        "object_term": evidence_entry["object_term"],
        "pred_term": evidence_entry["pred_term"],
        "assoc_evidence": assoc_evidence_filter,
        "triple_evidence": triple_evidence_filter,
    }
    return res


# Apply the single-threshold filter to every evidence entry, preserving order.
# Entries are iterated directly instead of through positional indices.
_combined_evidence_filter_asq_step = [
    filter_combined_evidence_asq(evidence_entry=entry, threshold=_threshold)
    for entry in combined_evidence
]

print(len(_combined_evidence_filter_asq_step))

# Write the filtered entries to <analysis_dir>/tmp/asq_threshold_filter.json.
# NOTE: despite the `_input_*` names this is an output location; the names are
# left unchanged in case other cells read them.
_input_dir = analysis_dir / "tmp"
_input_dir.mkdir(exist_ok=True)
_input_path = _input_dir / "asq_threshold_filter.json"
with _input_path.open("w") as f:
    json.dump(_combined_evidence_filter_asq_step, f)

413


In [200]:
# For every record in THRESHOLDS, filter all evidence entries with that
# record's threshold and tag the result with the record's label.
# The outer pydash chain is kept because the container type of THRESHOLDS is
# not visible here; the inner loop iterates the entries directly instead of
# going through positional indices.
combined_evidence_filter = (
    py_.chain(THRESHOLDS)
    .map(
        lambda threshold_rec: {
            "filter_type": threshold_rec["label"],
            "evidence": [
                filter_combined_evidence_asq(
                    evidence_entry=entry,
                    threshold=threshold_rec["threshold"],
                )
                for entry in combined_evidence
            ],
        }
    )
    .value()
)
print(len(combined_evidence_filter))

# Persist one {"filter_type", "evidence"} record per threshold.
_output_path = analysis_dir / "combined_evidence_filter.json"
with _output_path.open("w") as f:
    json.dump(combined_evidence_filter, f)

7
