# by pred count

In [289]:
claim_count_parsed_sample = (
    py_.chain(triples)
    .filter(lambda e: len(e["triples"]) > 0)
    .map(lambda e: [_["pred"] for _ in e["triples"]])
    .flatten()
    .thru(pd.Series)
    .value()
    .to_frame(name="pred")
    .groupby("pred")
    .size()
)
claim_count_parsed_sample

pred
AFFECTS            2487
ASSOCIATED_WITH    1722
CAUSES             1127
COEXISTS_WITH      2712
INTERACTS_WITH      609
TREATS             5779
dtype: int64

In [291]:
claim_count_evidence_sample = (
    py_.chain(combined_evidence)
    .map(lambda e: e["pred_term"])
    .thru(pd.Series)
    .value()
    .to_frame(name="pred")
    .groupby("pred")
    .size()
)
claim_count_evidence_sample

pred
AFFECTS             85
ASSOCIATED_WITH     66
CAUSES              67
COEXISTS_WITH      170
INTERACTS_WITH       4
TREATS              21
dtype: int64

In [295]:
ent_evidence_count = (
    py_.chain(combined_evidence)
    .map(
        lambda e: {
            "pred": e["pred_term"],
            "num_efo": e["efo_ents"]["subject_ents"].__len__()
            + e["efo_ents"]["subject_ents"].__len__(),
            "num_umls": e["umls_ents"]["subject_ents"].__len__()
            + e["umls_ents"]["subject_ents"].__len__(),
            "num_traits": e["trait_ents"]["subject_ents"].__len__()
            + e["trait_ents"]["subject_ents"].__len__(),
            "num_triple": sum([_.__len__() for _, k in e["triple_evidence"].items()]),
            "num_assoc": sum([_.__len__() for _, k in e["assoc_evidence"].items()]),
        }
    )
    .thru(pd.DataFrame)
    .value()
    .groupby("pred")
    .sum()
)
ent_evidence_count

Unnamed: 0_level_0,num_efo,num_umls,num_traits,num_triple,num_assoc
pred,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AFFECTS,344,7984,1432,1955,7735
ASSOCIATED_WITH,224,5316,2254,660,2442
CAUSES,298,7824,1280,1541,6097
COEXISTS_WITH,744,18430,4000,1700,6290
INTERACTS_WITH,10,354,34,40,148
TREATS,70,2200,116,483,1911


In [298]:
desc_count = ent_evidence_count.merge(
    claim_count_evidence_sample.to_frame(name="num_claim_evidence_sample"),
    right_index=True,
    left_index=True,
).merge(
    claim_count_parsed_sample.to_frame(name="num_claim_parsed_sample"),
    right_index=True,
    left_index=True,
)
desc_count

Unnamed: 0_level_0,num_efo,num_umls,num_traits,num_triple,num_assoc,num_claim_evidence_sample,num_claim_parsed_sample
pred,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AFFECTS,344,7984,1432,1955,7735,85,2487
ASSOCIATED_WITH,224,5316,2254,660,2442,66,1722
CAUSES,298,7824,1280,1541,6097,67,1127
COEXISTS_WITH,744,18430,4000,1700,6290,170,2712
INTERACTS_WITH,10,354,34,40,148,4,609
TREATS,70,2200,116,483,1911,21,5779


In [300]:
output_file = analysis_assets_dir / "analysis-desc-count.tex"
desc_count.style.to_latex(output_file)

In [305]:
ent_evidence_count_supplementary = (
    py_.chain(combined_evidence)
    .map(
        lambda e: {
            "pred": e["pred_term"],
            "num_efo_high": len(
                [
                    _
                    for _ in e["efo_ents"]["subject_ents"]
                    if _["similarity_score"] > 0.85
                ]
            )
            + len(
                [
                    _
                    for _ in e["efo_ents"]["object_ents"]
                    if _["similarity_score"] > 0.85
                ]
            ),
            "num_umls_high": len(
                [
                    _
                    for _ in e["umls_ents"]["subject_ents"]
                    if _["similarity_score"] > 0.85
                ]
            )
            + len(
                [
                    _
                    for _ in e["umls_ents"]["object_ents"]
                    if _["similarity_score"] > 0.85
                ]
            ),
            "num_traits_high": len(
                [
                    _
                    for _ in e["trait_ents"]["subject_ents"]
                    if _["similarity_score"] > 0.85
                ]
            )
            + len(
                [
                    _
                    for _ in e["trait_ents"]["object_ents"]
                    if _["similarity_score"] > 0.85
                ]
            ),
            "num_triple_supporting": len(
                [
                    _
                    for _ in e["triple_evidence"]["supporting"]
                    if "supporting" in e["triple_evidence"].keys()
                ]
            ),
            "num_triple_high": len(
                [
                    _
                    for _ in e["triple_evidence"]["supporting"]
                    if "supporting" in e["triple_evidence"].keys()
                    and _["evidence_score"] >= 1
                ]
            ),
            "num_assoc_supporting": len(
                [
                    _
                    for _ in e["assoc_evidence"]["supporting"]
                    if "supporting" in e["assoc_evidence"].keys()
                ]
            ),
            "num_assoc_high": len(
                [
                    _
                    for _ in e["assoc_evidence"]["supporting"]
                    if "supporting" in e["assoc_evidence"].keys()
                    and _["evidence_score"] >= 1
                ]
            ),
        }
    )
    .thru(pd.DataFrame)
    .value()
    .groupby("pred")
    .sum()
)
ent_evidence_count_supplementary

Unnamed: 0_level_0,num_efo_high,num_umls_high,num_traits_high,num_triple_supporting,num_triple_high,num_assoc_supporting,num_assoc_high
pred,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AFFECTS,288,3806,679,375,141,372,196
ASSOCIATED_WITH,207,2560,474,211,73,3106,1064
CAUSES,242,3797,385,1266,482,412,123
COEXISTS_WITH,681,8131,1402,1820,736,2401,1185
INTERACTS_WITH,8,136,30,4,1,46,17
TREATS,61,1077,100,283,143,83,55


In [306]:
output_file = analysis_assets_dir / "analysis-desc-count-supplementary.tex"
ent_evidence_count_supplementary.style.to_latex(output_file)