## Dataset: Cardiac Path Reports, PYP Reports
## Notebook purpose:
- Import existing annotations from Cardiac Path Reports and PYP Reports and derive a patient-level diagnosis

In [3]:
from pathlib import Path
import pandas as pd

from datasets import load_annotations, load_patient_diagnosis

In [4]:
# Show full cell contents when DataFrames are rendered (no column-width truncation).
pd.options.display.max_colwidth = None

In [5]:
# Patient-level diagnosis tables, one per report source (CP first, then PYP).
cp__patient_diagnosis, pyp__patient_diagnosis = (
    load_patient_diagnosis("cardiac_path_reports"),
    load_patient_diagnosis("pyp_reports"),
)
# Annotations for the same two report sources.
cp__annotations, pyp__annotations = (
    load_annotations("cardiac_path_reports"),
    load_annotations("pyp_reports"),
)

In [6]:
# Destination CSV for the merged CP + PYP patient-level diagnosis table; the
# to_csv call that would write to it is currently commented out further down.
cp_pyp__patient_diagnosis_path = Path("/data/datasets/Amyloidosis/patient_amyloid_diagnosis/cp_pyp/patient_amyloid_diagnosis.csv")

In [6]:
# Outer join on patient id so patients present in only one source are kept;
# columns that exist in both inputs get the "_cp" / "_pyp" suffix.
cp_pyp__patient_diagnosis = cp__patient_diagnosis.merge(
    pyp__patient_diagnosis,
    how="outer",
    on="ir_id",
    suffixes=("_cp", "_pyp"),
)

In [7]:
# Sanity check: every merged row carries a diagnosis from at least one source.
assert not (
    cp_pyp__patient_diagnosis["cp__amyloid_diagnosis"].isna()
    & cp_pyp__patient_diagnosis["pyp__amyloid_diagnosis"].isna()
).any()



In [8]:
# Patients that have a diagnosis from BOTH sources where the two codes differ.
# .copy() detaches the selection so the column added below does not write
# into a view of the merged table.
disagreements = cp_pyp__patient_diagnosis[
    cp_pyp__patient_diagnosis["cp__amyloid_diagnosis"].notna()
    & cp_pyp__patient_diagnosis["pyp__amyloid_diagnosis"].notna()
    & (
        cp_pyp__patient_diagnosis["cp__amyloid_diagnosis"]
        != cp_pyp__patient_diagnosis["pyp__amyloid_diagnosis"]
    )
].copy()

# (cp, pyp) code pair per patient. Built with zip instead of a row-wise
# DataFrame.apply: apply on an empty selection hands back a DataFrame rather
# than a Series of tuples, which breaks the single-column assignment when no
# patient disagrees; zip yields an empty list in that case.
disagreements["cp_pyp__amyloid_diagnosis"] = list(
    zip(
        disagreements["cp__amyloid_diagnosis"],
        disagreements["pyp__amyloid_diagnosis"],
    )
)


In [9]:
# Human-readable labels, indexed by each source's integer diagnosis code.
pyp_map = ["Not suggestive", "Strongly suggestive", "Equivocal"]
cp_map = ["Negative", "Positive", "Indeterminate"]

# One row per distinct (cp, pyp) disagreement pair, with how often it occurs.
df_disagreements = (
    disagreements["cp_pyp__amyloid_diagnosis"]
    .value_counts()
    .rename_axis("cp_pyp")
    .reset_index(name="counts")
)
df_disagreements["cardiac_path_diagnosis"] = df_disagreements["cp_pyp"].apply(
    lambda x: cp_map[int(x[0])]
)
df_disagreements["pyp_diagnosis"] = df_disagreements["cp_pyp"].apply(
    lambda x: pyp_map[int(x[1])]
)
# Resolve each pair with the same precedence get_final_diagnosis uses: the
# cardiac-path code wins unless it is 2 ("Indeterminate"), in which case the
# PYP code is taken. Deriving it per row replaces a hand-typed list of labels
# that was matched positionally to the value_counts() ordering and therefore
# silently mislabeled rows (or left NaN) whenever the counts or the set of
# observed pairs changed.
# NOTE(review): the result is labeled with cp_map ("Negative"/"Positive"),
# matching the vocabulary of the previous hard-coded list - confirm.
df_disagreements["final_diagnosis"] = df_disagreements["cp_pyp"].apply(
    lambda x: cp_map[int(x[1]) if int(x[0]) == 2 else int(x[0])]
)
df_disagreements = df_disagreements.drop(columns=["cp_pyp"])


In [None]:
# Notebook display of the disagreement summary table built above.
df_disagreements

In [14]:
def get_final_diagnosis(row):
    """Pick one patient's final amyloid diagnosis and its date.

    Reads ``cp__amyloid_diagnosis`` / ``pyp__amyloid_diagnosis`` and the
    matching ``*_date`` fields from ``row`` (any mapping-like row works).

    Precedence:
      * only one source present -> that source's diagnosis and date;
      * both present            -> the CP diagnosis and date, unless the CP
        code is 2 and the PYP code differs, in which case the PYP diagnosis
        and date are used.

    Returns:
        A ``(diagnosis, date)`` tuple; the diagnosis is converted to ``int``.

    Raises:
        AssertionError: if both diagnoses are null.
    """
    cp_raw = row["cp__amyloid_diagnosis"]
    pyp_raw = row["pyp__amyloid_diagnosis"]
    cp_when = row["cp__amyloid_diagnosis_date"]
    pyp_when = row["pyp__amyloid_diagnosis_date"]

    cp_missing = pd.isnull(cp_raw)
    pyp_missing = pd.isnull(pyp_raw)
    assert not (cp_missing and pyp_missing)

    # Single-source patients: nothing to reconcile.
    if cp_missing:
        return int(pyp_raw), pyp_when
    if pyp_missing:
        return int(cp_raw), cp_when

    cp_code, pyp_code = int(cp_raw), int(pyp_raw)

    # Code 2 is presumably "Indeterminate" (index 2 of the notebook's cp_map);
    # only then does a differing PYP result override the CP one.
    cp_defers_to_pyp = cp_code == 2 and pyp_code != cp_code
    return (pyp_code, pyp_when) if cp_defers_to_pyp else (cp_code, cp_when)

In [15]:
# Resolve every patient row; result_type="expand" splits the returned
# (diagnosis, date) tuple into the two new columns.
cp_pyp__patient_diagnosis[["final_diagnosis", "final_diagnosis_date"]] = (
    cp_pyp__patient_diagnosis.apply(
        get_final_diagnosis, axis=1, result_type="expand"
    )
)


In [16]:
# cp_pyp__patient_diagnosis.to_csv(cp_pyp__patient_diagnosis_path, index=False)

In [8]:
# NOTE(review): same call as the one already bound to cp__annotations earlier
# in this notebook; presumably that result could be reused - confirm before removing.
cp = load_annotations("cardiac_path_reports")