In [None]:
##### REQUIRES THE DATAFRAME FOLDER TO BE NAMED 'Cohorts', WHICH INCLUDES ALL PRECOMPUTED DATAFRAMES #####
import fiber
from fiber.cohort import Cohort
from fiber.condition import Patient, MRNs
from fiber.condition import Diagnosis
from fiber.condition import Measurement, Encounter, Drug, LabValue, Procedure, VitalSign 
from fiber.storage import yaml as fiberyaml
import pandas as pd
import pyarrow.parquet as pq
import numpy as np
import os
from functools import reduce 

# Define ICD Codes for Heart Failure
We are currently using the ICD codes suggested by the PheKB phenotyping algorithm.

In [None]:
# Heart-failure diagnosis filter: ICD-10 codes matching "I50%" OR ICD-9 codes
# matching "428%". (Presumably "%" acts as a wildcard, i.e. a code-prefix match
# -- confirm against the FIBER condition documentation.)
icd10_heart_failure = Diagnosis("I50%", "ICD-10")
icd9_heart_failure = Diagnosis("428%", "ICD-9")
conditions = icd10_heart_failure | icd9_heart_failure

# Get ICD HF Cohort

In [None]:
HF_cohort=Cohort(conditions)

In [None]:
HF_cohort=HF_cohort.get(conditions)

In [None]:
# Order rows by patient (MRN) and, within each patient, by age in days
# ascending, so that each patient's earliest diagnosis row comes first.
mrn_then_age = ['medical_record_number', 'age_in_days']
HF_cohort = HF_cohort.sort_values(by=mrn_then_age, ascending=[True, True])
HF_cohort

In [None]:
# Save the cohort with all encounters that include the defined ICD codes.
# This runs before the de-duplication step, so the file holds every matching
# row (sorted by MRN and age_in_days), not one row per patient.
# The path is relative, so the file is written to the current working directory.
HF_cohort.to_parquet('ALL_HF_ICD_Cohort.parquet')

In [None]:
# Reduce to one row per patient: keep the first row for each MRN in the
# current row order and drop every later row with the same MRN.
HF_cohort = HF_cohort.drop_duplicates(subset="medical_record_number", keep='first')


In [None]:
# Display the cohort after de-duplication (one row per medical_record_number).
# NOTE(review): the rendered output contains patient identifiers -- clear
# outputs before sharing or committing the notebook.
HF_cohort

In [None]:
# Re-index the unique cohort by MRN; set_index returns a new frame, so
# HF_cohort itself is left unchanged.
HF_cohort_index = HF_cohort.set_index('medical_record_number')

In [None]:
# Display the unique ICD cohort, now indexed by medical_record_number.
HF_cohort_index

In [None]:
# Save the one-row-per-patient ICD cohort (indexed by medical_record_number)
# as Parquet in the current working directory.
HF_cohort_index.to_parquet('Unique_HF_ICD_Cohort.parquet')

# Get Cohort which also contains EF Measurements


In [None]:
# Collect the MRNs of the unique ICD cohort (they are the index of
# HF_cohort_index) and build a new cohort restricted to exactly those patients.
mrns = list(HF_cohort_index.index)
# Condition selecting patients by the given MRN list.
condition = MRNs(mrns)
cohort_unique_MRN=Cohort(condition)

In [None]:
# Lab-value condition for ejection fraction: matches the pattern '%ejection%'
# (presumably a wildcard match on the lab test name -- confirm against the
# FIBER LabValue documentation).
# Note: this rebinds `condition`, which previously held the MRN condition.
condition = LabValue('%ejection%')

# Fetch the matching lab rows for the patients of the unique ICD cohort.
cohort_EF = cohort_unique_MRN.get(condition)

In [None]:
# Display the rows returned for the ejection-fraction lab condition.
# NOTE(review): the rendered output contains patient identifiers -- clear
# outputs before sharing or committing the notebook.
cohort_EF

In [None]:
# Order the EF rows by patient (MRN) and, within each patient, by age in days
# ascending (earliest row first, latest row last).
ef_sort_keys = ['medical_record_number', 'age_in_days']
cohort_EF = cohort_EF.sort_values(by=ef_sort_keys, ascending=[True, True])
cohort_EF

In [None]:
# Save the cohort with all encounters that include a measurement of EF.
# This must write cohort_EF (every EF row, sorted by MRN and age_in_days, before
# de-duplication). The previous version wrote HF_cohort, i.e. the already
# de-duplicated ICD cohort, so the file did not contain any EF data.
cohort_EF.to_parquet('ALL_HF_ICD_EF_Cohort.parquet')

In [None]:
# Reduce to one row per patient: keep the LAST row for each MRN in the current
# row order. With rows sorted ascending by age_in_days this is the row with the
# greatest age_in_days per patient.
# NOTE(review): the ICD cohort keeps the first row per patient while this keeps
# the last -- confirm that the asymmetry is intended.
cohort_EF = cohort_EF.drop_duplicates(subset="medical_record_number", keep='last')

In [None]:
# Re-index the unique EF cohort by MRN; set_index returns a new frame, so
# cohort_EF itself is left unchanged.
cohort_EF_index = cohort_EF.set_index('medical_record_number')

In [None]:
# Display the one-row-per-patient EF cohort, indexed by medical_record_number.
cohort_EF_index

In [None]:
# Save the unique EF cohort (one row per patient, indexed by
# medical_record_number) as Parquet in the current working directory.
cohort_EF_index.to_parquet('Unique_HF_ICD_EF_Cohort.parquet')

In [None]:
# Inner-join the unique ICD cohort with the unique EF cohort on MRN, keeping
# only patients present in both frames. The left key is the index level named
# "medical_record_number"; the right key is the right frame's index.
# Non-key columns present in both frames (e.g. age_in_days) get pandas'
# default "_x" (ICD side) / "_y" (EF side) suffixes.
final_HF_ICD_Cohort = pd.merge(
    HF_cohort_index,
    cohort_EF_index,
    left_on="medical_record_number",
    right_index=True,
    how="inner",
)

In [None]:
# Display the merged ICD + EF cohort (patients present in both unique cohorts).
# NOTE(review): the rendered output contains patient identifiers -- clear
# outputs before sharing or committing the notebook.
final_HF_ICD_Cohort

In [None]:
# Save the final merged ICD + EF cohort as Parquet in the current working
# directory.
final_HF_ICD_Cohort.to_parquet('Merged_HF_ICD_EF_Cohort.parquet')