In [None]:
import re

import pandas as pd

In [None]:
# Read the two ICD reference workbooks from the current working directory.
# (File names indicate the Section 111 valid ICD-9 / ICD-10 lists, Jan 2024.)
icd9_file, icd10_file = (
    pd.read_excel(workbook_path)
    for workbook_path in (
        './Section111ValidICD9-Jan2024.xlsx',
        './Section111ValidICD10-Jan2024.xlsx',
    )
)

In [None]:
def match_codes(df, codes):
    """Return the unique values of ``df['CODE']`` that start with any of ``codes``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``CODE`` column. Values are compared in their string
        form (``astype(str)``), but the original values are what is returned.
    codes : iterable
        Code prefixes. Each prefix is matched literally: regex
        metacharacters are escaped before the pattern is built.

    Returns
    -------
    Array of the distinct matching ``CODE`` values, in order of first
    appearance. Empty when ``codes`` is empty.
    """
    prefixes = [re.escape(str(code)) for code in codes]
    if not prefixes:
        # An empty alternation ("^(?:)") would match every row; return an
        # empty result of the same kind instead.
        return df['CODE'].iloc[:0].unique()

    # Anchor at the start so only codes *beginning* with a prefix are kept.
    pattern = f"^(?:{'|'.join(prefixes)})"
    # NOTE(review): if CODE was parsed as numeric, any leading zeros are already
    # gone by the time it is stringified here - confirm the column is read as text.
    is_match = df['CODE'].astype(str).str.contains(pattern, regex=True)
    return df.loc[is_match, 'CODE'].unique()

In [None]:
# ICD-9 and ICD-10 code prefixes for each requested diagnosis.
# Every entry is used as a "starts with" prefix when it is matched against the
# reference code lists, so a category such as 'I50' also selects its sub-codes.


def _icd9_span(first, last):
    """Return the ICD-9 prefixes ``first``..``last`` (inclusive) as strings."""
    return [str(number) for number in range(first, last + 1)]


def _icd10_span(letter, first, last):
    """Return ``letter`` + zero-padded two-digit numbers ``first``..``last`` (inclusive)."""
    return [f"{letter}{number:02d}" for number in range(first, last + 1)]


conditions = {
    "Total Cardiovascular Disease (CVD)": {
        'icd9': _icd9_span(401, 449),
        'icd10': _icd10_span("I", 10, 79),
    },
    "Congestive Heart Failure (CHF)": {
        'icd9': ['428'],
        'icd10': ['I50'],
    },
    "Myocardial Infarction (MI)": {
        'icd9': ['410'],
        'icd10': _icd10_span("I", 21, 22),
    },
    "Ischemic Stroke": {
        'icd9': ['433', '434', '436'],
        'icd10': ['I63'],
    },
    "Hemorrhagic Stroke": {
        'icd9': ['430', '431', '432'],
        'icd10': _icd10_span("I", 60, 62),
    },
    "Stroke": {
        'icd9': ['430', '431', '432', '433', '434', '436'],
        'icd10': _icd10_span("I", 60, 63),
    },
    "Pneumonia": {
        'icd9': _icd9_span(480, 487),
        'icd10': _icd10_span("J", 9, 18),
    },
    "Chronic Obstructive Pulmonary Disease (COPD)": {
        'icd9': ['490', '491', '492', '494', '495', '496'],
        'icd10': _icd10_span("J", 40, 44) + ['J47'],
    },
    "Hypertension": {
        'icd9': ['401'],
        'icd10': ['I10'],
    },
    "Diabetes": {
        'icd9': ['250'],
        'icd10': _icd10_span("E", 10, 14),
    },
    "Bronchus, Trachea, & Lung Cancer": {
        'icd9': ['162'],
        'icd10': ['C33', 'C34'],
    },
    "Melanoma": {
        'icd9': ['172'],
        'icd10': ['C43'],
    },
    "Breast Cancer": {
        'icd9': ['174', '175'],
        'icd10': ['C50'],
    },
    "Leukemia": {
        'icd9': _icd9_span(203, 208),
        'icd10': _icd10_span("C", 90, 95),
    },
    "All Cancer": {
        'icd9': _icd9_span(140, 209),
        'icd10': _icd10_span("C", 0, 76) + _icd10_span("C", 80, 97) + ['C7A'],
    },
    # Same as "All Cancer" with ICD-9 162 and ICD-10 C33/C34 left out.
    "All Cancer minus Bronchus, Trachea, & Lung": {
        'icd9': _icd9_span(140, 161) + _icd9_span(163, 209),
        'icd10': (
            _icd10_span("C", 0, 32)
            + _icd10_span("C", 35, 76)
            + _icd10_span("C", 80, 97)
            + ['C7A']
        ),
    },
}

In [None]:
# (long condition name, short identifier) pairs. The short identifier is used
# as the top-level key of that condition's section in the generated YAML.
short_names = dict([
    ("Total Cardiovascular Disease (CVD)", "cvd"),
    ("Congestive Heart Failure (CHF)", "chf"),
    ("Myocardial Infarction (MI)", "mi"),
    ("Ischemic Stroke", "isch_stroke"),
    ("Hemorrhagic Stroke", "hem_stroke"),
    ("Stroke", "stroke"),
    ("Pneumonia", "pneumonia"),
    ("Chronic Obstructive Pulmonary Disease (COPD)", "copd"),
    ("Hypertension", "htn"),
    ("Diabetes", "diabetes"),
    ("Bronchus, Trachea, & Lung Cancer", "lung_cancer"),
    ("Melanoma", "melanoma"),
    ("Breast Cancer", "breast_cancer"),
    ("Leukemia", "leukemia"),
    ("All Cancer", "all_cancer"),
    ("All Cancer minus Bronchus, Trachea, & Lung", "all_cancer_minus_lung"),
])

In [None]:
# Build one YAML section per condition, write them all to the config file,
# and echo the result.


def _to_builtin(value):
    """Unwrap a NumPy scalar into the equivalent built-in Python object.

    Under NumPy >= 2 the repr of a scalar is e.g. ``np.int64(4280)``; the lists
    below are rendered with ``repr``, so without this step that text would be
    written verbatim into the YAML. Plain Python objects pass through unchanged.
    """
    return value.item() if hasattr(value, 'item') else value


# Collect the pieces and join once instead of growing a string in the loop.
# The first piece is the same leading text the output has always started with.
yaml_sections = [" "]

for condition, codes in conditions.items():

    identifier = short_names[condition]
    # Skip the lookup entirely when a condition lists no prefixes for a code system.
    icd9_codes = match_codes(icd9_file, codes['icd9']) if codes['icd9'] else []
    icd10_codes = match_codes(icd10_file, codes['icd10']) if codes['icd10'] else []

    icd9_list = [_to_builtin(code) for code in icd9_codes]
    icd10_list = [_to_builtin(code) for code in icd10_codes]

    yaml_section = f"""
{identifier} :
    long_name : "{condition}"
    icd9 : {icd9_list}
    icd10 : {icd10_list}
"""
    yaml_sections.append(yaml_section)

yaml_output = "".join(yaml_sections)

# Explicit encoding so the file does not depend on the platform default.
with open('../conf/icd_codes/icd_codes_11.yaml', 'w', encoding='utf-8') as file:
    file.write(yaml_output)

print(yaml_output)