In [1]:
import numpy as np
import pandas as pd
import json
import matplotlib.pyplot as plt
from collections import Counter

In [2]:
# Dataset locations (absolute paths on the author's machine).
csv_dir = "/home/cptaswadu/new-rescue/RESCUE-n8n/eval/insurance/dataset/"
filtered_gt_path = "/home/cptaswadu/new-rescue/RESCUE-n8n/eval/insurance/dataset/filtered_ground_truth.json"
old_gt_path = "/home/cptaswadu/new-rescue/RESCUE-n8n/eval/insurance/dataset/ground_truth.json"

# Filtered cases to re-label, and the previous ground truth.
with open(filtered_gt_path) as f:
    filtered_data = json.load(f)

with open(old_gt_path) as f:
    old_gt = json.load(f)

# Q3 lookup table: outer-join the two Q3 CSVs on "Category" and save the result.
q3_cat_ind = pd.read_csv(csv_dir + 'cat_ind_q3.csv')
q3_cat_test_ins = pd.read_csv(csv_dir + 'cat_test_ins_q3.csv')
q3_merged = q3_cat_ind.merge(q3_cat_test_ins, on='Category', how='outer')
q3_merged.to_csv(csv_dir + 'q3_merged.csv', index=False)

# Q5 lookup table: same outer join for the two Q5 CSVs.
q5_cat_hist = pd.read_csv(csv_dir + 'cat_ind_q5.csv')
q5_cat_test_hist = pd.read_csv(csv_dir + 'cat_test_ins_q5.csv')
q5_merged = q5_cat_hist.merge(q5_cat_test_hist, on='Category', how='outer')
q5_merged.to_csv(csv_dir + 'q5_merged.csv', index=False)

In [3]:
def is_in_age_range(age_value, target_ranges):
    """Report whether an age lies inside any of the given inclusive ranges.

    Args:
        age_value: Age to test; it is coerced with ``int()`` before comparing.
        target_ranges: Iterable of ``(min_age, max_age)`` pairs; both bounds
            are inclusive.

    Returns:
        True if at least one range contains the age, otherwise False.
    """
    age = int(age_value)
    # A (0, 0) range can only match age 0, which the inclusive comparison
    # already expresses, so no separate case is needed for it.
    return any(lower <= age <= upper for lower, upper in target_ranges)

def get_answers(sample_patient_dict: dict) -> dict:
    """Derive the rule-based answers Q0-Q8 for one sample patient.

    Every answer starts as "Not Specified" and is overwritten only when one of
    the rules below applies to the patient's insurer / test combination.

    The function reads module-level lookup tables that must exist by the time
    it is called: ``q3_merged``, ``q5_merged`` and the ``*_indications``
    collections.

    Args:
        sample_patient_dict: Mapping with the keys "case_id", "insurance",
            "genetic_tests", "age_years", "order_provider",
            "clinical_indication", "prior_testing", "family_history" and
            "genetic_counselor".  ``age_years`` is compared against -1, which
            appears to be the marker for an unknown age (TODO confirm).

    Returns:
        Dict with the keys "Q0" ... "Q8".  Q0 is the test name (any value
        starting with "CMA" collapses to "CMA").  Q1-Q6 are "Yes", "No" or
        "Not Specified".  Q7 is a CPT code string, "Not Specified", or None
        when the test has no entry in the CPT mapping.  Q8 is "No" if any of
        Q1-Q6 is "No", otherwise "Yes".

    Raises:
        KeyError: If one of the expected keys is missing from the input.
    """
    # NOTE(review): case_id is read but not used anywhere below.
    case_id = sample_patient_dict["case_id"]
    insurance = sample_patient_dict["insurance"]
    genetic_tests = sample_patient_dict["genetic_tests"]
    age = sample_patient_dict["age_years"]
    order_provider = sample_patient_dict["order_provider"]
    clinical_indication = sample_patient_dict["clinical_indication"]
    prior_testing = sample_patient_dict["prior_testing"]
    family_history = sample_patient_dict["family_history"]
    genetic_counselor = sample_patient_dict["genetic_counselor"]

    # Default for every question; rules below overwrite it where they apply.
    q0 = q1 = q2 = q3 = q4 = q5 = q6 = q7 = q8 = "Not Specified"

    # Q0 - genetic tests: every CMA variant is reported as plain "CMA".
    if genetic_tests.startswith('CMA'):
        q0 = "CMA"
    else:
        q0 = genetic_tests
    
    # Q1 - age criteria, decided per insurer and per test.
    if insurance == "BCBS_FEP":
        if genetic_tests in ["WES", "WGS"]:
            if age == -1:
                q1 = "Not Specified"
            elif age <= 18:
                q1 = "Yes"
            else:
                q1 = "No"
        # Exact match only: "CMA_developmental_disorder" / "CMA_tumor" do not
        # enter this branch.  Its body is the same as the WES/WGS branch above.
        elif genetic_tests == "CMA":
            if age == -1:
                q1 = "Not Specified"
            elif age <= 18:
                q1 = "Yes"
            else:
                q1 = "No"
        elif genetic_tests == "BRCA1/2":
            # Unknown age (-1) and ages under 18 are answered "No".
            if age == -1 or age < 18:
                q1 = "No"
            else:
                if clinical_indication in bcbs_brca_age_indications:
                    required_age = bcbs_brca_age_indications[clinical_indication]
                    # A tuple is an inclusive (min, max) window; a plain number
                    # is an upper bound.  If the age is outside the limit, q1
                    # is left at "Not Specified" rather than set to "No".
                    if isinstance(required_age, tuple):
                        if required_age[0] <= age <= required_age[1]:
                            q1 = "Yes"
                    else:
                        if age <= required_age:
                            q1 = "Yes"
                elif clinical_indication in bcbs_brca_no_age_indications:
                    q1 = "Yes"
            
                # Fallback: a listed family history also yields "Yes".
                if q1 == "Not Specified" and family_history in bcbs_brca_family_indications:
                    q1 = "Yes"
            
    
    elif insurance == "UHC":
        if genetic_tests in ["WES", "WGS"]:
            # With age -1 the answer depends only on the test: WES "Yes", WGS "No".
            if age == -1:
                q1 = "Yes" if genetic_tests == "WES" else "No"
            elif clinical_indication in uhc_diagnosed_age_indications:
                required_age = uhc_diagnosed_age_indications[clinical_indication]
                q1 = "Yes" if age <= required_age else "Not Specified"
            else:
                q1 = "Not Specified"
                
        elif genetic_tests == "CMA":
            if age == -1:
                q1 = "Yes"
            elif clinical_indication in uhc_cma_dignosed_age_indications:
                required_age = uhc_cma_dignosed_age_indications[clinical_indication]
                q1 = "Yes" if age <= required_age else "Not Specified"
            else:
                q1 = "Not Specified"

        elif genetic_tests == "BRCA1/2":
            # Only membership in uhc_brca_age_indications is checked; the
            # thresholds 50 and 65 are hard-coded here.
            # NOTE(review): unlike the BCBS_FEP and Cigna BRCA1/2 branches there
            # is no guard for age == -1 or age < 18, so -1 satisfies both
            # comparisons below - confirm this is intended.
            if clinical_indication in uhc_brca_age_indications:
                if age <= 50:
                    q1 = "Yes"
            elif clinical_indication in uhc_brca_indications:
                if age <= 65:
                    q1 = "Yes"
            if q1 == "Not Specified" and family_history in uhc_brca_family_indications:
                q1 = "Yes"

    
    elif insurance == "Cigna":
        if genetic_tests in ["WES", "WGS"]:
            if age == -1:
                q1 = "Yes" if genetic_tests == "WES" else "No"
            else:
                # Ages 21 and above are answered "No" regardless of indication.
                if age >= 21:
                    q1 = "No"
                else:
                    if clinical_indication in cigna_diagnosed_age_indications:
                        required_age = cigna_diagnosed_age_indications[clinical_indication]
                        if age <= required_age:
                            q1 = "Yes"
                        else:
                            q1 = "Not Specified"
                    else:
                        # Under 21 with an indication that has no age limit listed.
                        q1 = "Yes"
                        
        elif genetic_tests == "BRCA1/2":
            if age == -1 or age < 18:
                q1 = "No"
            else:
                if clinical_indication in cigna_brca_age_indications:
                    required_age = cigna_brca_age_indications[clinical_indication]
                    if age <= required_age:
                        q1 = "Yes"
                elif clinical_indication in cigna_brca_indications:
                    q1 = "Yes"
                if q1 == "Not Specified" and family_history in cigna_brca_family_indications:
                    q1 = "Yes"
            
        elif genetic_tests == "CMA_developmental_disorder":
            # "Yes" only for age 0; every other value, including -1, gives "No".
            q1 = "Yes" if age == 0 else "No"
    
    # Q2 - order provider: only evaluated for UHC WES/WGS.
    if insurance == "UHC" and genetic_tests in ["WES", "WGS"]:
        approved_providers = ["Neurologist", "Medical Geneticist", "Neonatologist", "Developmental Pediatrician"]
        q2 = "Yes" if order_provider in approved_providers else "No"
    
    # Q3 - clinical indication, looked up in q3_merged.
    mask_test = (q3_merged['Test'] == genetic_tests)
    mask_ind = (q3_merged['Indication'] == clinical_indication)
    mask_ins = (q3_merged['Insurance'] == insurance)
    
    full_match = len(q3_merged[mask_test & mask_ind & mask_ins]) > 0
    test_insurance_combo = len(q3_merged[mask_test & mask_ins]) > 0
    
    # "Yes": a row matches test + indication + insurer.
    # "No": rows exist for the test/insurer pair, but not with this indication.
    # Otherwise the table has nothing for this pair and "Not Specified" stays.
    if full_match:
        q3 = "Yes"
    elif test_insurance_combo:
        q3 = "No"
    else:
        q3 = "Not Specified"
    
    # Q4 - prior testing: only evaluated for WES/WGS, for any insurer.
    if genetic_tests in ["WES", "WGS"]:
        q4 = "Yes" if prior_testing in ["CMA", "Fragile X testing", "FISH testing", "Karyotype testing"] else "No"
    
    # Q5 - family history, looked up in q5_merged with the same three-way
    # logic as Q3.
    mask_test = (q5_merged['Test'] == genetic_tests)
    mask_fh = (q5_merged['Family history'] == family_history)
    mask_ins = (q5_merged['Insurance'] == insurance)
    
    full_match = len(q5_merged[mask_test & mask_fh & mask_ins]) > 0
    test_insurance_combo = len(q5_merged[mask_test & mask_ins]) > 0
    
    if full_match:
        q5 = "Yes"
    elif test_insurance_combo:
        q5 = "No"
    else:
        q5 = "Not Specified"
    
    # Q6 - genetic counselor: exact-string match against the accepted values.
    approved_counseling = [
        "Saw a genetic counselor before testing and will visit after results are received",
        "Saw a genetic counselor multiple times",
        "discussed with a genetic specialist"
    ]
    
    # Only evaluated for these insurer/test pairs; UHC is never evaluated.
    if insurance == "BCBS_FEP" and genetic_tests in ["BRCA1/2", "WES", "WGS"]:
        q6 = "Yes" if genetic_counselor in approved_counseling else "No"
    elif insurance == "Cigna" and genetic_tests in ["BRCA1/2", "CMA_developmental_disorder"]:
        q6 = "Yes" if genetic_counselor in approved_counseling else "No"
    
    # Q7 - CPT code
    cpt_mapping = {
        "WES": "81415", "WGS": "81425", "BRCA1/2": "81162",
        "CMA": "81228", "CMA_developmental_disorder": "81228", "CMA_tumor": "81277"
    }
    
    # Only UHC and Cigna get a code; for other insurers q7 stays "Not Specified".
    # NOTE(review): .get() returns None for a test name that is not in
    # cpt_mapping, so q7 can become None instead of "Not Specified".
    if insurance in ["UHC", "Cigna"]:
        if q0 == "CMA":
            # UHC "CMA_tumor" is given the generic CMA code rather than its
            # own mapping entry.
            if insurance == "UHC" and genetic_tests == "CMA_tumor":
                q7 = cpt_mapping.get("CMA")
            elif insurance == "Cigna" and genetic_tests == "CMA":
                q7 = "Not Specified"
            else:
                q7 = cpt_mapping.get(genetic_tests)
        else:
            q7 = cpt_mapping.get(q0)
    
    # Q8 - final decision: any explicit "No" among Q1-Q6 gives "No";
    # "Not Specified" answers do not prevent a "Yes".
    all_answers = [q1, q2, q3, q4, q5, q6]
    q8 = "No" if "No" in all_answers else "Yes"
    
    return {
        "Q0": q0, "Q1": q1, "Q2": q2, "Q3": q3,
        "Q4": q4, "Q5": q5, "Q6": q6, "Q7": q7, "Q8": q8
    }

# UHC WES/WGS indications that carry an age cut-off, mapped to the oldest
# qualifying age in years (get_answers() tests `age <= value`).
# "Global developmental delay" is listed in two capitalisations, presumably to
# match both spellings in the data.
uhc_diagnosed_age_indications = dict([
    ("Moderate, severe, or profound Intellectual Disability diagnosed by 18 years of age", 18),
    ("Epileptic encephalopathy with onset before three years of age", 3),
    ("Global Developmental Delay", 5),
    ("Global developmental delay", 5),
    ("Unexplained epileptic encephalopathy (onset before three years of age) and no prior epilepsy multigene panel testing performed", 3),
    ("Significant hearing or visual impairment diagnosed by 18 years of age", 18),
    ("Developmental Delay/Intellectual Disability where a specific syndrome is not suspected", 18),
])

# UHC CMA indications that carry an age cut-off, mapped to the oldest
# qualifying age in years (get_answers() tests `age <= value`).
# The name is spelled "dignosed"; get_answers() refers to it by this spelling.
uhc_cma_dignosed_age_indications = dict([
    ("Global developmental delay", 5),
    ("Global Developmental Delay", 5),
    ("Moderate, severe, or profound Intellectual Disability diagnosed by 18 years of age", 18),
    ("Developmental Delay/Intellectual Disability where a specific syndrome is not suspected", 18),
])

# Cigna WES/WGS indications that carry an age cut-off, mapped to the oldest
# qualifying age in years (get_answers() tests `age <= value`, and only for
# patients under 21).
cigna_diagnosed_age_indications = dict([
    ("Moderate, severe, or profound Intellectual Disability diagnosed by 18 years of age", 18),
    ("Epileptic encephalopathy with onset before three years of age", 3),
    ("Unexplained epileptic encephalopathy (onset before three years of age) and no prior epilepsy multigene panel testing performed", 3),
    ("Global developmental delay", 5),
    ("Global Developmental Delay", 5),
    ("Developmental Delay/Intellectual Disability where a specific syndrome is not suspected", 18),
])

# BCBS_FEP BRCA1/2 indications with an age condition.  A plain number is an
# upper bound (`age <= value`); a tuple is an inclusive (min, max) window.
# get_answers() distinguishes the two forms with isinstance(..., tuple).
bcbs_brca_age_indications = dict([
    ("Diagnosed breast cancer at age equal to under 45", 45),
    ("Personal history of breast cancer diagnosed between ages 46 and 50 with an additional primary breast cancer at any age", (46, 50)),
    ("Multiple primary Breast Cancers (as a prior diagnosis or as a bilateral primary cancer)", (46, 50)),
    ("Breast Cancer and Unknown or Limited Family History", (46, 50)),
    ("Personal history of breast cancer diagnosed between ages 46 and 50 with at least one close relative diagnosed with breast, ovarian, pancreatic, or prostate cancer at any age", (46, 50)),
    ("Personal history of breast cancer diagnosed equal and under 60 with Triple-negative breast cancer", 60),
    ("Personal history of breast cancer diagnosed any age with at least one close relative with breast cancer diagnosed at age equal to under 50 years", 50),
    ("Female with breast cancer diagnosis 50 years of age or younger", 50),
    ("Breast cancer diagnosed at age 50 or younger", 50),
    ("Triple-negative breast cancer", 60),
])

# BCBS_FEP BRCA1/2 indications that qualify at any age.  Only membership is
# tested by get_answers(); every value is None.
# The key strings (including "receptornegative") are kept exactly as written,
# presumably to match the indication text in the data.
bcbs_brca_no_age_indications = dict.fromkeys([
    "Diagnosed with two or more primary breast cancers at any age",
    "Diagnosed at any age with triple negative breast cancer (i.e., estrogen receptornegative (ER-), progesterone receptor negative (PR-), and human epidermal growth factor receptor negative (HER2-) breast cancer)",
    "Male with breast cancer at any age",
    "Metastatic Breast Cancer",
    "Breast Cancer and individual was assigned male at birth",
    "Personal history of breast cancer diagnosed any age with at least one close relative with ovarian carcinoma",
    "Personal history of epithelial ovarian carcinoma (including fallopian tube cancer or peritoneal cancer) at any age",
    "Epithelial ovarian, fallopian tube, or primary peritoneal cancer diagnosis at any age",
    "Personal history of breast cancer diagnosed any age with at least one close relative with pancreatic cancer",
    "Personal history of exocrine pancreatic cancer at any age",
    "Exocrine pancreatic cancer",
    "Pancreatic cancer at any age",
    "Personal history of breast cancer diagnosed any age with at least one close relative with Metastatic or intraductal/cribriform prostate cancer",
    "Personal history of metastatic or intraductal/cribriform histology prostate cancer at any age",
    "Prostate cancer at any age with metastatic, intraductal/cribriform histology, high-risk, or very-high-risk group",
    "Metastatic prostate cancer",
])

# "Family history" values of the q5_merged rows for BRCA1/2 under BCBS_FEP.
bcbs_brca_family_indications = q5_merged.loc[
    q5_merged['Test'].eq('BRCA1/2') & q5_merged['Insurance'].eq('BCBS_FEP'),
    'Family history',
].tolist()


# "Indication" values of the q3_merged rows for BRCA1/2 under UHC.
uhc_brca_indications = q3_merged.loc[
    q3_merged['Test'].eq('BRCA1/2') & q3_merged['Insurance'].eq('UHC'),
    'Indication',
].tolist()

# UHC BRCA1/2 indications handled with an age limit; every entry maps to 50.
# get_answers() only tests membership here and applies its own hard-coded
# `age <= 50`, so the stored values are not read there.
# NOTE(review): the first key says "under 45" but maps to 50 - confirm.
uhc_brca_age_indications = dict.fromkeys([
    "Diagnosed breast cancer at age equal to under 45",
    "Personal history of breast cancer diagnosed between ages 46 and 50 with an additional primary breast cancer at any age",
    "Female with breast cancer diagnosis 50 years of age or younger",
    "Breast cancer diagnosed at age 50 or younger",
], 50)

# "Family history" values of the q5_merged rows for BRCA1/2 under UHC.
uhc_brca_family_indications = q5_merged.loc[
    q5_merged['Test'].eq('BRCA1/2') & q5_merged['Insurance'].eq('UHC'),
    'Family history',
].tolist()


# Cigna BRCA1/2 indications with an age limit; every entry maps to 50 and
# get_answers() tests `age <= value` against it.
# NOTE(review): the first key says "under 45" but maps to 50 - confirm.
cigna_brca_age_indications = dict.fromkeys([
    "Diagnosed breast cancer at age equal to under 45",
    "Personal history of breast cancer diagnosed between ages 46 and 50 with an additional primary breast cancer at any age",
    "Female with breast cancer diagnosis 50 years of age or younger",
    "Breast cancer diagnosed at age 50 or younger",
    "Personal history of breast cancer diagnosed between ages 46 and 50 with at least one close relative diagnosed with breast, ovarian, pancreatic, or prostate cancer at any age",
], 50)

# "Indication" values of the q3_merged rows for BRCA1/2 under Cigna.
cigna_brca_indications = q3_merged.loc[
    q3_merged['Test'].eq('BRCA1/2') & q3_merged['Insurance'].eq('Cigna'),
    'Indication',
].tolist()

# "Family history" values of the q5_merged rows for BRCA1/2 under Cigna.
cigna_brca_family_indications = q5_merged.loc[
    q5_merged['Test'].eq('BRCA1/2') & q5_merged['Insurance'].eq('Cigna'),
    'Family history',
].tolist()

In [4]:
# Recompute the answers for every filtered case and carry over the md5 that
# the previous ground truth stored for it.
new_ground_truth = {}

for item in filtered_data:
    # Only the first key/value pair of each entry is used; entries are
    # presumably single-key {case_id: case_data} mappings.
    case_id, case_data = list(item.items())[0]

    sample_dict = case_data["sample_patient_dict"]
    answers = get_answers(sample_dict)

    # Cases absent from the old ground truth get the placeholder "UNKNOWN".
    previous_entry = old_gt.get(case_id, {})
    expected_md5 = previous_entry.get("expected_md5", "UNKNOWN")

    new_ground_truth[case_id] = dict(answers, expected_md5=expected_md5)

with open(csv_dir + 'final_ground_truth.json', 'w') as f:
    json.dump(new_ground_truth, f, indent=4)