In [1]:
# import otter
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [9]:
KFF2019_adult_original = pd.read_csv('final_datasets_V1/KFF/kff_health_insurance_2019_adult.csv', skiprows = 2)
KFF2019_female_original = pd.read_csv('final_datasets_V1/KFF/kff_health_insurance_2019_female.csv', skiprows = 2)
KFF2019_male_original = pd.read_csv('final_datasets_V1/KFF/kff_health_insurance_2019_male.csv', skiprows = 2)

In [11]:
KFF2019_adult_original["Gender"] = "All"
KFF2019_female_original["Gender"] = "Female"
KFF2019_male_original["Gender"] = "Male"

KFF2019 = pd.concat([KFF2019_adult_original, KFF2019_female_original, KFF2019_male_original], ignore_index=True)[["Location", "Gender", "Uninsured"]]
KFF2019.shape

(249, 3)

In [12]:
KFF2019.head()

Unnamed: 0,Location,Gender,Uninsured
0,United States,All,0.129
1,Alabama,All,0.149
2,Alaska,All,0.153
3,Arizona,All,0.154
4,Arkansas,All,0.132


In [13]:
USCDI_original = pd.read_csv('final_datasets_V1/CDC/U.S._Chronic_Disease_Indicators.csv')

In [26]:
USCDI = USCDI_original[["YearStart","YearEnd","LocationDesc",
                        "Topic","Question","DataValueUnit", "DataValueType", "DataValue",
                        "StratificationCategory1", "Stratification1"]]
USCDI.shape

(309215, 10)

### Impact by Sex 

In [62]:
condition_1_t = USCDI["Topic"] == "Cardiovascular Disease"
condition_1_q = USCDI["Question"] == "Coronary heart disease mortality among all people, underlying cause"
condition_1_dvu = USCDI["DataValueUnit"] == "cases per 100,000"
condition_1_sc1 = USCDI["StratificationCategory1"].isin(["Sex", "Age"])
condition_1_dvt = USCDI["DataValueType"] == "Crude Rate"
condition_1_syear = USCDI["YearStart"] == 2019
condition_1_eyear = USCDI["YearEnd"] == 2019

USCDI_sex = USCDI[condition_1_t & 
    condition_1_q & 
    condition_1_dvu & 
    condition_1_sc1 & 
#     condition_1_dvt & 
    condition_1_syear
][["LocationDesc", "DataValue", "StratificationCategory1", "Stratification1"]]
# USCDI_sex.rename(columns={'DataValue': 'CHDCasesPer100000'}, inplace=True)
USCDI_sex

Unnamed: 0,LocationDesc,DataValue,StratificationCategory1,Stratification1
14902,California,1.9,Age,Age 0-44
15188,Alaska,39.7,Sex,Female
15499,California,110.6,Sex,Male
15589,Alaska,307.9,Age,Age >=65
15903,Alabama,434.6,Age,Age >=65
...,...,...,...,...
108707,Vermont,109.3,Sex,Female
108768,Wisconsin,76.4,Age,Age 45-64
108917,Wyoming,70.1,Age,Age 45-64
109141,Washington,66.7,Sex,Female


In [63]:
filtered_df = USCDI_sex[USCDI_sex['Stratification1'].isin(['Female', 'Male'])]
grouped = filtered_df.groupby(['LocationDesc', 'Stratification1'], as_index=False)['DataValue'].sum()
coronary_proportions = grouped.pivot(index='LocationDesc', columns='Stratification1', values='DataValue').fillna(0)
coronary_proportions.index.name = 'LocationDesc'
coronary_proportions.reset_index(inplace=True)
coronary_proportions['Frac_F'] = coronary_proportions['Female'] / (coronary_proportions['Female'] + coronary_proportions['Male'])
coronary_proportions

Stratification1,LocationDesc,Female,Male,Frac_F
0,Alabama,134.8,232.4,0.367102
1,Alaska,82.0,157.4,0.342523
2,Arizona,137.8,236.5,0.368154
3,Arkansas,229.6,390.4,0.370323
4,California,128.9,222.2,0.367132
5,Colorado,91.5,168.9,0.351382
6,Connecticut,140.3,223.4,0.385757
7,Delaware,138.4,262.5,0.345223
8,District of Columbia,154.0,235.1,0.395785
9,Florida,171.4,281.2,0.378701


In [64]:
filtered_df_2 = USCDI_sex[USCDI_sex['Stratification1'].isin(['Age 0-44', 'Age 45-64'])]
result = filtered_df_2.groupby('LocationDesc', as_index=False)['DataValue'].sum()
merged_table = pd.merge(coronary_proportions, result, on='LocationDesc')

merged_table["CHDPercentage"] = merged_table["DataValue"]/100000
merged_table["CHDPercentage_F"] = merged_table["CHDPercentage"] * merged_table["Frac_F"]
merged_table["CHDPercentage_M"] = merged_table["CHDPercentage"] * (1 - merged_table["Frac_F"])

merged_table.head()

Unnamed: 0,LocationDesc,Female,Male,Frac_F,DataValue,CHDPercentage,CHDPercentage_F,CHDPercentage_M
0,Alabama,134.8,232.4,0.367102,90.4,0.000904,0.000332,0.000572
1,Alaska,82.0,157.4,0.342523,72.3,0.000723,0.000248,0.000475
2,Arizona,137.8,236.5,0.368154,70.4,0.000704,0.000259,0.000445
3,Arkansas,229.6,390.4,0.370323,169.0,0.00169,0.000626,0.001064
4,California,128.9,222.2,0.367132,66.1,0.000661,0.000243,0.000418


### Impact by State


In [None]:
condition_1_t = USCDI["Topic"] == "Cardiovascular Disease"
condition_1_q = USCDI["Question"] == "Coronary heart disease mortality among all people, underlying cause"
condition_1_dvu = USCDI["DataValueUnit"] == "cases per 100,000"
condition_1_sc1 = USCDI["StratificationCategory1"].isin(["Sex", "Age"])
condition_1_dvt = USCDI["DataValueType"] == "Age-adjusted Rate"
condition_1_syear = USCDI["YearStart"] == 2019
condition_1_eyear = USCDI["YearEnd"] == 2019

USCDI_sex = USCDI[condition_1_t & condition_1_q & condition_1_dvu & condition_1_sc1 & condition_1_dvt & condition_1_syear]
USCDI_sex.head()

### Impact by Disease

In [None]:
condition_31 = USCDI["Topic"] == "Cardiovascular Disease"
condition_32 = USCDI["Question"] == "Coronary heart disease mortality among all people, underlying cause"
condition_33 = USCDI["DataValueUnit"] == "cases per 100,000"
condition_34 = USCDI["StratificationCategory1"].isin(["Sex", "Age"])
USCDI_sex = USCDI[condition_11 & condition_12 & condition_13 & condition_14]
USCDI_sex.head()