# Health outcomes 

- The 500 Cities and PLACES datasets share the same measure definitions, which are documented on the [official website](https://www.cdc.gov/places/site.html). The variables listed under [Health Outcomes](https://www.cdc.gov/places/measure-definitions/health-outcomes.html) seem to be the best fit for our project:

    1. Arthritis among adults, `ARTHRITIS`
    2. High blood pressure among adults, `BPHIGH`
    3. Cancer (non-skin) or melanoma among adults, `CANCER`
    4. Current asthma among adults, `CASTHMA`
    5. Coronary heart disease among adults, `CHD`
    6. Chronic obstructive pulmonary disease among adults, `COPD`
    7. Diagnosed diabetes among adults, `DIABETES`
    8. High cholesterol among adults who have ever been screened, `HIGHCHOL`
    9. Chronic kidney disease among adults, `KIDNEY`
    10. Depression among adults, `DEPRESSION` 
    11. Obesity among adults, `OBESITY`
    12. Stroke among adults, `STROKE`

    13. *I also retained the `PHLTH` variable (also 2019 data), which comes from the Health Status measures rather than Health Outcomes, in case we want to look at it; it is easy to drop if not.*

In [1]:
# import necessary libraries
import pandas as pd
import numpy as np


In [2]:
# read places_2021.csv (path is relative to the notebook's working directory)
# NOTE(review): the FIPS columns appear to be parsed as integers (the preview
# below shows countyfips 1089 for an Alabama tract), which would drop any
# leading zeros — confirm whether they should be read as strings instead
places_2021 = pd.read_csv(filepath_or_buffer="places_2021.csv")


In [3]:
# lowercase every column label so later cells can refer to columns without
# depending on the casing used in the CSV header
places_2021.columns = places_2021.columns.map(lambda label: str(label).lower())


In [4]:
# preview the first five rows of the full frame
places_2021.head(n=5)


Unnamed: 0,tractfips,countyfips,stateabbr,statedesc,countyname,totalpopulation,access2_crudeprev,access2_crude95ci,arthritis_crudeprev,arthritis_crude95ci,...,obesity_crude95ci,phlth_crudeprev,phlth_crude95ci,sleep_crudeprev,sleep_crude95ci,stroke_crudeprev,stroke_crude95ci,teethlost_crudeprev,teethlost_crude95ci,geolocation
0,4013422643,4013,AZ,Arizona,Maricopa,5789,11.9,"(10.1, 14.0)",17.0,"(16.1, 17.9)",...,"(26.9, 29.7)",9.2,"( 8.2, 10.2)",34.5,"(33.0, 36.3)",1.8,"( 1.6, 2.0)",8.0,"( 5.2, 11.6)",POINT (-111.61853 33.35726769)
1,6067007402,6067,CA,California,Sacramento,6180,15.4,"(13.5, 17.3)",24.6,"(23.8, 25.3)",...,"(29.6, 31.4)",15.1,"(14.2, 16.2)",35.7,"(34.8, 36.7)",3.9,"( 3.6, 4.3)",18.2,"(13.7, 23.5)",POINT (-121.3791473 38.6869681)
2,1089000201,1089,AL,Alabama,Madison,760,25.4,"(21.2, 30.1)",36.0,"(34.6, 37.3)",...,"(46.6, 49.7)",22.5,"(20.4, 24.7)",50.3,"(48.8, 51.3)",7.6,"( 6.8, 8.6)",33.3,"(24.1, 43.8)",POINT (-86.55005486 34.77465775)
3,1101002202,1101,AL,Alabama,Montgomery,1185,25.2,"(21.2, 29.4)",36.1,"(35.0, 37.3)",...,"(45.3, 47.9)",20.7,"(19.1, 22.7)",49.7,"(48.3, 51.1)",7.2,"( 6.4, 8.0)",34.0,"(25.6, 42.0)",POINT (-86.30555503 32.31774882)
4,1013952800,1013,AL,Alabama,Butler,1394,14.3,"(11.9, 17.1)",36.0,"(34.5, 37.4)",...,"(32.3, 35.2)",15.2,"(13.5, 16.9)",35.7,"(33.9, 37.5)",4.2,"( 3.6, 4.8)",15.1,"( 8.9, 22.9)",POINT (-86.62833756 31.83774723)


In [5]:
# columns to keep, based on the rationale above: the tract/county identifiers,
# then the `_crudeprev` / `_crude95ci` pair for each retained measure, then
# the geolocation column
columns_to_keep = [
    "tractfips",
    "countyfips",
    "stateabbr",
    "statedesc",
    "countyname",
    "totalpopulation",
    *(
        f"{measure}_{stat}"
        for measure in (
            "arthritis",
            "bphigh",
            "cancer",
            "casthma",
            "chd",
            "copd",
            "depression",
            "diabetes",
            "highchol",
            "kidney",
            "obesity",
            "phlth",
            "stroke",
        )
        for stat in ("crudeprev", "crude95ci")
    ),
    "geolocation",
]


In [6]:
# select only the columns we want to keep
#
# `reindex()` silently adds an all-NaN column for any requested label that is
# not present in the frame, so a misspelled name in `columns_to_keep` would go
# unnoticed. Check for missing labels first and fail loudly instead.
missing_columns = [col for col in columns_to_keep if col not in places_2021.columns]
if missing_columns:
    raise KeyError(f"columns not found in places_2021: {missing_columns}")

# empty or whitespace-only strings are replaced with pd.NA so they are treated
# as missing values
hlthout_2019 = (
    places_2021
    .reindex(columns=columns_to_keep)
    .replace(r"^\s*$", pd.NA, regex=True)
)


In [7]:
# preview the first five rows of the trimmed frame
hlthout_2019.head(n=5)


Unnamed: 0,tractfips,countyfips,stateabbr,statedesc,countyname,totalpopulation,arthritis_crudeprev,arthritis_crude95ci,bphigh_crudeprev,bphigh_crude95ci,...,highchol_crude95ci,kidney_crudeprev,kidney_crude95ci,obesity_crudeprev,obesity_crude95ci,phlth_crudeprev,phlth_crude95ci,stroke_crudeprev,stroke_crude95ci,geolocation
0,4013422643,4013,AZ,Arizona,Maricopa,5789,17.0,"(16.1, 17.9)",23.7,"(22.7, 24.8)",...,"(26.7, 28.2)",2.1,"( 1.9, 2.2)",28.3,"(26.9, 29.7)",9.2,"( 8.2, 10.2)",1.8,"( 1.6, 2.0)",POINT (-111.61853 33.35726769)
1,6067007402,6067,CA,California,Sacramento,6180,24.6,"(23.8, 25.3)",30.5,"(29.7, 31.3)",...,"(29.2, 30.6)",3.4,"( 3.3, 3.6)",30.5,"(29.6, 31.4)",15.1,"(14.2, 16.2)",3.9,"( 3.6, 4.3)",POINT (-121.3791473 38.6869681)
2,1089000201,1089,AL,Alabama,Madison,760,36.0,"(34.6, 37.3)",53.7,"(52.3, 55.2)",...,"(31.5, 33.7)",5.5,"( 5.1, 5.9)",48.2,"(46.6, 49.7)",22.5,"(20.4, 24.7)",7.6,"( 6.8, 8.6)",POINT (-86.55005486 34.77465775)
3,1101002202,1101,AL,Alabama,Montgomery,1185,36.1,"(35.0, 37.3)",54.6,"(53.3, 55.8)",...,"(33.4, 35.7)",4.9,"( 4.6, 5.3)",46.7,"(45.3, 47.9)",20.7,"(19.1, 22.7)",7.2,"( 6.4, 8.0)",POINT (-86.30555503 32.31774882)
4,1013952800,1013,AL,Alabama,Butler,1394,36.0,"(34.5, 37.4)",43.0,"(41.5, 44.5)",...,"(37.7, 39.8)",3.3,"( 3.1, 3.6)",33.8,"(32.3, 35.2)",15.2,"(13.5, 16.9)",4.2,"( 3.6, 4.8)",POINT (-86.62833756 31.83774723)


In [8]:
# list the column labels of the trimmed frame to confirm the selection
kept_columns = hlthout_2019.columns
print(kept_columns)


Index(['tractfips', 'countyfips', 'stateabbr', 'statedesc', 'countyname',
       'totalpopulation', 'arthritis_crudeprev', 'arthritis_crude95ci',
       'bphigh_crudeprev', 'bphigh_crude95ci', 'cancer_crudeprev',
       'cancer_crude95ci', 'casthma_crudeprev', 'casthma_crude95ci',
       'chd_crudeprev', 'chd_crude95ci', 'copd_crudeprev', 'copd_crude95ci',
       'depression_crudeprev', 'depression_crude95ci', 'diabetes_crudeprev',
       'diabetes_crude95ci', 'highchol_crudeprev', 'highchol_crude95ci',
       'kidney_crudeprev', 'kidney_crude95ci', 'obesity_crudeprev',
       'obesity_crude95ci', 'phlth_crudeprev', 'phlth_crude95ci',
       'stroke_crudeprev', 'stroke_crude95ci', 'geolocation'],
      dtype='object')


In [9]:
# write the health outcomes frame to a new CSV file, leaving out the row index
hlthout_2019.to_csv(path_or_buf="hlthout_2019.csv", index=False)


In [10]:
# read the PLACES / 500 Cities data dictionary CSV
measures_full = pd.read_csv(
    filepath_or_buffer="PLACES_and_500_Cities__Data_Dictionary_20250930_full.csv"
)

# show the first 44 rows of the dictionary
measures_full.head(n=44)


Unnamed: 0,MeasureID,Measure full name,Measure short name,CategoryID,Category name,PLACES Release 2024,Measure full name 2016-2023,Measure short name 2016-2023,PLACES Release 2023,PLACES Release 2022,PLACES Release 2021,PLACES Release 2020,500 Cities Release 2019,500 Cities Release 2018,500 Cities Release 2017,500 Cities Release 2016,Frequency_BRFSS_year
0,ARTHRITIS,Arthritis among adults,Arthritis,HLTHOUT,Health Outcomes,2022,Arthritis among adults aged >=18 years,,2021,2020,2019,2018,2017,2016,2015,2014,Every
1,BPHIGH,High blood pressure among adults,High Blood Pressure,HLTHOUT,Health Outcomes,2021,High blood pressure among adults aged >=18 years,,2021,2019,2019,2017,2017,2015,2015,2013,Odd
2,CANCER,Cancer (non-skin) or melanoma among adults,Cancer (non-skin) or melanoma,HLTHOUT,Health Outcomes,2022,Cancer (excluding skin cancer) among adults ag...,Cancer (except skin),2021,2020,2019,2018,2017,2016,2015,2014,Every
3,CASTHMA,Current asthma among adults,Current Asthma,HLTHOUT,Health Outcomes,2022,Current asthma among adults aged >=18 years,,2021,2020,2019,2018,2017,2016,2015,2014,Every
4,CHD,Coronary heart disease among adults,Coronary Heart Disease,HLTHOUT,Health Outcomes,2022,Coronary heart disease among adults aged >=18 ...,,2021,2020,2019,2018,2017,2016,2015,2014,Every
5,COPD,Chronic obstructive pulmonary disease among ad...,COPD,HLTHOUT,Health Outcomes,2022,Chronic obstructive pulmonary disease among ad...,,2021,2020,2019,2018,2017,2016,2015,2014,Every
6,DEPRESSION,Depression among adults,Depression,HLTHOUT,Health Outcomes,2022,Depression among adults aged >=18 years,,2021,2020,2019,X,X,X,X,X,Every
7,DIABETES,Diagnosed diabetes among adults,Diabetes,HLTHOUT,Health Outcomes,2022,Diagnosed diabetes among adults aged >=18 years,,2021,2020,2019,2018,2017,2016,2015,2014,Every
8,HIGHCHOL,High cholesterol among adults who have ever be...,High Cholesterol,HLTHOUT,Health Outcomes,2021,High cholesterol among adults aged >=18 years ...,,2021,2019,2019,2017,2017,2015,2015,2013,Odd
9,KIDNEY,Chronic kidney disease among adults aged >=18 ...,Chronic Kidney Disease,HLTHOUT,Health Outcomes,X,,,2021,2020,2019,2018,2017,2016,2015,2014,Every
