# Load Libraries

Census 2000-2009: https://www.census.gov/data/tables/time-series/demo/popest/intercensal-2000-2010-counties.html

Census 2010-2019: https://www.census.gov/data/tables/time-series/demo/popest/2010s-counties-total.html

In [1]:
import requests
import pandas as pd
import csv
import os
import sys
from glob import glob
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from tqdm import tqdm

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

#Change directory to project root directory
os.chdir("..")

from src.data.ACS_API import acs_api
from api_credentials import API_KEY
from src.data.PEP_API import pep_api

# Load CDC Diabetes Data

In [18]:
df_diabetes=pd.read_csv('data/raw/CDC_Data_yearly/First_csv/DiabetesAtlasCountyData.csv',skiprows=2,skipfooter=1,engine='python')
df_diabetes['Year']=2004

filenames = glob('data/raw/CDC_Data_yearly/*.csv')
for file in filenames:
    with open(file, newline='') as f:
        reader = csv.reader(f)
        row1 = next(reader)  # gets the first line
    year=row1[0].split(';')[-1].replace(' ','')
    df_diabetes_=pd.read_csv(file,skiprows=2,skipfooter=1,engine='python')
    df_diabetes_['Year']=year
    df_diabetes=pd.concat([df_diabetes_,df_diabetes])
df_diabetes=df_diabetes.sort_values(['State','County','Year'])
df_diabetes.reset_index(drop=True,inplace=True)

In [20]:
df_diabetes.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45164 entries, 0 to 45163
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   County        45164 non-null  object
 1   State         45164 non-null  object
 2   CountyFIPS    45164 non-null  int64 
 3   Percentage    45164 non-null  object
 4   Lower Limit   45164 non-null  object
 5    Upper Limit  45164 non-null  object
 6   Year          45164 non-null  object
dtypes: int64(1), object(6)
memory usage: 2.4+ MB


In [21]:
df_diabetes.head(1000)

Unnamed: 0,County,State,CountyFIPS,Percentage,Lower Limit,Upper Limit,Year
0,Autauga County,Alabama,1001,10.1,7.9,12.8,2004
1,Autauga County,Alabama,1001,11.5,8.8,14.7,2005
2,Autauga County,Alabama,1001,11.0,8.5,13.9,2006
3,Autauga County,Alabama,1001,11.2,8.6,14.1,2007
4,Autauga County,Alabama,1001,10.9,8.5,13.5,2008
5,Autauga County,Alabama,1001,11.7,9.2,14.7,2009
6,Autauga County,Alabama,1001,11.2,8.8,13.9,2010
7,Autauga County,Alabama,1001,11.3,8.9,14.1,2011
8,Autauga County,Alabama,1001,11.1,8.8,13.8,2012
9,Autauga County,Alabama,1001,11.9,9.6,14.6,2013


# Write CDC Diabetes Data

In [22]:
df_diabetes.to_csv('data/raw/diabetes_data_2004_2017.csv')

# Load ACS Demographic Data

In [5]:
#List of desired variables and colloquial labels to obtain from ACS
variable_list={'total_pop':'DP05_0001E',
'male':'DP05_0002E',
'female':'DP05_0003E',
'25_34_years':'DP05_0010E',
'35_44_years':'DP05_0011E',
'45_54_years':'DP05_0012E',
'55_59_years':'DP05_0013E',
'60_64_years':'DP05_0014E',
'65_74_years':'DP05_0015E',
'75_84_years':'DP05_0016E',
'85_plus_years':'DP05_0017E',
'hispanic_pop':'DP05_0071E',
'white_pop':'DP05_0064E',
'black_pop':'DP05_0065E',
'amer_indian_pop':'DP05_0066E',
'asian_pop':'DP05_0067E',
'pacific_island_pop':'DP05_0068E',
'other_pop':'DP05_0069E',
'education_hs':'DP02_0066E',
'education_bach':'DP02_0067E',
'median_income':'DP03_0062E'}

In [6]:
#Do initial call to create dataframe 
df_census=acs_api(API_KEY,requests,pd)
#Create list of years to query
years=[str(i+2006) for i in range(14)]
#Get data for all years and variables in one dataframe 
for year in tqdm(years):
    for var_label,var_name in variable_list.items():
        df_census_=acs_api(API_KEY,requests,pd,var_label,var_name,year)
        df_census=pd.concat([df_census,df_census_])

100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [12:44<00:00, 54.59s/it]


In [7]:
df_census.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 242274 entries, 0 to 839
Data columns (total 7 columns):
 #   Column       Non-Null Count   Dtype 
---  ------       --------------   ----- 
 0   county       242274 non-null  object
 1   state        242274 non-null  object
 2   value        242274 non-null  object
 3   state_fips   242274 non-null  object
 4   county_fips  242274 non-null  object
 5   year         242274 non-null  object
 6   variable     242274 non-null  object
dtypes: object(7)
memory usage: 14.8+ MB


In [8]:
df_census.head()

Unnamed: 0,county,state,value,state_fips,county_fips,year,variable
0,Baldwin County,Alabama,169162,1,3,2006,total_pop
1,Calhoun County,Alabama,112903,1,15,2006,total_pop
2,Cullman County,Alabama,80187,1,43,2006,total_pop
3,DeKalb County,Alabama,68014,1,49,2006,total_pop
4,Elmore County,Alabama,75688,1,51,2006,total_pop


# Write ACS Demographic Data

In [11]:
df_census.to_csv('data/raw/census_data_2006_2019.csv')

# Load PEP Population Data 

In [2]:
df_pop=pep_api(API_KEY,requests,pd,year='2000')
df_pop=pd.concat([df_pop,pep_api(API_KEY,requests,pd,year='2019')])

# Write ACS Demographic Data

In [3]:
df_pop.to_csv('data/raw/population_est_2000_2019.csv')

In [None]:
#Alternative variable list for ACS with pcts included
variable_list_pct={'total_pop':'DP05_0001E',
'male':'DP05_0002E',
'male_pct':'DP05_0002PE',
'female':'DP05_0003E',
'female_pct':'DP05_0003PE',
'25_34_years':'DP05_0010E',
'25_34_years_pct':'DP05_0010PE',
'35_44_years':'DP05_0011E',
'35_44_years_pct':'DP05_0011PE',
'45_54_years':'DP05_0012E',
'45_54_years_pct':'DP05_0012PE',
'55_59_years':'DP05_0013E',
'55_59_years_pct':'DP05_0013PE',
'60_64_years':'DP05_0014E',
'60_64_years_pct':'DP05_0014PE',
'65_74_years':'DP05_0015E',
'65_74_years_pct':'DP05_0014PE',
'75_84_years':'DP05_0016E',
'75_84_years_pct':'DP05_0016PE',
'85_plus_years':'DP05_0017E',
'85_plus_years_pct':'DP05_0017PE',
'hispanic_pop':'DP05_0071E',
'hispanic_pct':'DP05_0071PE',
'white_pop':'DP05_0064E',
'white_pct':'DP05_0064PE',
'black_pop':'DP05_0065E',
'black_pct':'DP05_0065PE',
'amer_indian_pop':'DP05_0066E',
'amer_indian_pct':'DP05_0066PE',
'asian_pop':'DP05_0067E',
'asian_pct':'DP05_0067PE',
'pacific_island_pop':'DP05_0068E',
'pacific_island_pct':'DP05_0068PE',
'other_pop':'DP05_0069E',
'other_pct':'DP05_0069PE',
'education_hs':'DP02_0066E',
'education_hs_pct':'DP02_0066PE',
'education_bach':'DP02_0067E',
'education_bach_pct':'DP02_0067PE',
'median_income':'DP03_0062E'}

Variable Change
Estimate!!Race alone or in combination with one or more other races!!Total population!!White

DP05_0059E in 2016
https://api.census.gov/data/2019/acs/acs1/profile/variables.html

DP05_0064E in 2019 
https://api.census.gov/data/2016/acs/acs1/profile/variables.html
   