In [6]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [47]:
def merge_pop_data(state_name,
                   path_2000_2009='../00_source_data/US_Population/start_2000.csv',
                   path_2010_2019='../00_source_data/US_Population/start_2010.csv'):
    '''
    Build a long-format county population table for one state, 2000-2019.

    The two population files are filtered to `state_name`, joined on
    (STNAME, CTYNAME), the row whose CTYNAME equals the state name is
    dropped, and the yearly columns are melted into rows.

    Input:
        state_name: value to match against the STNAME column
        path_2000_2009: CSV with columns STNAME, CTYNAME and
            POPESTIMATE2000 ... POPESTIMATE2009 (defaults to the project file)
        path_2010_2019: CSV with columns STNAME, CTYNAME and
            POPESTIMATE2010 ... POPESTIMATE2019 (defaults to the project file)
    return: a pandas dataframe with columns STNAME, CTYNAME, YEAR, POPULATION,
        where YEAR holds the year as a string ('2000' ... '2019')
    raises:
        AssertionError: if the two files disagree on the number of rows for
            the state, or if any county name appears in only one of them
    '''
    key_cols = ['STNAME', 'CTYNAME']

    # read in the data
    all_2000_2009 = pd.read_csv(path_2000_2009, encoding='latin-1')
    all_2010_2019 = pd.read_csv(path_2010_2019, encoding='latin-1')

    def _state_slice(all_pop, first_year, last_year):
        # Keep the key columns plus one POPESTIMATE<year> column per year,
        # renamed by explicit mapping (not by position) to the bare year.
        renames = {'POPESTIMATE' + str(year): str(year)
                   for year in range(first_year, last_year + 1)}
        is_state = all_pop['STNAME'] == state_name
        return all_pop.loc[is_state, key_cols + list(renames)].rename(columns=renames)

    pop_2000_2009 = _state_slice(all_2000_2009, 2000, 2009)
    pop_2010_2019 = _state_slice(all_2010_2019, 2010, 2019)

    assert pop_2000_2009.shape[0] == pop_2010_2019.shape[0], (
        'number of counties differs between the two population files for '
        + repr(state_name)
    )

    # merge two datasets; the indicator column records which side each row came from
    merged = pd.merge(pop_2000_2009, pop_2010_2019, how='outer', on=key_cols, indicator=True)

    # Equal row counts do not prove the county names line up: a name spelled
    # differently in the two files would survive the outer merge as two
    # half-empty rows and leak NaN populations into the result.
    unmatched = merged.loc[merged['_merge'] != 'both', 'CTYNAME'].tolist()
    assert not unmatched, (
        'county names present in only one population file for '
        + repr(state_name) + ': ' + repr(unmatched)
    )
    pop_2000_2019 = merged.drop(columns='_merge')

    # remove the aggregate population of the state
    pop_2000_2019 = pop_2000_2019[pop_2000_2019['CTYNAME'] != state_name]

    # convert columns to rows
    pop = pd.melt(pop_2000_2019, id_vars=key_cols, var_name='YEAR', value_name='POPULATION')
    n_years = pop_2000_2019.shape[1] - len(key_cols)
    assert pop.shape[0] == pop_2000_2019.shape[0] * n_years, 'unexpected row count after melt'
    assert pop['YEAR'].nunique() == n_years, 'unexpected number of distinct years after melt'

    return pop



### Florida and its reference states

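States loaded in this section: AZ, CO, FL, LA, NV, SC (the shipment file combines FL with AZ, LA, SC; the death file combines FL with CO, LA, NV).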

In [48]:
# Build one long-format population table per state, in the same order as before.
pop_az, pop_co, pop_fl, pop_la, pop_nv, pop_sc = (
    merge_pop_data(state)
    for state in ('Arizona', 'Colorado', 'Florida', 'Louisiana', 'Nevada', 'South Carolina')
)

In [54]:
# Stack the Arizona, Florida, Louisiana and South Carolina tables into one frame.
fl_shipment_pop = pd.concat(objs=[pop_az, pop_fl, pop_la, pop_sc])

# Concatenation must neither drop nor duplicate rows.
assert len(fl_shipment_pop) == sum(
    len(part) for part in (pop_az, pop_fl, pop_la, pop_sc)
)

# Save the combined table without the index column.
fl_shipment_pop.to_csv('../20_intermediate_files/fl_shipment_pop.csv', index=False)

In [53]:
# Stack the Colorado, Florida, Louisiana and Nevada tables into one frame.
fl_death_pop = pd.concat(objs=[pop_co, pop_fl, pop_la, pop_nv])

# Concatenation must neither drop nor duplicate rows.
assert len(fl_death_pop) == sum(
    len(part) for part in (pop_co, pop_fl, pop_la, pop_nv)
)

# Save the combined table without the index column.
fl_death_pop.to_csv('../20_intermediate_files/fl_death_pop.csv', index=False)

### Texas and its reference states

Unnamed: 0,COUNTY,YEAR,POPULATION,STATE
0,Apache County,2000,69507,AZ
1,Cochise County,2000,118132,AZ
2,Coconino County,2000,116773,AZ
3,Gila County,2000,51332,AZ
4,Graham County,2000,33511,AZ
...,...,...,...,...
295,Pima County,2019,1047279,AZ
296,Pinal County,2019,462789,AZ
297,Santa Cruz County,2019,46498,AZ
298,Yavapai County,2019,235099,AZ
