In [None]:
"""
Combine the data that was gathered and saved in several separate pickle files.

The combined result is written out to one pickle file.
"""

In [5]:
import pickle
import pandas as pd
import numpy as np

In [6]:
# Load the state FIPS code table from its pickle file.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('st_fips_codes.pickle', mode='rb') as fips_fh:
    fips_codes = pickle.load(fips_fh)

fips_codes.head()

Unnamed: 0,state,abbr,FIPS
0,Alabama,AL,1
1,Alaska,AK,2
2,Arizona,AZ,4
3,Arkansas,AR,5
4,California,CA,6


In [7]:
# Load the county features (gathered from Wikipedia) from their pickle file.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('county_features.pickle', mode='rb') as features_fh:
    county_features = pickle.load(features_fh)

county_features.head()

Unnamed: 0,state,county_name,FIPS_code,county_page_url,highways,hwy_count,cities,city_count,towns,town_count
0,Alabama,"Autauga County, Alabama",1,"https://en.wikipedia.org/wiki/Autauga_County,_...","[Interstate 65, U.S. Highway 31, U.S. Highway ...",6,"[Millbrook, Prattville]",2,"[Autaugaville, Billingsley]",2
1,Alabama,"Baldwin County, Alabama",3,"https://en.wikipedia.org/wiki/Baldwin_County,_...","[Interstate 10, Interstate 65, U.S. Highway 31...",12,"[Bay Minette, Daphne, Fairhope, Foley, Gulf Sh...",8,"[Elberta, Loxley, Magnolia Springs, Perdido Be...",6
2,Alabama,"Barbour County, Alabama",5,"https://en.wikipedia.org/wiki/Barbour_County,_...","[U.S. Highway 82, U.S. Highway 431, State Rout...",11,"[Clio, Eufaula]",2,"[Bakerhill, Blue Springs, Clayton, Louisville]",4
3,Alabama,"Bibb County, Alabama",7,"https://en.wikipedia.org/wiki/Bibb_County,_Ala...","[U.S. Highway 11, U.S. Highway 82, State Route...",8,"[Brent, Centreville]",2,"[Vance, West Blocton, Woodstock]",3
4,Alabama,"Blount County, Alabama",9,"https://en.wikipedia.org/wiki/Blount_County,_A...","[Interstate 65, U.S. Highway 31, U.S. Highway ...",9,"[Oneonta, Warrior]",2,"[Allgood, Altoona, Blountsville, Cleveland, Co...",14


In [8]:
# Show the dtype of every column in county_features.
county_features.dtypes

state              object
county_name        object
FIPS_code          object
county_page_url    object
highways           object
hwy_count           int64
cities             object
city_count          int64
towns              object
town_count          int64
dtype: object

In [None]:
# Add the state FIPS code to the county features df.
# First check that the state names are exactly the same in both frames.

In [9]:
# List the distinct state names present in county_features.
county_features['state'].unique()

array(['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
       'Colorado', 'Connecticut', 'Delaware', 'Florida',
       'Georgia (U.S. state)', 'Hawaii', 'Idaho', 'Illinois', 'Indiana',
       'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland',
       'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi',
       'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire',
       'New Jersey', 'New Mexico', 'New York (state)', 'North Carolina',
       'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania',
       'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee',
       'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington (state)',
       'West Virginia', 'Wisconsin', 'Wyoming'], dtype=object)

In [10]:
# List the distinct state names present in fips_codes, for comparison.
fips_codes['state'].unique()

array(['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
       'Colorado', 'Connecticut', 'Delaware', 'Florida', 'Georgia',
       'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas',
       'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts',
       'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana',
       'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico',
       'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma',
       'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina',
       'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont',
       'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming',
       'American Samoa', 'Guam', 'Northern Mariana Islands',
       'Puerto Rico', 'Virgin Islands'], dtype=object)

In [11]:
# Revise state names in the county features df: drop the "(state)" style
# qualifiers so the names match the ones used in fips_codes.
state_name_fixes = {
    'Georgia (U.S. state)': 'Georgia',
    'New York (state)': 'New York',
    'Washington (state)': 'Washington',
}
county_features['state'] = county_features['state'].replace(state_name_fixes)


In [12]:
# Re-list the distinct state names to confirm the qualifiers are gone.
county_features['state'].unique()

array(['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
       'Colorado', 'Connecticut', 'Delaware', 'Florida', 'Georgia',
       'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas',
       'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts',
       'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana',
       'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico',
       'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma',
       'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina',
       'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont',
       'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming'],
      dtype=object)

In [13]:
# Left-join the state FIPS table onto the county features, matching on state name.
cnty_features = county_features.merge(fips_codes, on='state', how='left')
cnty_features.head()

Unnamed: 0,state,county_name,FIPS_code,county_page_url,highways,hwy_count,cities,city_count,towns,town_count,abbr,FIPS
0,Alabama,"Autauga County, Alabama",1,"https://en.wikipedia.org/wiki/Autauga_County,_...","[Interstate 65, U.S. Highway 31, U.S. Highway ...",6,"[Millbrook, Prattville]",2,"[Autaugaville, Billingsley]",2,AL,1
1,Alabama,"Baldwin County, Alabama",3,"https://en.wikipedia.org/wiki/Baldwin_County,_...","[Interstate 10, Interstate 65, U.S. Highway 31...",12,"[Bay Minette, Daphne, Fairhope, Foley, Gulf Sh...",8,"[Elberta, Loxley, Magnolia Springs, Perdido Be...",6,AL,1
2,Alabama,"Barbour County, Alabama",5,"https://en.wikipedia.org/wiki/Barbour_County,_...","[U.S. Highway 82, U.S. Highway 431, State Rout...",11,"[Clio, Eufaula]",2,"[Bakerhill, Blue Springs, Clayton, Louisville]",4,AL,1
3,Alabama,"Bibb County, Alabama",7,"https://en.wikipedia.org/wiki/Bibb_County,_Ala...","[U.S. Highway 11, U.S. Highway 82, State Route...",8,"[Brent, Centreville]",2,"[Vance, West Blocton, Woodstock]",3,AL,1
4,Alabama,"Blount County, Alabama",9,"https://en.wikipedia.org/wiki/Blount_County,_A...","[Interstate 65, U.S. Highway 31, U.S. Highway ...",9,"[Oneonta, Warrior]",2,"[Allgood, Altoona, Blountsville, Cleveland, Co...",14,AL,1


In [14]:
# Show the last rows of the merged frame as well.
cnty_features.tail()

Unnamed: 0,state,county_name,FIPS_code,county_page_url,highways,hwy_count,cities,city_count,towns,town_count,abbr,FIPS
3089,Wyoming,"Sweetwater County, Wyoming",37,https://en.wikipedia.org/wiki/Sweetwater_Count...,[],0,"[Green River, Rock Springs]",2,"[Bairoil, Granger, Superior, Wamsutter]",4,WY,56
3090,Wyoming,"Teton County, Wyoming",39,"https://en.wikipedia.org/wiki/Teton_County,_Wy...",[],0,[],2,[Jackson],1,WY,56
3091,Wyoming,"Uinta County, Wyoming",41,"https://en.wikipedia.org/wiki/Uinta_County,_Wy...","[Interstate 80, U.S. Highway 189]",2,[Evanston],1,"[Bear River, Lyman, Mountain View]",3,WY,56
3092,Wyoming,"Washakie County, Wyoming",43,"https://en.wikipedia.org/wiki/Washakie_County,...",[],0,[Worland],1,[Ten Sleep],1,WY,56
3093,Wyoming,"Weston County, Wyoming",45,"https://en.wikipedia.org/wiki/Weston_County,_W...","[U.S. Highway 16, U.S. Highway 20, U.S. Highwa...",6,[Newcastle],1,[Upton],1,WY,56


In [15]:
# Import pickle files with USDA data, starting with the population estimates.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('pop_est_2019.pickle', mode='rb') as pop_fh:
    pop_est = pickle.load(pop_fh)

pop_est.head()

Unnamed: 0,FIPStxt,State,Area_Name,POP_ESTIMATE_2019,N_POP_CHG_2019
0,0,US,United States,328239523,1552022
1,1000,AL,Alabama,4903185,15504
2,1001,AL,Autauga County,55869,336
3,1003,AL,Baldwin County,223234,5379
4,1005,AL,Barbour County,24686,-186


In [16]:
# Show the dtype of every column in pop_est.
pop_est.dtypes

FIPStxt               int64
State                object
Area_Name            object
POP_ESTIMATE_2019    object
N_POP_CHG_2019       object
dtype: object

In [17]:
# Cast the FIPS column to str so the string accessor can be used on it.
pop_est = pop_est.astype({'FIPStxt': str})
pop_est.dtypes

FIPStxt              object
State                object
Area_Name            object
POP_ESTIMATE_2019    object
N_POP_CHG_2019       object
dtype: object

In [19]:
# Check which rows have a FIPS code ending in '000' (expected: the state rows).
is_state_row = pop_est['FIPStxt'].str[-3:].eq('000')
pop_est.loc[is_state_row]

Unnamed: 0,FIPStxt,State,Area_Name,POP_ESTIMATE_2019,N_POP_CHG_2019
1,1000,AL,Alabama,4903185,15504.0
69,2000,AK,Alaska,731545,-3594.0
99,4000,AZ,Arizona,7278717,120693.0
115,5000,AR,Arkansas,3017804,8071.0
191,6000,CA,California,39512223,50635.0
250,8000,CO,Colorado,5758736,67449.0
315,9000,CT,Connecticut,3565287,-6233.0
324,10000,DE,Delaware,973764,8285.0
328,11000,DC,District of Columbia,705749,4202.0
330,12000,FL,Florida,21477737,233420.0


In [22]:
# Keep only the rows of pop_est whose FIPS code does not end in '000'.
not_state_row = pop_est['FIPStxt'].str[-3:].ne('000')
cnty_pop_est = pop_est.loc[not_state_row]

cnty_pop_est.head()

Unnamed: 0,FIPStxt,State,Area_Name,POP_ESTIMATE_2019,N_POP_CHG_2019
0,0,US,United States,328239523,1552022
2,1001,AL,Autauga County,55869,336
3,1003,AL,Baldwin County,223234,5379
4,1005,AL,Barbour County,24686,-186
5,1007,AL,Bibb County,22394,27


In [23]:
# Remove the nationwide row (State == 'US'); the '000' filter does not catch it.
not_us_row = cnty_pop_est['State'].ne('US')
cnty_pop_est = cnty_pop_est.loc[not_us_row]

cnty_pop_est.head()

Unnamed: 0,FIPStxt,State,Area_Name,POP_ESTIMATE_2019,N_POP_CHG_2019
2,1001,AL,Autauga County,55869,336
3,1003,AL,Baldwin County,223234,5379
4,1005,AL,Barbour County,24686,-186
5,1007,AL,Bibb County,22394,27
6,1009,AL,Blount County,57826,55


In [24]:
# Add a FIPS_code merge key: FIPStxt left-padded with zeros to 5 characters.
# cnty_pop_est comes from boolean filtering, so assigning a column onto it
# directly triggers pandas' SettingWithCopyWarning. .assign returns a new frame
# instead of writing into the filtered one.
cnty_pop_est = cnty_pop_est.assign(
    FIPS_code=cnty_pop_est['FIPStxt'].astype(str).str.zfill(5)
)

In [25]:
# Preview the frame with the new FIPS_code column.
cnty_pop_est.head()

Unnamed: 0,FIPStxt,State,Area_Name,POP_ESTIMATE_2019,N_POP_CHG_2019,FIPS_code
2,1001,AL,Autauga County,55869,336,1001
3,1003,AL,Baldwin County,223234,5379,1003
4,1005,AL,Barbour County,24686,-186,1005
5,1007,AL,Bibb County,22394,27,1007
6,1009,AL,Blount County,57826,55,1009


In [26]:
# Number of rows left after dropping the state-level and US rows.
len(cnty_pop_est)

3220

In [27]:
# Load the pivoted unemployment / income table from its pickle file.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('unemp_inc_pivot.pickle', mode='rb') as unemp_fh:
    unemp_inc = pickle.load(unemp_fh)

unemp_inc.head()

Attribute,fips_txt,Stabr,area_name,Civilian_labor_force_2019,Employed_2019,Med_HH_Income_Percent_of_State_Total_2019,Median_Household_Income_2019,Metro_2013,Rural_urban_continuum_code_2013,Unemployed_2019,Unemployment_rate_2019,Urban_influence_code_2013
0,0,US,United States,163100055.0,157115247.0,,65712.0,,,5984808.0,3.669409,
1,1000,AL,Alabama,2241747.0,2174483.0,100.0,51771.0,,,67264.0,3.0,
2,1001,AL,"Autauga County, AL",26172.0,25458.0,112.481888,58233.0,1.0,2.0,714.0,2.7,2.0
3,1003,AL,"Baldwin County, AL",97328.0,94675.0,115.645828,59871.0,1.0,3.0,2653.0,2.7,2.0
4,1005,AL,"Barbour County, AL",8537.0,8213.0,69.482918,35972.0,0.0,6.0,324.0,3.8,6.0


In [28]:
# Show the dtype of every column in unemp_inc.
unemp_inc.dtypes

Attribute
fips_txt                                       int64
Stabr                                         object
area_name                                     object
Civilian_labor_force_2019                    float64
Employed_2019                                float64
Med_HH_Income_Percent_of_State_Total_2019    float64
Median_Household_Income_2019                 float64
Metro_2013                                   float64
Rural_urban_continuum_code_2013              float64
Unemployed_2019                              float64
Unemployment_rate_2019                       float64
Urban_influence_code_2013                    float64
dtype: object

In [29]:
# Cast the FIPS column to str so the string accessor can be used on it.
unemp_inc = unemp_inc.astype({'fips_txt': str})
unemp_inc.dtypes

Attribute
fips_txt                                      object
Stabr                                         object
area_name                                     object
Civilian_labor_force_2019                    float64
Employed_2019                                float64
Med_HH_Income_Percent_of_State_Total_2019    float64
Median_Household_Income_2019                 float64
Metro_2013                                   float64
Rural_urban_continuum_code_2013              float64
Unemployed_2019                              float64
Unemployment_rate_2019                       float64
Urban_influence_code_2013                    float64
dtype: object

In [30]:
# Keep only the rows of unemp_inc whose FIPS code does not end in '000'.
not_state_row = unemp_inc['fips_txt'].str[-3:].ne('000')
cnty_unemp_inc = unemp_inc.loc[not_state_row]
cnty_unemp_inc.head()

Attribute,fips_txt,Stabr,area_name,Civilian_labor_force_2019,Employed_2019,Med_HH_Income_Percent_of_State_Total_2019,Median_Household_Income_2019,Metro_2013,Rural_urban_continuum_code_2013,Unemployed_2019,Unemployment_rate_2019,Urban_influence_code_2013
0,0,US,United States,163100055.0,157115247.0,,65712.0,,,5984808.0,3.669409,
2,1001,AL,"Autauga County, AL",26172.0,25458.0,112.481888,58233.0,1.0,2.0,714.0,2.7,2.0
3,1003,AL,"Baldwin County, AL",97328.0,94675.0,115.645828,59871.0,1.0,3.0,2653.0,2.7,2.0
4,1005,AL,"Barbour County, AL",8537.0,8213.0,69.482918,35972.0,0.0,6.0,324.0,3.8,6.0
5,1007,AL,"Bibb County, AL",8685.0,8419.0,92.55761,47918.0,1.0,1.0,266.0,3.1,1.0


In [31]:
# Remove the nationwide row (Stabr == 'US'); the '000' filter does not catch it.
cnty_unemp_inc = cnty_unemp_inc.loc[cnty_unemp_inc['Stabr'].ne('US')]

In [32]:
# Number of rows left after dropping the state-level and US rows.
len(cnty_unemp_inc)

3222

In [33]:
# fips_txt needs a leading 0 for states 1-9: add a FIPS_code merge key that is
# fips_txt left-padded with zeros to 5 characters.
# cnty_unemp_inc comes from boolean filtering, so assigning a column onto it
# directly triggers pandas' SettingWithCopyWarning. .assign returns a new frame
# instead of writing into the filtered one.
cnty_unemp_inc = cnty_unemp_inc.assign(
    FIPS_code=cnty_unemp_inc['fips_txt'].astype(str).str.zfill(5)
)

cnty_unemp_inc.head()

Attribute,fips_txt,Stabr,area_name,Civilian_labor_force_2019,Employed_2019,Med_HH_Income_Percent_of_State_Total_2019,Median_Household_Income_2019,Metro_2013,Rural_urban_continuum_code_2013,Unemployed_2019,Unemployment_rate_2019,Urban_influence_code_2013,FIPS_code
2,1001,AL,"Autauga County, AL",26172.0,25458.0,112.481888,58233.0,1.0,2.0,714.0,2.7,2.0,1001
3,1003,AL,"Baldwin County, AL",97328.0,94675.0,115.645828,59871.0,1.0,3.0,2653.0,2.7,2.0,1003
4,1005,AL,"Barbour County, AL",8537.0,8213.0,69.482918,35972.0,0.0,6.0,324.0,3.8,6.0,1005
5,1007,AL,"Bibb County, AL",8685.0,8419.0,92.55761,47918.0,1.0,1.0,266.0,3.1,1.0,1007
6,1009,AL,"Blount County, AL",25331.0,24655.0,102.184624,52902.0,1.0,1.0,676.0,2.7,1.0,1009


In [42]:
# New column in the cnty_features df: state FIPS followed by county FIPS,
# concatenated as strings.
# NOTE(review): assumes both columns hold already zero-padded strings — confirm.
state_part = cnty_features['FIPS']
county_part = cnty_features['FIPS_code']
cnty_features['FIPS_code_NEW'] = state_part.str.cat(county_part)
cnty_features.head()

Unnamed: 0,state,county_name,FIPS_code,county_page_url,highways,hwy_count,cities,city_count,towns,town_count,abbr,FIPS,FIPS_code_NEW
0,Alabama,"Autauga County, Alabama",1,"https://en.wikipedia.org/wiki/Autauga_County,_...","[Interstate 65, U.S. Highway 31, U.S. Highway ...",6,"[Millbrook, Prattville]",2,"[Autaugaville, Billingsley]",2,AL,1,1001
1,Alabama,"Baldwin County, Alabama",3,"https://en.wikipedia.org/wiki/Baldwin_County,_...","[Interstate 10, Interstate 65, U.S. Highway 31...",12,"[Bay Minette, Daphne, Fairhope, Foley, Gulf Sh...",8,"[Elberta, Loxley, Magnolia Springs, Perdido Be...",6,AL,1,1003
2,Alabama,"Barbour County, Alabama",5,"https://en.wikipedia.org/wiki/Barbour_County,_...","[U.S. Highway 82, U.S. Highway 431, State Rout...",11,"[Clio, Eufaula]",2,"[Bakerhill, Blue Springs, Clayton, Louisville]",4,AL,1,1005
3,Alabama,"Bibb County, Alabama",7,"https://en.wikipedia.org/wiki/Bibb_County,_Ala...","[U.S. Highway 11, U.S. Highway 82, State Route...",8,"[Brent, Centreville]",2,"[Vance, West Blocton, Woodstock]",3,AL,1,1007
4,Alabama,"Blount County, Alabama",9,"https://en.wikipedia.org/wiki/Blount_County,_A...","[Interstate 65, U.S. Highway 31, U.S. Highway ...",9,"[Oneonta, Warrior]",2,"[Allgood, Altoona, Blountsville, Cleveland, Co...",14,AL,1,1009


In [43]:
# Load the county typology table from its pickle file.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('county_typology.pickle', mode='rb') as typology_fh:
    cnty_type = pickle.load(typology_fh)

cnty_type.head()

Unnamed: 0,FIPStxt,State,County_name,"Metro-nonmetro status, 2013 0=Nonmetro 1=Metro",Economic Types Type_2015_Update non-overlapping,Economic_Type_Label,Farming_2015_Update,Mining_2015-Update,Manufacturing_2015_Update,Government_2015_Update,Recreation_2015_Update,Nonspecialized_2015_Update,Low_Education_2015_Update,Low_Employment_Cnty_2008_2012_25_64,Pop_Loss_2010,Retirement_Dest_2015_Update,Persistent_Poverty_2013,Persistent_Related_Child_Poverty_2013
0,1001,AL,Autauga County,1,0,Nonspecialized,0,0,0,0,0,1,0,0,0,1,0,0
1,1003,AL,Baldwin County,1,5,Recreation,0,0,0,0,1,0,0,0,0,1,0,0
2,1005,AL,Barbour County,0,3,Maufacturing,0,0,1,0,0,0,1,1,0,0,1,1
3,1007,AL,Bibb County,1,0,Nonspecialized,0,0,0,0,0,1,1,1,0,0,0,1
4,1009,AL,Blount County,1,0,Nonspecialized,0,0,0,0,0,1,1,1,0,0,0,0


In [44]:
# Number of rows in the county typology table.
len(cnty_type)

3143

In [45]:
# Add a FIPS_code merge key: FIPStxt as a string, left-padded with zeros to 5 characters.
cnty_type['FIPS_code'] = [str(code).zfill(5) for code in cnty_type['FIPStxt']]

In [46]:
# Preview the frame with the new FIPS_code column.
cnty_type.head()

Unnamed: 0,FIPStxt,State,County_name,"Metro-nonmetro status, 2013 0=Nonmetro 1=Metro",Economic Types Type_2015_Update non-overlapping,Economic_Type_Label,Farming_2015_Update,Mining_2015-Update,Manufacturing_2015_Update,Government_2015_Update,Recreation_2015_Update,Nonspecialized_2015_Update,Low_Education_2015_Update,Low_Employment_Cnty_2008_2012_25_64,Pop_Loss_2010,Retirement_Dest_2015_Update,Persistent_Poverty_2013,Persistent_Related_Child_Poverty_2013,FIPS_code
0,1001,AL,Autauga County,1,0,Nonspecialized,0,0,0,0,0,1,0,0,0,1,0,0,1001
1,1003,AL,Baldwin County,1,5,Recreation,0,0,0,0,1,0,0,0,0,1,0,0,1003
2,1005,AL,Barbour County,0,3,Maufacturing,0,0,1,0,0,0,1,1,0,0,1,1,1005
3,1007,AL,Bibb County,1,0,Nonspecialized,0,0,0,0,0,1,1,1,0,0,0,1,1007
4,1009,AL,Blount County,1,0,Nonspecialized,0,0,0,0,0,1,1,1,0,0,0,0,1009


In [47]:
# Merge cnty_unemp_inc, cnty_pop_est, cnty_type and cnty_features, one step per cell.
# Step 1: left-join the population estimates onto the unemployment / income rows.
all_data_1 = cnty_unemp_inc.merge(cnty_pop_est, on='FIPS_code', how='left')

In [48]:
# Step 2: left-join the county typology columns on the 5-character FIPS key.
# Non-key column names that exist on both sides get pandas' default _x / _y suffixes.
all_data_2 = all_data_1.merge(cnty_type, on='FIPS_code', how='left')

In [49]:
# Step 3: left-join the Wikipedia features; their full key is FIPS_code_NEW,
# while the left side uses FIPS_code.
all_data_3 = all_data_2.merge(
    cnty_features,
    left_on='FIPS_code',
    right_on='FIPS_code_NEW',
    how='left',
)

In [51]:
# Preview the fully merged frame.
all_data_3.head()

Unnamed: 0,fips_txt,Stabr,area_name,Civilian_labor_force_2019,Employed_2019,Med_HH_Income_Percent_of_State_Total_2019,Median_Household_Income_2019,Metro_2013,Rural_urban_continuum_code_2013,Unemployed_2019,...,county_page_url,highways,hwy_count,cities,city_count,towns,town_count,abbr,FIPS,FIPS_code_NEW
0,1001,AL,"Autauga County, AL",26172.0,25458.0,112.481888,58233.0,1.0,2.0,714.0,...,"https://en.wikipedia.org/wiki/Autauga_County,_...","[Interstate 65, U.S. Highway 31, U.S. Highway ...",6.0,"[Millbrook, Prattville]",2.0,"[Autaugaville, Billingsley]",2.0,AL,1,1001
1,1003,AL,"Baldwin County, AL",97328.0,94675.0,115.645828,59871.0,1.0,3.0,2653.0,...,"https://en.wikipedia.org/wiki/Baldwin_County,_...","[Interstate 10, Interstate 65, U.S. Highway 31...",12.0,"[Bay Minette, Daphne, Fairhope, Foley, Gulf Sh...",8.0,"[Elberta, Loxley, Magnolia Springs, Perdido Be...",6.0,AL,1,1003
2,1005,AL,"Barbour County, AL",8537.0,8213.0,69.482918,35972.0,0.0,6.0,324.0,...,"https://en.wikipedia.org/wiki/Barbour_County,_...","[U.S. Highway 82, U.S. Highway 431, State Rout...",11.0,"[Clio, Eufaula]",2.0,"[Bakerhill, Blue Springs, Clayton, Louisville]",4.0,AL,1,1005
3,1007,AL,"Bibb County, AL",8685.0,8419.0,92.55761,47918.0,1.0,1.0,266.0,...,"https://en.wikipedia.org/wiki/Bibb_County,_Ala...","[U.S. Highway 11, U.S. Highway 82, State Route...",8.0,"[Brent, Centreville]",2.0,"[Vance, West Blocton, Woodstock]",3.0,AL,1,1007
4,1009,AL,"Blount County, AL",25331.0,24655.0,102.184624,52902.0,1.0,1.0,676.0,...,"https://en.wikipedia.org/wiki/Blount_County,_A...","[Interstate 65, U.S. Highway 31, U.S. Highway ...",9.0,"[Oneonta, Warrior]",2.0,"[Allgood, Altoona, Blountsville, Cleveland, Co...",14.0,AL,1,1009


In [53]:
# List the distinct state abbreviations present in the merged frame.
all_data_3['Stabr'].unique()

array(['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA',
       'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA',
       'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY',
       'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
       'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY', 'PR'], dtype=object)

In [54]:
# Drop the rows whose state abbreviation is 'PR'.
not_pr_row = all_data_3['Stabr'].ne('PR')
county_data = all_data_3.loc[not_pr_row]

In [55]:
# Confirm 'PR' no longer appears among the state abbreviations.
county_data['Stabr'].unique()

array(['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA',
       'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA',
       'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY',
       'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
       'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY'], dtype=object)

In [56]:
# Number of rows in the final combined frame.
len(county_data)

3144

In [57]:
# Check results of merge: write the combined frame to CSV (without the index
# column) so it can be inspected outside the notebook.
county_data.to_csv('county_data_all.csv', index=False)

In [58]:
# Save the combined county frame to a single pickle file.
with open('county_data.pickle', mode='wb') as out_fh:
    pickle.dump(county_data, out_fh)