## Adding FIPS (Federal Information Processing Standards) codes to county based on zipcode from data.world

In [108]:
# import modules

import numpy as np
import pandas as pd

In [109]:
# import zipcodes
zipcode = pd.read_csv("data/zipcode_states.csv")
zipcode.head()

Unnamed: 0,zipcode,latitude,longitude,city,state,county
0,501,40.922326,-72.637078,Holtsville,NY,Suffolk
1,544,40.922326,-72.637078,Holtsville,NY,Suffolk
2,601,18.165273,-66.722583,Adjuntas,PR,Adjuntas
3,602,18.393103,-67.180953,Aguada,PR,Aguada
4,603,18.455913,-67.14578,Aguadilla,PR,Aguadilla


In [110]:
zipcode.dtypes

zipcode        int64
latitude     float64
longitude    float64
city          object
state         object
county        object
dtype: object

In [111]:
zipcode.shape

(42741, 6)

In [112]:
# importing FIPS for county; zipcodes
# https://data.world/integrations/python
import datadotworld as dw

In [113]:
results = dw.query(
	'niccolley/us-zipcode-to-county-state', 
    'SELECT * FROM zip_county_fips_2018_03')
results_df = results.dataframe

In [114]:
results_df.head()

Unnamed: 0,zip,stcountyfp,city,state,countyname,classfp
0,36091,1001,Verbena,AL,Autauga County,H1
1,36758,1001,Plantersville,AL,Autauga County,H1
2,36749,1001,Jones,AL,Autauga County,H1
3,36250,1015,Alexandria,AL,Calhoun County,H1
4,36203,1121,Oxford,AL,Talladega County,H1


In [115]:
# save df to csv
results_df.to_csv("./data/fips.csv")

In [116]:
results_df.shape

(53962, 6)

In [117]:
results_df.dtypes

zip           object
stcountyfp    object
city          object
state         object
countyname    object
classfp       object
dtype: object

In [118]:
results_df.rename(columns={'zip': 'zipcode','stcountyfp': 'fips'}, inplace=True)

In [119]:
results_df.head()

Unnamed: 0,zipcode,fips,city,state,countyname,classfp
0,36091,1001,Verbena,AL,Autauga County,H1
1,36758,1001,Plantersville,AL,Autauga County,H1
2,36749,1001,Jones,AL,Autauga County,H1
3,36250,1015,Alexandria,AL,Calhoun County,H1
4,36203,1121,Oxford,AL,Talladega County,H1


In [120]:
results_df = results_df.apply(pd.to_numeric, errors='ignore')

In [121]:
results_df.dtypes

zipcode        int64
fips           int64
city          object
state         object
countyname    object
classfp       object
dtype: object

In [122]:
results_df.head()

Unnamed: 0,zipcode,fips,city,state,countyname,classfp
0,36091,1001,Verbena,AL,Autauga County,H1
1,36758,1001,Plantersville,AL,Autauga County,H1
2,36749,1001,Jones,AL,Autauga County,H1
3,36250,1015,Alexandria,AL,Calhoun County,H1
4,36203,1121,Oxford,AL,Talladega County,H1


In [123]:
# merge FIPS to zipcode df
zipfips = pd.merge(zipcode,results_df[['zipcode','fips']],on='zipcode', how='left')

In [124]:
zipfips.head()

Unnamed: 0,zipcode,latitude,longitude,city,state,county,fips
0,501,40.922326,-72.637078,Holtsville,NY,Suffolk,36103.0
1,544,40.922326,-72.637078,Holtsville,NY,Suffolk,
2,601,18.165273,-66.722583,Adjuntas,PR,Adjuntas,72001.0
3,601,18.165273,-66.722583,Adjuntas,PR,Adjuntas,72113.0
4,602,18.393103,-67.180953,Aguada,PR,Aguada,72003.0


In [98]:
#zipfips['fips'] = zipfips['fips'].apply(lambda x: str(x).zfill(5))

In [99]:
zipfips.dtypes

zipcode        int64
latitude     float64
longitude    float64
city          object
state         object
county        object
fips          object
dtype: object

In [105]:
#zipfips = zipfips['fips'].apply(pd.to_numeric, errors='ignore')

In [126]:
# save df to csv
zipfips.to_csv("./data/zipfips.csv")

In [127]:
count_nan = len(zipfips) - zipfips.count()

In [128]:
count_nan

zipcode         0
latitude      727
longitude     727
city            0
state           0
county        542
fips         3696
dtype: int64

In [129]:
zipfips.shape

(57163, 7)

In [130]:
zipfips.tail()

Unnamed: 0,zipcode,latitude,longitude,city,state,county,fips
57158,99926,55.094325,-131.566827,Metlakatla,AK,Prince Wales Ketchikan,2198.0
57159,99927,55.517921,-132.003244,Point Baker,AK,Prince Wales Ketchikan,2198.0
57160,99928,55.395359,-131.67537,Ward Cove,AK,Ketchikan Gateway,2130.0
57161,99929,56.449893,-132.364407,Wrangell,AK,Wrangell Petersburg,2275.0
57162,99950,55.542007,-131.432682,Ketchikan,AK,Ketchikan Gateway,
