In [1]:
# import necessary packages
import pandas as pd
import geopandas as gpd
import numpy as np

In [2]:
# Read in csvs
#
# Note: In input .csv for missing people had to fix:
# Virgin Islands (to United States Virgin Islands), 
# Tennesse (to Tennessee), and 
# Northern Mariana Islands (to Commonwealth of the Northern Mariana Islands)
#
city_df = pd.read_csv('cities.csv')
county_centroids_df = pd.read_csv('county_centroids.csv')
state_centroids_df = pd.read_csv('state_centroids.csv')
missing_df = pd.read_csv('Missing_04182020.csv')
unclaimed_df = pd.read_csv('Unclaimed_04182020.csv')
unidentified_df = pd.read_csv('Unidentified_04182020.csv')

In [3]:
missing_df.head()

Unnamed: 0,Case Number,DLC,Last Name,First Name,Missing Age,City,County,State,Sex,Race / Ethnicity,Date Modified
0,MP68158,3/19/2020,Mencer,Steven,33,Unalaska,Aleutians West,Alaska,Male,White / Caucasian,4/14/2020
1,MP68103,2/25/2020,Germain,Peter,76,Trapper Creek,Matanuska-Susitna,Alaska,Male,White / Caucasian,4/14/2020
2,MP65871,1/5/2020,Pastel,Kale,36,Sitka,Sitka,Alaska,Male,White / Caucasian,4/14/2020
3,MP64094,12/31/2019,Cobban,David,30,Kodiak,Kodiak Island,Alaska,Male,White / Caucasian,4/14/2020
4,MP64101,12/31/2019,Rainey,Brock,47,Kodiak,Kodiak Island,Alaska,Male,White / Caucasian,4/14/2020


#### Add in State FIPS column

In [4]:
# make dictionary of states and state FIPS code
state_dict = dict(zip(state_centroids_df.NAME, state_centroids_df.STATEFP))
# state_dict['Alaska']

In [5]:
# Add new column to dataframe, using the state name field as a key in the state_dict, to pull the correct FIPS code for each row
missing_df['State_FIPS'] = missing_df['State'].map(state_dict)
# Check unique values in new dataframe field
missing_df['State_FIPS'].unique()

array([ 2,  1,  5,  4,  6,  8,  9, 11, 10, 12, 13, 66, 15, 19, 16, 17, 18,
       20, 21, 22, 25, 24, 23, 26, 27, 29, 69, 28, 30, 37, 38, 31, 33, 34,
       35, 32, 36, 39, 40, 41, 42, 72, 44, 45, 46, 47, 48, 49, 51, 78, 50,
       53, 55, 54, 56], dtype=int64)

In [None]:
## Check to see what is producing null values
# test_df = missing_df.loc[missing_df['State_FIPS'].isnull()]

In [6]:
# check misisng df
missing_df.head()

Unnamed: 0,Case Number,DLC,Last Name,First Name,Missing Age,City,County,State,Sex,Race / Ethnicity,Date Modified,State_FIPS
0,MP68158,3/19/2020,Mencer,Steven,33,Unalaska,Aleutians West,Alaska,Male,White / Caucasian,4/14/2020,2
1,MP68103,2/25/2020,Germain,Peter,76,Trapper Creek,Matanuska-Susitna,Alaska,Male,White / Caucasian,4/14/2020,2
2,MP65871,1/5/2020,Pastel,Kale,36,Sitka,Sitka,Alaska,Male,White / Caucasian,4/14/2020,2
3,MP64094,12/31/2019,Cobban,David,30,Kodiak,Kodiak Island,Alaska,Male,White / Caucasian,4/14/2020,2
4,MP64101,12/31/2019,Rainey,Brock,47,Kodiak,Kodiak Island,Alaska,Male,White / Caucasian,4/14/2020,2


#### Add in County FIPS column

In [7]:
# check county dataframe
county_centroids_df.head()

Unnamed: 0,OBJECTID,STATEFP,COUNTYFP,COUNTYNS,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER,ORIG_FID,Lon_dd,Lat_dd
0,1,21,7,516850,0500000US21007,21007,Ballard,6,639387500.0,69473325.0,1,-88.999262,37.058489
1,2,21,17,516855,0500000US21017,21017,Bourbon,6,750439400.0,4829777.0,2,-84.217155,38.206742
2,3,21,31,516862,0500000US21031,21031,Butler,6,1103572000.0,13943044.0,3,-86.681628,37.207292
3,4,21,65,516879,0500000US21065,21065,Estill,6,655509900.0,6516335.0,4,-83.964316,37.692451
4,5,21,69,516881,0500000US21069,21069,Fleming,6,902727200.0,7182793.0,5,-83.69666,38.370126


In [8]:
# Add column with compound field key
county_centroids_df['County_Key'] = county_centroids_df['STATEFP'].astype(str) + "_" + county_centroids_df['NAME']
county_centroids_df.head()

Unnamed: 0,OBJECTID,STATEFP,COUNTYFP,COUNTYNS,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER,ORIG_FID,Lon_dd,Lat_dd,County_Key
0,1,21,7,516850,0500000US21007,21007,Ballard,6,639387500.0,69473325.0,1,-88.999262,37.058489,21_Ballard
1,2,21,17,516855,0500000US21017,21017,Bourbon,6,750439400.0,4829777.0,2,-84.217155,38.206742,21_Bourbon
2,3,21,31,516862,0500000US21031,21031,Butler,6,1103572000.0,13943044.0,3,-86.681628,37.207292,21_Butler
3,4,21,65,516879,0500000US21065,21065,Estill,6,655509900.0,6516335.0,4,-83.964316,37.692451,21_Estill
4,5,21,69,516881,0500000US21069,21069,Fleming,6,902727200.0,7182793.0,5,-83.69666,38.370126,21_Fleming


In [9]:
# check unique values and length
county_key_list = county_centroids_df['County_Key'].unique()
len(county_key_list)

3227

In [13]:
# make dictionary of counties and county FIPS code (GEIOD field)
county_dict = dict(zip(county_centroids_df.County_Key, county_centroids_df.GEOID))
county_dict

{'21_Ballard': 21007,
 '21_Bourbon': 21017,
 '21_Butler': 21031,
 '21_Estill': 21065,
 '21_Fleming': 21069,
 '21_Hardin': 21093,
 '21_Hart': 21099,
 '21_Leslie': 21131,
 '21_Madison': 21151,
 '21_Marion': 21155,
 '21_Nelson': 21179,
 '17_Kankakee': 17091,
 '17_Warren': 17187,
 '17_Will': 17197,
 '18_Daviess': 18027,
 '18_Harrison': 18061,
 '1_Autauga': 1001,
 '1_Barbour': 1005,
 '1_Choctaw': 1023,
 '1_Conecuh': 1035,
 '1_Elmore': 1051,
 '1_Etowah': 1055,
 '1_Hale': 1065,
 '1_Madison': 1089,
 '1_Russell': 1113,
 '1_Shelby': 1117,
 '2_Aleutians West': 2016,
 '2_Ketchikan Gateway': 2130,
 '2_Nome': 2180,
 '2_Yakutat': 2282,
 '5_Bradley': 5011,
 '5_Carroll': 5015,
 '5_Clark': 5019,
 '5_Cleveland': 5025,
 '5_Independence': 5063,
 '5_Jackson': 5067,
 '5_Randolph': 5121,
 '6_Alameda': 6001,
 '6_Alpine': 6003,
 '6_Amador': 6005,
 '6_Contra Costa': 6013,
 '6_Fresno': 6019,
 '6_Humboldt': 6023,
 '6_Lake': 6033,
 '6_Los Angeles': 6037,
 '6_Merced': 6047,
 '6_Nevada': 6057,
 '6_San Mateo': 6081,
 

In [11]:
# check missing df
missing_df.head()

Unnamed: 0,Case Number,DLC,Last Name,First Name,Missing Age,City,County,State,Sex,Race / Ethnicity,Date Modified,State_FIPS
0,MP68158,3/19/2020,Mencer,Steven,33,Unalaska,Aleutians West,Alaska,Male,White / Caucasian,4/14/2020,2
1,MP68103,2/25/2020,Germain,Peter,76,Trapper Creek,Matanuska-Susitna,Alaska,Male,White / Caucasian,4/14/2020,2
2,MP65871,1/5/2020,Pastel,Kale,36,Sitka,Sitka,Alaska,Male,White / Caucasian,4/14/2020,2
3,MP64094,12/31/2019,Cobban,David,30,Kodiak,Kodiak Island,Alaska,Male,White / Caucasian,4/14/2020,2
4,MP64101,12/31/2019,Rainey,Brock,47,Kodiak,Kodiak Island,Alaska,Male,White / Caucasian,4/14/2020,2


In [12]:
# Add column with compound field key
missing_df['State_County'] = missing_df['State_FIPS'].astype(str) + "_" + missing_df['County']
missing_df.head()

Unnamed: 0,Case Number,DLC,Last Name,First Name,Missing Age,City,County,State,Sex,Race / Ethnicity,Date Modified,State_FIPS,State_County
0,MP68158,3/19/2020,Mencer,Steven,33,Unalaska,Aleutians West,Alaska,Male,White / Caucasian,4/14/2020,2,2_Aleutians West
1,MP68103,2/25/2020,Germain,Peter,76,Trapper Creek,Matanuska-Susitna,Alaska,Male,White / Caucasian,4/14/2020,2,2_Matanuska-Susitna
2,MP65871,1/5/2020,Pastel,Kale,36,Sitka,Sitka,Alaska,Male,White / Caucasian,4/14/2020,2,2_Sitka
3,MP64094,12/31/2019,Cobban,David,30,Kodiak,Kodiak Island,Alaska,Male,White / Caucasian,4/14/2020,2,2_Kodiak Island
4,MP64101,12/31/2019,Rainey,Brock,47,Kodiak,Kodiak Island,Alaska,Male,White / Caucasian,4/14/2020,2,2_Kodiak Island


In [14]:
# Add new column to dataframe, using the state name field as a key in the state_dict, to pull the correct FIPS code for each row
missing_df['County_FIPS'] = missing_df['State_County'].map(county_dict)
# Check unique values in new dataframe field
missing_df['County_FIPS'].unique()

array([ 2016.,  2170.,  2220., ..., 56015., 56021., 56019.])

In [23]:
test2_df = missing_df.loc[missing_df['County_FIPS'].isnull()]
test2_df.shape

(193, 14)

In [25]:
test2_df.to_csv('county_nulls.csv')

In [15]:
# check missing_df
missing_df.head()

Unnamed: 0,Case Number,DLC,Last Name,First Name,Missing Age,City,County,State,Sex,Race / Ethnicity,Date Modified,State_FIPS,State_County,County_FIPS
0,MP68158,3/19/2020,Mencer,Steven,33,Unalaska,Aleutians West,Alaska,Male,White / Caucasian,4/14/2020,2,2_Aleutians West,2016.0
1,MP68103,2/25/2020,Germain,Peter,76,Trapper Creek,Matanuska-Susitna,Alaska,Male,White / Caucasian,4/14/2020,2,2_Matanuska-Susitna,2170.0
2,MP65871,1/5/2020,Pastel,Kale,36,Sitka,Sitka,Alaska,Male,White / Caucasian,4/14/2020,2,2_Sitka,2220.0
3,MP64094,12/31/2019,Cobban,David,30,Kodiak,Kodiak Island,Alaska,Male,White / Caucasian,4/14/2020,2,2_Kodiak Island,2150.0
4,MP64101,12/31/2019,Rainey,Brock,47,Kodiak,Kodiak Island,Alaska,Male,White / Caucasian,4/14/2020,2,2_Kodiak Island,2150.0
