# Examination of childcare facility entries that failed geocoding

In [7]:
# IMPORTS
import os
import shutil
import urllib.request
from pathlib import Path
from zipfile import ZipFile

import folium
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import requests
from geopandas.tools import overlay
from geopy.extra.rate_limiter import RateLimiter # throttles geocoding requests
from geopy.geocoders import Nominatim # for geocoding
from matplotlib import pyplot
from shapely.geometry import Point, Polygon

In [3]:
# load the regulated child care facilities workbook from the local data folder
childcare = pd.read_excel(io='data/regulated-child-care-facilities.xlsx')


In [4]:
# clean and convert the data

# reduce the childcare dataframe to only the columns we will need
childcare = childcare.loc[:, 'COUNTY':'zip code']

# replace spaces in column names with '_' to make life easier
childcare.columns = childcare.columns.str.replace(' ', '_')

# this dataset covers the entire state, so filter it down to just St. Louis
# note that we may be able to expand to include the COUNTY of St. Louis - we can reassess this later
# (to see every city spelling present in the data: sorted(childcare.city.unique()))
# .copy() makes the filtered rows an independent frame, so adding columns below
# does not raise SettingWithCopyWarning or write to a temporary slice
childcare = childcare.loc[childcare['city'] == 'ST LOUIS'].copy()

# merge together street address, city, and state to create the "full address"
childcare['full_address'] = (
    childcare['street_address'] + "," + childcare['city'] + "," + childcare['state']
)


In [9]:
# set up the geocoder
geolocator = Nominatim(timeout=10, user_agent = "myGeolocator")

# The public Nominatim service allows at most one request per second, so wrap
# the lookup in a rate limiter rather than sending requests back-to-back.
# swallow_exceptions=False keeps service errors visible (they still raise after
# the limiter's retries) instead of being recorded as "no match".
geocode_throttled = RateLimiter(
    geolocator.geocode, min_delay_seconds=1, swallow_exceptions=False
)

# pass the full addresses to the geocoder and store the results in a new column
# (an address with no match is stored as None)
childcare['geocode'] = childcare['full_address'].apply(geocode_throttled)

In [82]:
# keep an independent copy of the facilities whose geocode lookup came back empty
child_nogeo = childcare.loc[lambda frame: frame['geocode'].isna()].copy()
child_nogeo

Unnamed: 0,COUNTY,facility_type,regulatory_status,facility_name,dvn,street_address,city,state,zip_code,full_address,geocode
989,ST LOUIS,CHILD CARE CENTER,LICENSABLE,"APPLE OF YOUR EYE LEARNING CENTER, INC.",500836,9994 TESSON CREEK ESTATES DR,ST LOUIS,MO,63123,"9994 TESSON CREEK ESTATES DR,ST LOUIS,MO",
1019,ST LOUIS,CHILD CARE CENTER,LICENSABLE,COURTNEY & MCKENZIE DAYCARE CENTER,1298877,2120 REDMAN RD,ST LOUIS,MO,63136,"2120 REDMAN RD,ST LOUIS,MO",
1036,ST LOUIS,CHILD CARE CENTER,LICENSABLE,FIRST STEPS EARLY CHILDHOOD LEARNING CENTER,2287583,6912 WOODSMERE CIR,ST LOUIS,MO,63129,"6912 WOODSMERE CIR,ST LOUIS,MO",
1057,ST LOUIS,CHILD CARE CENTER,LICENSABLE,"JESSIE B'S ACADEMY, LLC",2735544,2002 LUCAS AND HUNT RD,ST LOUIS,MO,63121-5012,"2002 LUCAS AND HUNT RD,ST LOUIS,MO",
1119,ST LOUIS,CHILD CARE CENTER,LICENSABLE,MARY MARGARET DAY CARE AND LEARNING CENTER,2107919,2011 REDMAN RD,ST LOUIS,MO,63138,"2011 REDMAN RD,ST LOUIS,MO",
1150,ST LOUIS,CHILD CARE CENTER,LICENSABLE,SAPPINGTON CHILD CARE CENTER,652922,9915 EMIL ST,ST LOUIS,MO,63126,"9915 EMIL ST,ST LOUIS,MO",
1166,ST LOUIS,CHILD CARE CENTER,LICENSABLE,"THE AMITY COMPANY, INC.",2367960,6060 A TELEGRAPH RD,ST LOUIS,MO,63129,"6060 A TELEGRAPH RD,ST LOUIS,MO",
1187,ST LOUIS,CHILD CARE CENTER,LICENSABLE,TILSUNUP24HR DAYCARE LLC,2455098,3860 LUCAS AND HUNT RD,ST LOUIS,MO,63121-2933,"3860 LUCAS AND HUNT RD,ST LOUIS,MO",
1202,ST LOUIS,CHILD CARE CENTER,LICENSABLE,WASHINGTON UNIVERSITY NURSERY SCHOOL,184250,6926 FOREST PARK PKWY BLDG 2,ST LOUIS,MO,63130-4433,"6926 FOREST PARK PKWY BLDG 2,ST LOUIS,MO",
1203,ST LOUIS,CHILD CARE CENTER,LICENSABLE,WEBSTER CHILD CARE CENTER AT LACLEDE GROVES,186758,624 LOHMANN FOREST LN,ST LOUIS,MO,63119,"624 LOHMANN FOREST LN,ST LOUIS,MO",


In [83]:
# number of facilities that still need a usable address
child_nogeo.shape[0]

26

#### Output a few of the failed entries and manually edit their addresses until they geocode

In [84]:
# full address of the first facility that failed to geocode
child_nogeo['full_address'].iloc[0]

'9994 TESSON CREEK ESTATES DR,ST LOUIS,MO'

In [85]:
geolocator.geocode('9994 TESSON CREEK,ST LOUIS,MO')
# removing "ESTATES DR" worked (this query drops both trailing words of the
# street name, not just "DR")

Location(9994, Tesson Creek Estates Road, Tesson Creek Estates, Affton, Saint Louis County, Missouri, 63123, United States, (38.535889183019705, -90.33530295189881, 0.0))

In [86]:
# full address of the second facility that failed to geocode
child_nogeo['full_address'].iloc[1]

'2120 REDMAN RD,ST LOUIS,MO'

In [87]:
geolocator.geocode('2120 REDMAN,ST LOUIS,MO')
# removing "RD" worked - the match comes back as "Redman Avenue", so the "RD"
# suffix in the source data does not agree with the geocoder's street type

Location(2120, Redman Avenue, Saint Louis County, Missouri, 63136, United States, (38.78477, -90.2357483, 0.0))

In [88]:
# full address of the third facility that failed to geocode
child_nogeo['full_address'].iloc[2]

'6912 WOODSMERE CIR,ST LOUIS,MO'

In [89]:
geolocator.geocode('6912 WOODSMERE,ST LOUIS,MO')
# no solution found here: dropping "CIR" still returns no match, so removing
# the suffix alone does not fix this address

In [90]:
# full address of the fourth facility that failed to geocode
child_nogeo['full_address'].iloc[3]

'2002 LUCAS AND HUNT RD,ST LOUIS,MO'

In [91]:
geolocator.geocode('2002 LUCAS HUNT RD,ST LOUIS,MO')
# removing "AND" worked (the "RD" suffix was left in place for this query);
# the match comes back as "Lucas & Hunt Road"

Location(2002, Lucas & Hunt Road, Hillsdale, Saint Louis County, Missouri, 63121, United States, (38.687117955185144, -90.29328387190589, 0.0))

In [92]:
geolocator.geocode('100 east adams,ST LOUIS,MO')
# changing "e" to "east" worked, i.e. spelling out the direction abbreviation;
# note this query also has no street-type suffix and is lower-case

Location(First Presbyterian Church of Kirkwood, 100, East Adams Avenue, Kirkwood, Saint Louis County, Missouri, 63122, United States, (38.58296585, -90.40586986438728, 0.0))

In [93]:
# within the items that didn't geocode, change the following
# remove 'AVE','DR','RD','LN'
# maybe back up from the end of each string to the last space, look at those results

In [94]:
# re-display the rows that failed geocoding, for reference while designing the cleanup
child_nogeo

Unnamed: 0,COUNTY,facility_type,regulatory_status,facility_name,dvn,street_address,city,state,zip_code,full_address,geocode
989,ST LOUIS,CHILD CARE CENTER,LICENSABLE,"APPLE OF YOUR EYE LEARNING CENTER, INC.",500836,9994 TESSON CREEK ESTATES DR,ST LOUIS,MO,63123,"9994 TESSON CREEK ESTATES DR,ST LOUIS,MO",
1019,ST LOUIS,CHILD CARE CENTER,LICENSABLE,COURTNEY & MCKENZIE DAYCARE CENTER,1298877,2120 REDMAN RD,ST LOUIS,MO,63136,"2120 REDMAN RD,ST LOUIS,MO",
1036,ST LOUIS,CHILD CARE CENTER,LICENSABLE,FIRST STEPS EARLY CHILDHOOD LEARNING CENTER,2287583,6912 WOODSMERE CIR,ST LOUIS,MO,63129,"6912 WOODSMERE CIR,ST LOUIS,MO",
1057,ST LOUIS,CHILD CARE CENTER,LICENSABLE,"JESSIE B'S ACADEMY, LLC",2735544,2002 LUCAS AND HUNT RD,ST LOUIS,MO,63121-5012,"2002 LUCAS AND HUNT RD,ST LOUIS,MO",
1119,ST LOUIS,CHILD CARE CENTER,LICENSABLE,MARY MARGARET DAY CARE AND LEARNING CENTER,2107919,2011 REDMAN RD,ST LOUIS,MO,63138,"2011 REDMAN RD,ST LOUIS,MO",
1150,ST LOUIS,CHILD CARE CENTER,LICENSABLE,SAPPINGTON CHILD CARE CENTER,652922,9915 EMIL ST,ST LOUIS,MO,63126,"9915 EMIL ST,ST LOUIS,MO",
1166,ST LOUIS,CHILD CARE CENTER,LICENSABLE,"THE AMITY COMPANY, INC.",2367960,6060 A TELEGRAPH RD,ST LOUIS,MO,63129,"6060 A TELEGRAPH RD,ST LOUIS,MO",
1187,ST LOUIS,CHILD CARE CENTER,LICENSABLE,TILSUNUP24HR DAYCARE LLC,2455098,3860 LUCAS AND HUNT RD,ST LOUIS,MO,63121-2933,"3860 LUCAS AND HUNT RD,ST LOUIS,MO",
1202,ST LOUIS,CHILD CARE CENTER,LICENSABLE,WASHINGTON UNIVERSITY NURSERY SCHOOL,184250,6926 FOREST PARK PKWY BLDG 2,ST LOUIS,MO,63130-4433,"6926 FOREST PARK PKWY BLDG 2,ST LOUIS,MO",
1203,ST LOUIS,CHILD CARE CENTER,LICENSABLE,WEBSTER CHILD CARE CENTER AT LACLEDE GROVES,186758,624 LOHMANN FOREST LN,ST LOUIS,MO,63119,"624 LOHMANN FOREST LN,ST LOUIS,MO",


In [95]:
# split the first failed street address at its last space to separate the final word
st = child_nogeo['street_address'].iloc[0]
st.rsplit(' ', maxsplit=1)

['9994 TESSON CREEK ESTATES', 'DR']

In [96]:
# element 1 of the split is the text after the final space ('DR' for this address)
st.rsplit(' ', 1)[1]

'DR'

In [98]:
# Build a simplified street address for a second geocoding attempt.
#
# 1. Drop a trailing street-type suffix (DR, RD, ST, LN, AVE, BLVD), e.g.
#    '2120 REDMAN RD' -> '2120 REDMAN'. The pattern is anchored to the end of
#    the string and requires whitespace before the suffix, so only a whole
#    final word is removed. A plain substring replace of ' ST' or ' DR' would
#    also eat the start of words such as ' STONE' or ' DRAKE'.
# 2. Spell out a stand-alone ' E ' direction as ' EAST '.
#
# regex= is passed explicitly because the default of Series.str.replace
# differs between pandas versions.
#
# The column is rebuilt from street_address each time, so this cell can be
# re-run safely without first dropping new_address.

child_nogeo['new_address'] = (
    child_nogeo['street_address']
    .str.replace(r'\s+(?:DR|RD|ST|LN|AVE|BLVD)\s*$', '', regex=True)
    .str.replace(' E ', ' EAST ', regex=False)
)




In [99]:
# inspect the new_address column next to the original street_address
child_nogeo

Unnamed: 0,COUNTY,facility_type,regulatory_status,facility_name,dvn,street_address,city,state,zip_code,full_address,geocode,new_address
989,ST LOUIS,CHILD CARE CENTER,LICENSABLE,"APPLE OF YOUR EYE LEARNING CENTER, INC.",500836,9994 TESSON CREEK ESTATES DR,ST LOUIS,MO,63123,"9994 TESSON CREEK ESTATES DR,ST LOUIS,MO",,9994 TESSON CREEK ESTATES
1019,ST LOUIS,CHILD CARE CENTER,LICENSABLE,COURTNEY & MCKENZIE DAYCARE CENTER,1298877,2120 REDMAN RD,ST LOUIS,MO,63136,"2120 REDMAN RD,ST LOUIS,MO",,2120 REDMAN
1036,ST LOUIS,CHILD CARE CENTER,LICENSABLE,FIRST STEPS EARLY CHILDHOOD LEARNING CENTER,2287583,6912 WOODSMERE CIR,ST LOUIS,MO,63129,"6912 WOODSMERE CIR,ST LOUIS,MO",,6912 WOODSMERE CIR
1057,ST LOUIS,CHILD CARE CENTER,LICENSABLE,"JESSIE B'S ACADEMY, LLC",2735544,2002 LUCAS AND HUNT RD,ST LOUIS,MO,63121-5012,"2002 LUCAS AND HUNT RD,ST LOUIS,MO",,2002 LUCAS AND HUNT
1119,ST LOUIS,CHILD CARE CENTER,LICENSABLE,MARY MARGARET DAY CARE AND LEARNING CENTER,2107919,2011 REDMAN RD,ST LOUIS,MO,63138,"2011 REDMAN RD,ST LOUIS,MO",,2011 REDMAN
1150,ST LOUIS,CHILD CARE CENTER,LICENSABLE,SAPPINGTON CHILD CARE CENTER,652922,9915 EMIL ST,ST LOUIS,MO,63126,"9915 EMIL ST,ST LOUIS,MO",,9915 EMIL
1166,ST LOUIS,CHILD CARE CENTER,LICENSABLE,"THE AMITY COMPANY, INC.",2367960,6060 A TELEGRAPH RD,ST LOUIS,MO,63129,"6060 A TELEGRAPH RD,ST LOUIS,MO",,6060 A TELEGRAPH
1187,ST LOUIS,CHILD CARE CENTER,LICENSABLE,TILSUNUP24HR DAYCARE LLC,2455098,3860 LUCAS AND HUNT RD,ST LOUIS,MO,63121-2933,"3860 LUCAS AND HUNT RD,ST LOUIS,MO",,3860 LUCAS AND HUNT
1202,ST LOUIS,CHILD CARE CENTER,LICENSABLE,WASHINGTON UNIVERSITY NURSERY SCHOOL,184250,6926 FOREST PARK PKWY BLDG 2,ST LOUIS,MO,63130-4433,"6926 FOREST PARK PKWY BLDG 2,ST LOUIS,MO",,6926 FOREST PARK PKWY BLDG 2
1203,ST LOUIS,CHILD CARE CENTER,LICENSABLE,WEBSTER CHILD CARE CENTER AT LACLEDE GROVES,186758,624 LOHMANN FOREST LN,ST LOUIS,MO,63119,"624 LOHMANN FOREST LN,ST LOUIS,MO",,624 LOHMANN FOREST


In [100]:
# assemble the retry query: simplified street address, then city, then state, comma-separated
child_nogeo['full_address_v2'] = (
    child_nogeo['new_address']
    + ","
    + child_nogeo['city']
    + ","
    + child_nogeo['state']
)


In [101]:
# confirm that full_address_v2 was assembled from new_address, city and state
child_nogeo

Unnamed: 0,COUNTY,facility_type,regulatory_status,facility_name,dvn,street_address,city,state,zip_code,full_address,geocode,new_address,full_address_v2
989,ST LOUIS,CHILD CARE CENTER,LICENSABLE,"APPLE OF YOUR EYE LEARNING CENTER, INC.",500836,9994 TESSON CREEK ESTATES DR,ST LOUIS,MO,63123,"9994 TESSON CREEK ESTATES DR,ST LOUIS,MO",,9994 TESSON CREEK ESTATES,"9994 TESSON CREEK ESTATES,ST LOUIS,MO"
1019,ST LOUIS,CHILD CARE CENTER,LICENSABLE,COURTNEY & MCKENZIE DAYCARE CENTER,1298877,2120 REDMAN RD,ST LOUIS,MO,63136,"2120 REDMAN RD,ST LOUIS,MO",,2120 REDMAN,"2120 REDMAN,ST LOUIS,MO"
1036,ST LOUIS,CHILD CARE CENTER,LICENSABLE,FIRST STEPS EARLY CHILDHOOD LEARNING CENTER,2287583,6912 WOODSMERE CIR,ST LOUIS,MO,63129,"6912 WOODSMERE CIR,ST LOUIS,MO",,6912 WOODSMERE CIR,"6912 WOODSMERE CIR,ST LOUIS,MO"
1057,ST LOUIS,CHILD CARE CENTER,LICENSABLE,"JESSIE B'S ACADEMY, LLC",2735544,2002 LUCAS AND HUNT RD,ST LOUIS,MO,63121-5012,"2002 LUCAS AND HUNT RD,ST LOUIS,MO",,2002 LUCAS AND HUNT,"2002 LUCAS AND HUNT,ST LOUIS,MO"
1119,ST LOUIS,CHILD CARE CENTER,LICENSABLE,MARY MARGARET DAY CARE AND LEARNING CENTER,2107919,2011 REDMAN RD,ST LOUIS,MO,63138,"2011 REDMAN RD,ST LOUIS,MO",,2011 REDMAN,"2011 REDMAN,ST LOUIS,MO"
1150,ST LOUIS,CHILD CARE CENTER,LICENSABLE,SAPPINGTON CHILD CARE CENTER,652922,9915 EMIL ST,ST LOUIS,MO,63126,"9915 EMIL ST,ST LOUIS,MO",,9915 EMIL,"9915 EMIL,ST LOUIS,MO"
1166,ST LOUIS,CHILD CARE CENTER,LICENSABLE,"THE AMITY COMPANY, INC.",2367960,6060 A TELEGRAPH RD,ST LOUIS,MO,63129,"6060 A TELEGRAPH RD,ST LOUIS,MO",,6060 A TELEGRAPH,"6060 A TELEGRAPH,ST LOUIS,MO"
1187,ST LOUIS,CHILD CARE CENTER,LICENSABLE,TILSUNUP24HR DAYCARE LLC,2455098,3860 LUCAS AND HUNT RD,ST LOUIS,MO,63121-2933,"3860 LUCAS AND HUNT RD,ST LOUIS,MO",,3860 LUCAS AND HUNT,"3860 LUCAS AND HUNT,ST LOUIS,MO"
1202,ST LOUIS,CHILD CARE CENTER,LICENSABLE,WASHINGTON UNIVERSITY NURSERY SCHOOL,184250,6926 FOREST PARK PKWY BLDG 2,ST LOUIS,MO,63130-4433,"6926 FOREST PARK PKWY BLDG 2,ST LOUIS,MO",,6926 FOREST PARK PKWY BLDG 2,"6926 FOREST PARK PKWY BLDG 2,ST LOUIS,MO"
1203,ST LOUIS,CHILD CARE CENTER,LICENSABLE,WEBSTER CHILD CARE CENTER AT LACLEDE GROVES,186758,624 LOHMANN FOREST LN,ST LOUIS,MO,63119,"624 LOHMANN FOREST LN,ST LOUIS,MO",,624 LOHMANN FOREST,"624 LOHMANN FOREST,ST LOUIS,MO"


In [102]:
# Second geocoding attempt, this time with the simplified addresses.
# The lookup is throttled to one request per second to stay within the public
# Nominatim service's usage policy. swallow_exceptions=False lets service
# errors raise instead of being recorded as "no match" (None).
child_nogeo['geocode'] = child_nogeo['full_address_v2'].apply(
    RateLimiter(geolocator.geocode, min_delay_seconds=1, swallow_exceptions=False)
)

In [103]:
# # get the latitude and longitude values from the geodata column and put them in their own columns for easier plotting
# child_nogeo['lat'] = [g.latitude for g in child_nogeo.geocode]
# child_nogeo['long'] = [g.longitude for g in child_nogeo.geocode]


In [104]:
# check the retry results: rows whose geocode is still empty remain unresolved
child_nogeo

Unnamed: 0,COUNTY,facility_type,regulatory_status,facility_name,dvn,street_address,city,state,zip_code,full_address,geocode,new_address,full_address_v2
989,ST LOUIS,CHILD CARE CENTER,LICENSABLE,"APPLE OF YOUR EYE LEARNING CENTER, INC.",500836,9994 TESSON CREEK ESTATES DR,ST LOUIS,MO,63123,"9994 TESSON CREEK ESTATES DR,ST LOUIS,MO","(Tesson Creek Estates, Affton, Saint Louis Cou...",9994 TESSON CREEK ESTATES,"9994 TESSON CREEK ESTATES,ST LOUIS,MO"
1019,ST LOUIS,CHILD CARE CENTER,LICENSABLE,COURTNEY & MCKENZIE DAYCARE CENTER,1298877,2120 REDMAN RD,ST LOUIS,MO,63136,"2120 REDMAN RD,ST LOUIS,MO","(2120, Redman Avenue, Saint Louis County, Miss...",2120 REDMAN,"2120 REDMAN,ST LOUIS,MO"
1036,ST LOUIS,CHILD CARE CENTER,LICENSABLE,FIRST STEPS EARLY CHILDHOOD LEARNING CENTER,2287583,6912 WOODSMERE CIR,ST LOUIS,MO,63129,"6912 WOODSMERE CIR,ST LOUIS,MO",,6912 WOODSMERE CIR,"6912 WOODSMERE CIR,ST LOUIS,MO"
1057,ST LOUIS,CHILD CARE CENTER,LICENSABLE,"JESSIE B'S ACADEMY, LLC",2735544,2002 LUCAS AND HUNT RD,ST LOUIS,MO,63121-5012,"2002 LUCAS AND HUNT RD,ST LOUIS,MO",,2002 LUCAS AND HUNT,"2002 LUCAS AND HUNT,ST LOUIS,MO"
1119,ST LOUIS,CHILD CARE CENTER,LICENSABLE,MARY MARGARET DAY CARE AND LEARNING CENTER,2107919,2011 REDMAN RD,ST LOUIS,MO,63138,"2011 REDMAN RD,ST LOUIS,MO","(2011, Redman Avenue, Spanish Lake, Saint Loui...",2011 REDMAN,"2011 REDMAN,ST LOUIS,MO"
1150,ST LOUIS,CHILD CARE CENTER,LICENSABLE,SAPPINGTON CHILD CARE CENTER,652922,9915 EMIL ST,ST LOUIS,MO,63126,"9915 EMIL ST,ST LOUIS,MO",,9915 EMIL,"9915 EMIL,ST LOUIS,MO"
1166,ST LOUIS,CHILD CARE CENTER,LICENSABLE,"THE AMITY COMPANY, INC.",2367960,6060 A TELEGRAPH RD,ST LOUIS,MO,63129,"6060 A TELEGRAPH RD,ST LOUIS,MO",,6060 A TELEGRAPH,"6060 A TELEGRAPH,ST LOUIS,MO"
1187,ST LOUIS,CHILD CARE CENTER,LICENSABLE,TILSUNUP24HR DAYCARE LLC,2455098,3860 LUCAS AND HUNT RD,ST LOUIS,MO,63121-2933,"3860 LUCAS AND HUNT RD,ST LOUIS,MO",,3860 LUCAS AND HUNT,"3860 LUCAS AND HUNT,ST LOUIS,MO"
1202,ST LOUIS,CHILD CARE CENTER,LICENSABLE,WASHINGTON UNIVERSITY NURSERY SCHOOL,184250,6926 FOREST PARK PKWY BLDG 2,ST LOUIS,MO,63130-4433,"6926 FOREST PARK PKWY BLDG 2,ST LOUIS,MO",,6926 FOREST PARK PKWY BLDG 2,"6926 FOREST PARK PKWY BLDG 2,ST LOUIS,MO"
1203,ST LOUIS,CHILD CARE CENTER,LICENSABLE,WEBSTER CHILD CARE CENTER AT LACLEDE GROVES,186758,624 LOHMANN FOREST LN,ST LOUIS,MO,63119,"624 LOHMANN FOREST LN,ST LOUIS,MO",,624 LOHMANN FOREST,"624 LOHMANN FOREST,ST LOUIS,MO"
