# St. Louis Childcare Facilities

Data acquisition, documentation, carpentry, geocoding, and database loading for St. Louis childcare facility location and buffer info.

In [1]:
# IMPORTS
import os
import shutil
import urllib.request
from pathlib import Path
from urllib.parse import quote_plus
from zipfile import ZipFile

import folium
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import requests
from geopandas.tools import overlay
from geopy.extra.rate_limiter import RateLimiter # throttles calls to the geocoder
from geopy.geocoders import Nominatim # for geocoding
from matplotlib import pyplot
from shapely.geometry import Point, Polygon

<a id='childcare'></a>
### Childcare facilities in Missouri

List of all licensed and license-exempt childcare providers in Missouri. Basic information in table form including county, facility type, facility name, street address, city, state, and zip code. Provided by Missouri Dept of Elementary and Secondary Education (DESE), Child Care Compliance team.

https://dese.mo.gov/childhood/child-care/find-care  
https://dese.mo.gov/media/70106/download  
https://healthapps.dhss.mo.gov/childcaresearch/



In [2]:
# set URL to pull the file from
file_url = "https://dese.mo.gov/media/70106/download"

# Designate the local filename
local_file_name = 'regulated-child-care-facilities.xlsx'

# Designate the local file name with a path to the data directory.
file_Path = Path('data/') / local_file_name

# make sure the target directory exists (it will not on a fresh checkout)
file_Path.parent.mkdir(parents=True, exist_ok=True)

# go get the file; the timeout keeps the cell from hanging forever, and
# raise_for_status() stops us from saving an HTTP error page as if it were the spreadsheet
resp = requests.get(file_url, timeout=60)
resp.raise_for_status()

# save the downloaded bytes locally
with open(file_Path, 'wb') as output:
    output.write(resp.content)

In [3]:
# read in the childcare facility data we just downloaded
# (file_Path is the local .xlsx path set in the download cell)
childcare = pd.read_excel(file_Path)


In [4]:
# take a look at the first few rows of the data
childcare.head()


Unnamed: 0,COUNTY,facility type,regulatory status,facility name,dvn,street address,city,state,zip code,facility phone,...,Column16336,Column16337,Column16338,Column16339,Column16340,Column16341,Column16342,Column16343,Column16344,Column16345
0,BOLLINGER,CHILD CARE CENTER,LICENSABLE,"EAST MISSOURI ACTION AGENCY, INC.",705919,400 STATE HIGHWAY 34 EAST,MARBLE HILL,MO,63764,573 238 3652,...,,,,,,,,,,
1,BOLLINGER,FAMILY HOME,LICENSABLE,HOLLY HOESLI DAYCARE,2300674,16424 AZALEA LN,LEOPOLD,MO,63760-9614,573 238 5452,...,,,,,,,,,,
2,BOLLINGER,FAMILY HOME,LICENSABLE,"HOTOP, RANDA SHARLEAN",1063085,40072 STATE HIGHWAY K,PERRYVILLE,MO,63775-7779,573 788 2352,...,,,,,,,,,,
3,BOLLINGER,GROUP HOME,LICENSABLE,"SITZ, MARLA L",384016,10310 HERITAGE LN,ZALMA,MO,63787-8726,573 722 3009,...,,,,,,,,,,
4,BUTLER,CHILD CARE CENTER,LICENSABLE,BROSELEY PRESCHOOL & DAYCARE,2147395,2697 COUNTY ROAD 650,BROSELEY,MO,63932-8130,573 328 1154,...,,,,,,,,,,


In [5]:
# the spreadsheet is very wide, so list every column name
# to decide which ones are worth keeping
list(childcare.columns)


['COUNTY',
 'facility type',
 'regulatory status',
 'facility name',
 'dvn',
 'street address',
 'city',
 'state',
 'zip code',
 'facility phone',
 'director/provider',
 'effective date',
 'Anniversary/Expiration Date',
 'status',
 'from hours',
 'to hours',
 'Days of Operation',
 'Months of Operation',
 'minimum age',
 'maximum age',
 'total capacity',
 'owner name',
 'mailing street address',
 'mailing city',
 'mailing state',
 'mailing zip code',
 'Column1',
 'Column2',
 'Column3',
 'Column4',
 'Column5',
 'Column6',
 'Column7',
 'Column8',
 'Column9',
 'Column10',
 'Column11',
 'Column12',
 'Column13',
 'Column14',
 'Column15',
 'Column16',
 'Column17',
 'Column18',
 'Column19',
 'Column20',
 'Column21',
 'Column22',
 'Column23',
 'Column24',
 'Column25',
 'Column26',
 'Column27',
 'Column28',
 'Column29',
 'Column30',
 'Column31',
 'Column32',
 'Column33',
 'Column34',
 'Column35',
 'Column36',
 'Column37',
 'Column38',
 'Column39',
 'Column40',
 'Column41',
 'Column42',
 'Column4

In [6]:
# clean and convert the data

# replace spaces in column names with '_' to make life easier
# (done first so the column slice below still works if this cell is re-run)
childcare.columns = childcare.columns.str.replace(' ', '_')

# reduce the childcare dataframe to only the columns we will need
childcare = childcare.loc[:, 'COUNTY':'zip_code']

# this dataset covers the entire state, so filter it down to just St. Louis
# sorted(childcare.city.unique()) # uncomment this line to see a full list of cities
# .copy() gives us an independent frame, so adding a column below
# does not raise SettingWithCopyWarning
childcare = childcare.loc[childcare['city'] == 'ST LOUIS'].copy()

# merge together street address, city, and state to create the full address
childcare['full_address'] = childcare.street_address + "," + childcare.city + "," + childcare.state


In [7]:
# create the Nominatim geocoding client (10 second timeout per request)
geolocator = Nominatim(
    user_agent="myGeolocator",
    timeout=10,
)


#### Test out the geocoder with a couple of basic, manual addresses

In [8]:
# convert a text address to lat/lon (geocoding)
location = geolocator.geocode('4550 Kester Mill Rd,Winston-Salem,NC')

# geocode() returns None when nothing matches, so check before reading coordinates
if location is None:
    print('No geocoding result for this address')
else:
    print(location)
    print((location.latitude, location.longitude))


Walmart Supercenter, 4550, Kester Mill Road, Winston-Salem, Forsyth County, North Carolina, 27103, United States
(36.06752315, -80.3372069310351)


In [9]:
# try out another address
location = geolocator.geocode('14 Spencers Xing,Saint Peters,MO')

# geocode() returns None when nothing matches, so check before reading coordinates
if location is None:
    print('No geocoding result for this address')
else:
    print(location)
    print((location.latitude, location.longitude))

14, Spencers Crossing, Spencer Creek South, Saint Peters, Saint Charles County, Missouri, 63376, United States
(38.77766939999999, -90.5907600713182)


#### Put together the childcare list + geocoder to get lat/lon that we can plot.
We already created a single "full address" column that can be passed to the geocoder. We can now use that to add a new column to the dataframe that includes lat/lon point data.

In [10]:
# pass the full addresses to the geocoder and store the results in a new column
# Nominatim's usage policy allows at most one request per second, so throttle the calls.
# RateLimiter also retries failed requests and, by default, returns None for an address
# that still fails, so one timeout no longer aborts the whole run.
rate_limited_geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)
childcare['geocode'] = childcare.full_address.apply(rate_limited_geocode)


In [11]:
# keep only the facilities the geocoder could locate
# (copy so later column additions do not touch the original frame)
has_location = childcare['geocode'].notna()
childcare_nona = childcare[has_location].copy()


In [12]:
# report how many addresses the geocoder could not resolve
n_total = len(childcare.index)
n_failed = n_total - len(childcare_nona.index)
print(f'{n_failed} of {n_total} entries failed geocoding')

25 of 375 entries failed geocoding


In [13]:
# split each geocode result into plain latitude / longitude columns for easier plotting
childcare_nona['lat'] = childcare_nona.geocode.map(lambda loc: loc.latitude)
childcare_nona['long'] = childcare_nona.geocode.map(lambda loc: loc.longitude)


In [14]:
# start from a base map centered on St. Louis
map_childcare = folium.Map(
    location=[38.627003, -90.3],
    tiles='cartodbpositron',
    zoom_start=11,
)

# drop one marker per childcare facility; the popup shows the facility name
facility_rows = zip(
    childcare_nona['lat'],
    childcare_nona['long'],
    childcare_nona['facility_name'],
)
for facility_lat, facility_long, facility_name in facility_rows:
    marker = folium.Marker(
        location=[facility_lat, facility_long],
        popup=facility_name,
    )
    marker.add_to(map_childcare)

# display the map
map_childcare

<a id='buffer'></a>
## Add buffers to point data

Now that we have ingested the source data for public schools, private schools, and childcare facilities, we can create the buffer (residency restriction) zones around these points. Here is the buffer information from the [Missouri Sex Offender Registry Fact Sheet](https://www.mshp.dps.missouri.gov/MSHPWeb/PatrolDivisions/CRID/SOR/factsheet.html#specialConsiderations):
 >Certain sexual offenders may not reside within 1,000 feet of any public or private school up to the 12th grade or childcare facility which is in existence at the time of the offender establishing his or her residency.

#### Buffers for childcare facilities

In [15]:
# the childcare data came from a spreadsheet rather than a shapefile,
# so we have to build the geodataframe ourselves

# one shapely Point per facility; note the (x, y) = (longitude, latitude) order
geometry = [
    Point(lon, lat)
    for lon, lat in zip(childcare_nona['long'], childcare_nona['lat'])
]

# combine the childcare dataframe with the point geometry,
# declaring the coordinates as EPSG:4326 (degrees)
childcare_gdf = gpd.GeoDataFrame(
    childcare_nona,
    geometry=geometry,
    crs=4326,
)


In [16]:
# show the CRS we start from
print('Starting CRS:', childcare_gdf.crs)

# switch to a local projected CRS so distances are in linear units, not degrees
# https://epsg.io/6512 is expressed in meters
local_epsg = 6512
childcare_gdf = childcare_gdf.to_crs(epsg=local_epsg)

# confirm the reprojection took effect
print('New CRS:', childcare_gdf.crs)

Starting CRS: epsg:4326
New CRS: epsg:6512


In [17]:
# expand points to circles with radius = 1000 ft (the min residential distance for a registered sex offender)
mindist_ft = 1000
mindist_m = mindist_ft*0.3048 # convert feet to meters by multiplying by 0.3048

# buffer() works in the units of the CRS, so make sure we are in a projected CRS
# and not still in degrees (which would happen if the reprojection cell was skipped)
assert childcare_gdf.crs is not None and childcare_gdf.crs.is_projected, \
    'reproject childcare_gdf to a projected CRS before buffering'

# only buffer while the geometry is still points, so that re-running this cell
# does not buffer the buffers and silently double the restricted radius
if (childcare_gdf.geom_type == 'Point').all():
    childcare_gdf['geometry'] = childcare_gdf.buffer(mindist_m)


#### Plot buffers

In [18]:
# visualize the childcare facilities (with 1000ft buffers) on OpenStreetMap tiles
def buffer_zone_style(feature):
    """Return the fill and outline colors used for every buffer polygon."""
    return {'fillColor': '#db3e00', 'color': '#db3e00'}

map_buffer_zones = folium.Map(
    location=[38.6529545, -90.2411166],
    tiles='openstreetmap',
    zoom_start=13,
)

# apply the CHILDCARE FACILITY zones to the map
buffer_layer = folium.GeoJson(childcare_gdf['geometry'], style_function=buffer_zone_style)
buffer_layer.add_to(map_buffer_zones)

# display map
map_buffer_zones

<a id='db'></a>
## Load data into PostGIS

Load the cleaned, geocoded, and ready-to-use data into a PostGIS database.

In [None]:
# we need GeoAlchemy2 to run the geodataframe to_postgis method later

In [7]:
pip install GeoAlchemy2

Note: you may need to restart the kernel to use updated packages.


In [8]:
# a few more imports specific to the database process
import geoalchemy2 
import getpass

import psycopg2
import numpy
from psycopg2.extensions import adapt, register_adapter, AsIs

from sqlalchemy import create_engine


In [9]:
# get user password for connecting to the db
# (prompted at run time with getpass so the password is never written into the notebook)
mypasswd = getpass.getpass()

········


In [18]:
# open a psycopg2 connection to the course database,
# authenticating with the password collected above
db_params = {
    'database': 'cappsds_psmd39',
    'user': 'psmd39',
    'host': 'pgsql.dsa.lan',
    'password': mypasswd,
}
conn = psycopg2.connect(**db_params)


In [31]:
# open a cursor and ask the catalog for the ordinary tables already in the database,
# skipping the pg_* / sql_* system relations
cursor = conn.cursor()

cursor.execute("""SELECT relname FROM pg_class WHERE relkind='r'
                  AND relname !~ '^(pg_|sql_)';""") # "rel" is short for relation.

# each result row is a 1-tuple holding the table name; show the names alphabetically
tables = sorted(row[0] for row in cursor.fetchall())
tables


['country_borders',
 'gadm_admin_borders',
 'geonames_feature',
 'spatial_ref_sys',
 'stlchildcare',
 'stlpubschools',
 'stlpvtschools']

#### Load STL childcare facility data

In [79]:
# build the database version as a new frame so childcare_gdf is left untouched,
# dropping the columns the database table does not need
unused_columns = ['facility_type','regulatory_status','dvn','full_address','geocode']
childcare_db = childcare_gdf.drop(columns=unused_columns)

# lowercase every column name for easier work later
childcare_db.columns = [column.lower() for column in childcare_db.columns]

# put the columns in the same order as the other tables
column_order = ['facility_name','street_address','city','state','zip_code','county',
                'lat','long','geometry']
childcare_db = childcare_db[column_order]

# use the same column names as the other tables
new_names = {'facility_name':'facility', 'street_address':'address', 'zip_code':'zip',
             'lat':'latitude','long':'longitude'}
childcare_db = childcare_db.rename(columns=new_names)

childcare_db.head()


Unnamed: 0,facility,address,city,state,zip,county,latitude,longitude,geometry
877,"""TRAINING UP A CHILD"" LLC",1425 STEIN RD,ST LOUIS,MO,63135,ST LOUIS,38.756866,-90.281532,"POLYGON ((269293.251 324464.164, 269291.784 32..."
878,A BRIGHTER FUTURE CHILDCARE AND DEVELOPMENT CE...,2140 CHAMBERS RD,ST LOUIS,MO,63136-4346,ST LOUIS,38.74901,-90.255613,"POLYGON ((271548.337 323597.878, 271546.869 32..."
879,A CHILD'S HEART LEARNING CENTER LLC.,8020 SAINT CHARLES ROCK RD,ST LOUIS,MO,63114-5364,ST LOUIS,38.69914,-90.328902,"POLYGON ((265188.017 318047.707, 265186.550 31..."
880,A GENERATION CHOSEN PRE SCHOOL INC,1301 S FLORISSANT RD,ST LOUIS,MO,63121-1106,ST LOUIS,38.727633,-90.302545,"POLYGON ((267473.863 321215.072, 267472.395 32..."
881,"A PLACE FOR CHILDREN, INC.",10823 BIG BEND RD,ST LOUIS,MO,63122-6029,ST LOUIS,38.566986,-90.410277,"POLYGON ((258123.776 303368.311, 258122.308 30..."


In [80]:
# load the data!

# Set up database connection engine
# FORMAT: engine = create_engine('postgresql://user:password@host:5432/')
# The password is URL-escaped first: characters such as '@', ':', '/' or '#'
# would otherwise be parsed as part of the URL structure and break the connection.
engine = create_engine(
    f'postgresql://psmd39:{quote_plus(mypasswd)}@pgsql.dsa.lan:5432/cappsds_psmd39',
    echo=False,
)

# GeoDataFrame to PostGIS
# if_exists='replace' drops and recreates the stlchildcare table on every run
childcare_db.to_postgis(
    con=engine,
    name="stlchildcare",
    if_exists='replace'
)


In [23]:
# sanity check: read a few rows back out of the new table as a geodataframe
# (the geometry column is aliased to "geom", the column read_postgis treats as geometry)
sql = "select facility, latitude, longitude, geometry as geom from stlchildcare LIMIT 10;"
db_test_out = gpd.read_postgis(sql=sql, con=conn, geom_col='geom')
db_test_out


Unnamed: 0,facility,latitude,longitude,geom
0,"""TRAINING UP A CHILD"" LLC",38.756866,-90.281532,"POLYGON ((269293.251 324464.164, 269291.784 32..."
1,A BRIGHTER FUTURE CHILDCARE AND DEVELOPMENT CE...,38.74901,-90.255613,"POLYGON ((271548.337 323597.878, 271546.869 32..."
2,A CHILD'S HEART LEARNING CENTER LLC.,38.69914,-90.328902,"POLYGON ((265188.017 318047.707, 265186.550 31..."
3,A GENERATION CHOSEN PRE SCHOOL INC,38.727633,-90.302545,"POLYGON ((267473.863 321215.072, 267472.395 32..."
4,"A PLACE FOR CHILDREN, INC.",38.566986,-90.410277,"POLYGON ((258123.776 303368.311, 258122.308 30..."
5,ABOVE AND BEYOND LEARNING CENTER LLC,38.496097,-90.322656,"POLYGON ((265774.790 295510.833, 265773.323 29..."
6,AGAPE' ACADEMY AND CHILD DEVELOPMENT CENTER,38.671308,-90.329524,"POLYGON ((265139.620 314958.146, 265138.152 31..."
7,ALL MY CHILDREN LEARNING CENTER 2 LLC,38.703309,-90.401454,"POLYGON ((258876.485 318501.112, 258875.017 31..."
8,ANGEL'S CURIOUS KIDS EARLY CHILDHOOD ACADEMIC ...,38.805356,-90.227058,"POLYGON ((274011.858 329859.463, 274010.390 32..."
9,APPLE OF YOUR EYE ACADEMY LLC,38.469906,-90.308109,"POLYGON ((267049.880 292606.153, 267048.412 29..."


In [15]:
# release every database resource opened above:
# the psycopg2 cursor and connection, and the SQLAlchemy engine's connection pool
cursor.close()
conn.close()
engine.dispose()


# Summary

Now our St. Louis childcare facility data, which includes the restricted buffer zones, is available through the PostGIS database, meaning we don't have to go through the ingest, cleaning, and geocoding processes again. Also, we can use this database to perform geo operations and analysis with the data. We have now completed the data carpentry needed to create our overall restricted zones. In the next notebook, we will fuse it all together.