## Using Folium to Create Commute Line Maps

In [1]:
import pathlib
import geopandas as gp
import folium

#### Set input file path and name

In [2]:
# Directory holding the input data, expressed relative to the notebook.
mypath = pathlib.Path('..') / 'data'
print(mypath)

../data


In [3]:
# Full path of the commute-line GeoJSON file inside the data directory.
lex = mypath / 'lex_all.geojson'
print(lex)

../data/lex_all.geojson


__Folium should be able to read the GeoJSON file directly, but it throws an error:__

```
ValueError: Cannot render objects with any missing geometries. 
<_io.TextIOWrapper name='../data/lex_limited_output.json' mode='r' encoding='UTF-8'>
```
### Read geojson with GeoPandas

In [4]:
# Load the GeoJSON file into a GeoDataFrame, then report how many rows were
# read and the column summary.
gdf = gp.read_file(lex)
row_count = len(gdf)
print(row_count)
gdf.info()

20264
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 20264 entries, 0 to 20263
Data columns (total 4 columns):
w_geocode    20264 non-null object
h_geocode    20264 non-null object
distance     20264 non-null float64
geometry     20264 non-null object
dtypes: float64(1), object(3)
memory usage: 633.3+ KB


#### Check for null records

In [5]:
# Flag every row that has a null in at least one column, then keep only
# those rows; an empty result means no null values were found.
has_null_value = gdf.isnull().any(axis=1)
gdf_null = gdf[has_null_value]
gdf_null.head()

Unnamed: 0,w_geocode,h_geocode,distance,geometry


#### Folium has a difficult time mapping large amounts of data  
__Below are examples of how to slice and/or select data for mapping__

#### Add styling for the lines based on distance

In [6]:
def style_function(x):
    """Return the line style for one GeoJSON feature, coloured by distance.

    `x` is a feature mapping whose ``x['properties']['distance']`` holds the
    commute distance. Lines are drawn:

    * red    when distance is greater than 90000,
    * blue   when distance is greater than 50000 (and at most 90000),
    * yellow otherwise.

    Weight and opacity are the same for every line.
    """
    distance = x['properties']['distance']
    if distance > 90000:
        color = 'red'
    elif distance > 50000:
        color = 'blue'
    else:
        color = 'yellow'
    return {'color': color, 'weight': .5, 'opacity': .3}

#### Set max straight line distance  
Since many large employers have regional locations, setting a maximum straight-line distance is necessary.
Distance is in meters; 100,000 meters is roughly 62 miles. Because this is a straight-line distance, it can
translate to a drive-time commute of 1.5 hours.


In [7]:
# Maximum straight-line commute distance kept for mapping: 100 km, in meters.
distance_max = 100 * 1000

#### Map the workers for Baptist Health Lexington

In [8]:
# Filter data to the block.
# The full 15-digit census block code is used, matching the other employer
# cells. An 11-digit prefix ('21067002400') would select the whole tract; in
# this dataset '210670024001012' is the only w_geocode with that prefix, so
# the selected rows are the same.
cb_hosp_f1 = gdf[gdf['w_geocode'].str.startswith('210670024001012')]
# Drop lines at or beyond distance_max: those distances are too great to
# commute daily.
cb_hosp = cb_hosp_f1[cb_hosp_f1['distance'] < distance_max]
print(len(cb_hosp))
# Frame handed to the map cell that follows.
df2map = cb_hosp

1959


In [9]:
# Base map: fixed centre point, CartoDB Positron tiles, scale bar shown.
m = folium.Map(
    location=[37.645556, -84.769722],
    tiles='cartodbpositron',
    zoom_start=7.5,
    control_scale=True,
    prefer_canvas=True,
)
# Draw the selected commute lines, coloured by style_function, on the map.
commute_layer = folium.features.GeoJson(df2map, style_function=style_function)
commute_layer.add_to(m)
m

#### Map University of KY Employees

In [10]:
# Keep only the commute lines whose work geocode is this block.
in_uk_block = gdf['w_geocode'].str.startswith('210670008022002')
uk_f1 = gdf[in_uk_block]
# Drop lines at or beyond distance_max (too far for a daily commute).
uk_within_range = uk_f1['distance'] < distance_max
uk = uk_f1[uk_within_range]
print(len(uk))
# Only the first 4000 rows are passed on to the map cell.
df2map = uk.iloc[:4000]

4390


In [11]:
# Map settings collected in one place, then unpacked into folium.Map.
map_options = {
    'location': [37.645556, -84.769722],
    'tiles': 'cartodbpositron',
    'zoom_start': 7.5,
    'control_scale': True,
    'prefer_canvas': True,
}
m = folium.Map(**map_options)
# Overlay the rows in df2map as styled lines.
folium.features.GeoJson(
    df2map,
    style_function=style_function,
).add_to(m)
m

#### Map University of KY Healthcare Employees

In [12]:
# Rows whose work geocode is this block.
in_uk_hosp_block = gdf['w_geocode'].str.startswith('210670008011006')
uk_hosp_f1 = gdf[in_uk_hosp_block]
# Keep only lines shorter than distance_max (daily-commute range).
uk_hosp_within_range = uk_hosp_f1['distance'] < distance_max
uk_hosp = uk_hosp_f1[uk_hosp_within_range]
len(uk_hosp)

3838

In [13]:
# Build the styled line layer for the uk_hosp rows first ...
uk_hosp_layer = folium.features.GeoJson(uk_hosp, style_function=style_function)
# ... then create the base map and attach the layer to it.
m = folium.Map(
    location=[37.645556, -84.769722],
    tiles='cartodbpositron',
    zoom_start=7.5,
    control_scale=True,
    prefer_canvas=True,
)
uk_hosp_layer.add_to(m)
m

#### Map Fayette County School Employees

In [14]:
# Filter data to the block.
fcps_f1 = gdf[gdf['w_geocode'].str.startswith('210670005001014')]
# Drop lines at or beyond distance_max: those distances are too great to
# commute daily. The shared distance_max setting is used (as in the other
# employer cells) instead of a hard-coded 100000, so changing the limit in
# one place affects every map.
fcps = fcps_f1[fcps_f1['distance'] < distance_max]
len(fcps)

2554

In [15]:
# Fixed [latitude, longitude] centre point for the map view.
map_center = [37.645556, -84.769722]
m = folium.Map(location=map_center,
               tiles='cartodbpositron',
               zoom_start=7.5,
               control_scale=True,
               prefer_canvas=True)
# Add the fcps commute lines, styled by distance.
fcps_layer = folium.features.GeoJson(fcps, style_function=style_function)
fcps_layer.add_to(m)
m

In [16]:
# Distinct work geocodes present in the data.
lex_unique = gdf['w_geocode'].unique()
lex_unique

array(['210670005001014', '210670008011006', '210670008022002',
       '210670024001012', '210670037042039', '210670042041030'],
      dtype=object)

In [17]:
# Number of commute lines per work geocode, largest first; show up to 25.
work_geocodes = gdf['w_geocode']
lex_summary = work_geocodes.value_counts()
lex_summary.head(25)

210670008022002    5431
210670008011006    4562
210670037042039    2891
210670005001014    2614
210670024001012    2553
210670042041030    2213
Name: w_geocode, dtype: int64

In [18]:
# Distinct work geocodes in the data (same listing as the earlier unique() call).
gdf['w_geocode'].unique()

array(['210670005001014', '210670008011006', '210670008022002',
       '210670024001012', '210670037042039', '210670042041030'],
      dtype=object)