In [1]:
import dask.dataframe as dd
import geopandas as gpd
import folium # leaflet.js py map
from folium import plugins
import numpy as np 
import pandas as pd
import os

In [2]:
# Report the versions of the main libraries this notebook relies on.
print('Required Python libraries:')
for label, module in (('Pandas:', pd),
                      ('GeoPandas:', gpd),
                      ('Folium:', folium)):
    # print() inserts the single space between the label and the version
    print(label, module.__version__)

Required Python libraries:
Pandas: 0.20.3
GeoPandas: 0.2.1
Folium: 0.4.0


In [3]:
%%time
# Path to the parquet crime data that the rest of the notebook works from.
parquet_data_folder = '../data/crimes-2017.snappy.parq'
print('Loading crime data from: {}'.format(parquet_data_folder))

# Read the parquet data into a dask dataframe indexed by 'Date' and
# persist it so later cells work against the in-memory copy.
crimes = dd.read_parquet(parquet_data_folder, index='Date').persist()
print('Crime data loaded into memory.')

# Report record/partition counts and the overall dataframe size.
print('Crime data stats:')
print('---------------------------------------')
record_count = len(crimes)
partition_count = crimes.npartitions
print('{:,} total records in {} partitions'.format(record_count, partition_count))
element_count = crimes.size.compute()  # dask evaluates .size lazily
print('DataFrame size: {:,}'.format(element_count))

Loading crime data from: ../data/crimes-2017.snappy.parq
Crime data loaded into memory.
Crime data stats:
---------------------------------------
172,030 total records in 1 partitions
DataFrame size: 2,408,420
Wall time: 3.07 s


In [4]:
# Columns used to describe and map each crime.
geo_columns = ['PrimaryType', 'Block', 'Description', 'LocationDescription',
               'CommunityArea', 'Arrest', 'Domestic', 'Latitude', 'Longitude']

# NOTE(review): dropna() removes rows missing a value in ANY of the selected
# columns, not just the coordinates, so the totals below exclude such rows.
crime_geo = crimes[geo_columns].dropna()

# Keep only homicide records and materialize them as a pandas dataframe.
is_homicide = crime_geo['PrimaryType'] == 'HOMICIDE'
homicides = crime_geo[is_homicide].compute()

# Preview the selection and report how many homicides were found.
print('2017 Chicago homicides data preview:')
print('--------------------------------------------------------------------------')
print(homicides.head())
print('...')
print('Total 2017 homicides:', len(homicides))

2017 Chicago homicides data preview:
--------------------------------------------------------------------------
                    PrimaryType                 Block          Description  \
Date                                                                         
2017-01-01 05:19:00    HOMICIDE      046XX N BROADWAY  FIRST DEGREE MURDER   
2017-01-01 06:18:00    HOMICIDE     046XX W MONROE ST  FIRST DEGREE MURDER   
2017-01-02 09:14:00    HOMICIDE    025XX N LOWELL AVE  FIRST DEGREE MURDER   
2017-01-03 12:20:00    HOMICIDE   034XX W FULTON BLVD  FIRST DEGREE MURDER   
2017-01-03 23:52:00    HOMICIDE  032XX W LEXINGTON ST  FIRST DEGREE MURDER   

                    LocationDescription CommunityArea  Arrest  Domestic  \
Date                                                                      
2017-01-01 05:19:00              TAVERN           3.0    True     False   
2017-01-01 06:18:00              STREET          25.0   False     False   
2017-01-02 09:14:00              STREET  

In [21]:
# Reduce the homicide records to their coordinates; the 'Date' index is kept.
homicides_geo = homicides.loc[:, ['Latitude', 'Longitude']]

# Preview the coordinates and the timestamps they are indexed by.
print(homicides_geo.head())
print(homicides.index)

                      Latitude  Longitude
Date                                     
2017-01-01 05:19:00  41.966082 -87.657908
2017-01-01 06:18:00  41.879291 -87.741599
2017-01-02 09:14:00  41.926841 -87.735416
2017-01-03 12:20:00  41.886341 -87.712000
2017-01-03 23:52:00  41.871868 -87.706610
DatetimeIndex(['2017-01-01 05:19:00', '2017-01-01 06:18:00',
               '2017-01-02 09:14:00', '2017-01-03 12:20:00',
               '2017-01-03 23:52:00', '2017-01-04 00:41:00',
               '2017-01-04 05:59:00', '2017-01-05 23:30:00',
               '2017-01-06 01:43:00', '2017-01-07 10:07:00',
               ...
               '2017-08-20 02:19:00', '2017-08-20 02:21:00',
               '2017-08-20 02:35:00', '2017-08-20 05:31:00',
               '2017-08-20 05:35:00', '2017-08-20 11:12:00',
               '2017-08-20 19:01:00', '2017-08-22 17:13:00',
               '2017-08-23 11:02:00', '2017-08-24 09:50:00'],
              dtype='datetime64[ns]', name='Date', length=441, freq=None)


In [23]:
# create Chicago map
CHICAGO_COORDINATES = (41.85, -87.68)
map_attributions = ('&copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a> '
        'contributors, &copy; <a href="http://cartodb.com/attributions">CartoDB</a>')
chicago_homicides_map = folium.Map(location=CHICAGO_COORDINATES, 
                                 attr=map_attributions,
                                 tiles='Cartodb Positron', #'OpenStreetMap',
                                 zoom_start=10, min_zoom=10,
                                 control_scale=True)

# Build the time-lapse input for HeatMapWithTime.
# The plugin expects `data` as a list of time steps, each holding a list of
# [lat, lng] points, and `index` as a list of labels with the same length.
# A DataFrame / DatetimeIndex pair does not satisfy that contract, so the
# homicides are grouped by calendar day: one frame per day with at least one
# homicide, labelled with the ISO date string.
heatmap_frames = []
heatmap_labels = []
for day, day_points in homicides_geo.groupby(homicides_geo.index.normalize()):
    heatmap_labels.append(day.strftime('%Y-%m-%d'))
    heatmap_frames.append(day_points[['Latitude', 'Longitude']].values.tolist())

# The plugin needs exactly one label per frame; fail early with a clear
# message rather than producing a broken map.
if len(heatmap_frames) != len(heatmap_labels):
    raise ValueError('Heat map frames and index labels differ in length.')

# create homicides heatmap (animated day by day)
homicides_heatmap = plugins.HeatMapWithTime(heatmap_frames,
                                            index=heatmap_labels,
                                            auto_play=True,
                                            max_opacity=0.5,
                                            name='2017 Chicago Homicides Heat Map')
homicides_heatmap.add_to(chicago_homicides_map)

# add fullscreen toggle
plugins.Fullscreen(
    position='topright',
    title='full screen',
    title_cancel='exit full screen',
    force_separate_button=True).add_to(chicago_homicides_map)

# save map for demo; create the output folder first so save() does not fail
# on a fresh checkout where it is missing
maps_folder = '../maps/'
os.makedirs(maps_folder, exist_ok=True)
chicago_homicides_map.save(os.path.join(maps_folder, 'chicago-homicides-2017-map.html'))

# show homicides map
chicago_homicides_map