In [1]:
import calendar 
import datetime
import dask.dataframe as dd
import numpy as np 
import pandas as pd 

In [2]:
%%time
# Path to the parquet dataset holding the crime records
parquet_data_folder = '../data/crimes-2017.snappy.parq'
print('Loading crime data from: {}'.format(parquet_data_folder))

# Read the parquet data as a dask DataFrame indexed by 'Date' and persist it
# straight away, so the data is loaded into memory once and reused by the
# cells that follow
crimes = dd.read_parquet(parquet_data_folder, index='Date').persist()
print('Crime data loaded into memory.')

Loading crime data from: ../data/crimes-2017.snappy.parq
Crime data loaded into memory.
Wall time: 3.03 s


In [3]:
%%time
# Report the record count, partition count and overall size of the crime data
print('Crime data stats:', '---------------------------------------', sep='\n')

# len() on the dask DataFrame yields the row count; npartitions is metadata
record_count = len(crimes)
partition_count = crimes.npartitions
print('{:,} total records in {} partitions'.format(record_count, partition_count))

# .size is lazy on a dask DataFrame, so it must be computed explicitly
element_count = crimes.size.compute()
print('DataFrame size: {:,}'.format(element_count))

Crime data stats:
---------------------------------------
172,030 total records in 1 partitions
DataFrame size: 2,408,420
Wall time: 0 ns


In [4]:
# Columns needed to describe and map each crime
geo_columns = [
    'PrimaryType',
    'Block',
    'Description',
    'LocationDescription',
    'CommunityArea',
    'Arrest',
    'Domestic',
    'Latitude',
    'Longitude',
]

# Keep only those columns and discard every row that has a missing value in
# any of them, so each remaining record is complete
crime_geo = crimes[geo_columns].dropna()

# Select the homicide records and materialize them as a pandas DataFrame
is_homicide = crime_geo['PrimaryType'] == 'HOMICIDE'
homicides = crime_geo[is_homicide].compute()

# NOTE: the total below counts only homicides that survived the dropna()
# above, i.e. rows with no missing value in any of the selected columns
print(homicides.head())
print('...')
print('Total 2017 homicides:', len(homicides))

                    PrimaryType                 Block          Description  \
Date                                                                         
2017-01-01 05:19:00    HOMICIDE      046XX N BROADWAY  FIRST DEGREE MURDER   
2017-01-01 06:18:00    HOMICIDE     046XX W MONROE ST  FIRST DEGREE MURDER   
2017-01-02 09:14:00    HOMICIDE    025XX N LOWELL AVE  FIRST DEGREE MURDER   
2017-01-03 12:20:00    HOMICIDE   034XX W FULTON BLVD  FIRST DEGREE MURDER   
2017-01-03 23:52:00    HOMICIDE  032XX W LEXINGTON ST  FIRST DEGREE MURDER   

                    LocationDescription CommunityArea  Arrest  Domestic  \
Date                                                                      
2017-01-01 05:19:00              TAVERN           3.0    True     False   
2017-01-01 06:18:00              STREET          25.0   False     False   
2017-01-02 09:14:00              STREET          20.0    True     False   
2017-01-03 12:20:00              STREET          27.0   False     False   
201