# related papers
http://journals.sagepub.com/doi/pdf/10.1177/0081175015576601

http://www.lifescienceglobal.com/pms/index.php/ijcs/article/view/5052/2853

https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2770506

https://www.ijdp.org/article/S0955-3959(17)30288-8/abstract

http://article.socialsciencesjournal.org/pdf/10.11648.j.ss.20180702.15.pdf

https://www.sciencedirect.com/science/article/pii/S0091743503002330

https://www.sciencedirect.com/science/article/pii/S0013935109000498

https://onlinelibrary.wiley.com/doi/abs/10.1111/ecpo.12102

http://journals.sagepub.com/doi/abs/10.1177/0003122416663494

http://journals.sagepub.com/doi/pdf/10.1177/0022427806298356

https://waset.org/abstracts/environmental-and-ecological-engineering/76986

http://www.mdpi.com/2220-9964/7/2/43

http://discovery.ucl.ac.uk/11291/1/11291.pdf (hot spot report)

In [1]:
import pandas as pd
from shapely.geometry import Point
import geopandas as gp

import datetime
from dateutil.relativedelta import relativedelta


from src import constants as C
from src.data_prep import prep_data_from_raw, prep_911, prep_crime

# load data

In [2]:
# Load the prepared 911-call frame and move its index into ordinary columns,
# leaving a default integer index behind. The trailing expression displays the
# frame's CRS.
d911 = prep_911(by_category=False, gpdf=True, coords_series=False).reset_index()
d911.crs

{'init': 'epsg:3559', 'no_defs': True}

In [3]:
# Load the crime dev-set the same way as the 911 calls: prepared frame first,
# then the index moved into ordinary columns.
crimes = prep_crime(
    path='data/open-baltimore/clean/crimes-dev-set.csv',
    by_category=False,
    gpdf=True,
    coords_series=False,
).reset_index()

In [4]:
# Disabled alternative to loading the cleaned dev-set: rebuild `crimes` from the
# raw BPD CSV, keep rows from 2015-01-01 onward, create point geometries from
# `Coords`, buffer them by 30 and tag the frame as EPSG:3559.
# Nothing in this cell runs; it is kept for reference only.
# crimes = prep_data_from_raw('data/open-baltimore/raw/BPD_Part_1_Victim_Based_Crime_Data.csv', 
#                  col_lon='Longitude', col_lat='Latitude', col_date='CrimeDate', to_epsg=3559)

# crimes= crimes.loc['2015-01-01':]
# crimes['geometry'] = crimes.Coords.apply(lambda x: Point(*x))
# crimes = gp.GeoDataFrame(crimes)
# crimes.geometry = crimes.buffer(30)
# crimes.reset_index(inplace=True)
# crimes.crs = {'init': 'epsg:3559', 'no_defs': True}

# Spatial join (sjoin): match 911 calls to nearby crimes

In [5]:
# The spatial join is run one (year, month) at a time, so collect the months
# that occur in each dataset and keep only those present in both.
ym_crime = {(ts.year, ts.month) for ts in crimes.DateTime}
ym_911 = {(ts.year, ts.month) for ts in d911.DateTime}
yms = list(ym_crime.intersection(ym_911))
len(yms)

24

In [6]:
# Replace each crime geometry with a buffer of 100 around it ("within 100
# meters" per the original note — assumes the CRS unit is metres), so that the
# spatial join below matches calls that fall inside that radius.
# NOTE: running this cell a second time buffers the already-buffered shapes.
# d911.geometry = d911.buffer(30)
crimes = crimes.set_geometry(crimes.buffer(100))

In [7]:
# Match 911 calls to crimes, one calendar month at a time.
#
# A (911 call, crime) pair is kept when
#   * spatially, gp.sjoin (default predicate) matches the call's geometry with
#     the crime's geometry, and
#   * temporally, the call happened no more than HOURS_BEFORE_CRIME hours before
#     and no more than HOURS_AFTER_CRIME hours after the recorded crime time
#     (calls slightly *before* the crime are allowed because the recorded crime
#     time may not be accurate).
#
# Result: `pairs`, a frame with columns i911 (row label in `d911`), icrime (row
# label in `crimes`) and dt_diff (hours from crime to call, negative when the
# call came first).
HOURS_BEFORE_CRIME = 1
HOURS_AFTER_CRIME = 12
join_cols = ['DateTime', 'geometry']

monthly_pairs = []
for y, m in yms:
    # Widen the month window: start one day early in case a crime happened just
    # before the 1st, and include the first instant of the next month in case a
    # call arrived just after this month ended. Neighbouring windows therefore
    # overlap and can yield the same pair twice; duplicates are removed below.
    date = datetime.datetime(year=y, month=m, day=1)
    start = date - datetime.timedelta(days=1)
    end = date + relativedelta(months=+1)

    # Slice both datasets to the (inclusive) window.
    sted_crimes = crimes[(crimes.DateTime >= start) & (crimes.DateTime <= end)]
    sted_d911 = d911[(d911.DateTime >= start) & (d911.DateTime <= end)]

    # Spatial constraint: left side = 911 calls, right side = crimes.
    joined = gp.sjoin(sted_d911[join_cols], sted_crimes[join_cols])
    joined = joined.reset_index().rename(columns={'index': 'i911', 'index_right': 'icrime'})

    # Temporal constraint. The difference is computed column-wise rather than
    # with a row-wise apply: it is much faster and does not hit the pitfall of
    # DataFrame.apply(axis=1) returning a DataFrame when there are no rows.
    joined['dt_diff'] = (joined.DateTime_left - joined.DateTime_right).dt.total_seconds() / 3600
    cond = joined.dt_diff.between(-HOURS_BEFORE_CRIME, HOURS_AFTER_CRIME)
    monthly_pairs.append(joined[cond][['i911', 'icrime', 'dt_diff']])

pairs = pd.concat(monthly_pairs, ignore_index=True)
# A pair is identified by its two row labels; dt_diff follows from them, so
# de-duplicating on the keys alone removes the window-overlap duplicates.
pairs = pairs.drop_duplicates(subset=['i911', 'icrime'])

In [8]:
# Denominators for the match rates: number of rows whose DateTime lies between
# 2015-01-01 and 2017-01-01, both bounds inclusive.
# NOTE(review): the matching loop starts each month window one day early, so the
# matched crimes may include events dated just before 2015-01-01 that are not
# counted here — confirm whether that is intended.
num_crimes = len(crimes[crimes.DateTime.between('2015-01-01', '2017-01-01')])
num_911 = len(d911[d911.DateTime.between('2015-01-01', '2017-01-01')])

In [9]:
# Distinct row labels of the crimes / 911 calls that take part in at least one
# matched pair.
idx_crimes = pairs['icrime'].unique()
idx_911 = pairs['i911'].unique()

In [10]:
# Share of crimes with at least one matched call, and share of calls matched to
# at least one crime (matched / total = ratio).
'crime: %d/%d=%.02f, 911: %d/%d=%.02f' % (
    len(idx_crimes), num_crimes, len(idx_crimes) / num_crimes,
    len(idx_911), num_911, len(idx_911) / num_911,
)

'crime: 49714/92820=0.54, 911: 72460/634100=0.11'

# Add crime and 911-call attributes to the matched pairs

In [11]:
# Attribute columns to attach to each matched pair. Every column gets a suffix
# so the two sources stay distinguishable after merging:
#   `_c` = taken from the crime row, `_9` = taken from the 911-call row.
# After this cell `col_c` / `col_9` hold the suffixed names.

col_c = ['DateTime', 'Category', 'Latitude', 'Longitude', 'Weapon', 'Neighborhood', 'In/Outside', 'Coords']
target_c = crimes.loc[idx_crimes][col_c].add_suffix('_c')
col_c = list(target_c.columns)

col_9 = ['DateTime', 'Category', 'description', 'Latitude', 'Longitude', 'priority', 'Coords']
target_9 = d911.loc[idx_911][col_9].add_suffix('_9')
col_9 = list(target_9.columns)

In [12]:
# Reset helper for redoing the merge: uncomment the line below to strip `pairs`
# back to its key columns (plus `distance`, once that column exists) before
# merging the attribute frames again.
# pairs = pairs[['i911', 'icrime', 'dt_diff', 'distance']]

In [13]:
# Attach the 911-call attributes (via i911) and the crime attributes (via
# icrime) to every matched pair.
#
# The merge starts from the three key columns only, so the cell can be re-run
# safely: merging an already-merged `pairs` a second time would duplicate every
# attribute column with _x/_y suffixes. Any derived column (e.g. `distance`) is
# dropped by a re-run and has to be recomputed afterwards.
pairs = (
    pairs[['i911', 'icrime', 'dt_diff']]
    .merge(target_9, left_on='i911', right_index=True)
    .merge(target_c, left_on='icrime', right_index=True)
)

In [None]:
# Not needed by the computation below any more; the import is kept so the name
# `distance` stays bound in case another cell relies on it.
from scipy.spatial import distance

# Straight-line (Euclidean) distance between the 911-call coordinates and the
# crime coordinates of each pair, in the units of the `Coords` tuples.
# The coordinate tuples are expanded into one column per component and the
# distance is computed column-wise instead of with a row-wise apply: this is
# far faster on a large `pairs` frame and also works when `pairs` is empty.
coords_9 = pd.DataFrame(pairs.Coords_9.tolist(), index=pairs.index)
coords_c = pd.DataFrame(pairs.Coords_c.tolist(), index=pairs.index)
# skipna=False keeps a missing coordinate component visible as NaN instead of
# silently treating it as zero; .values assigns by position.
pairs['distance'] = (((coords_9 - coords_c) ** 2).sum(axis=1, skipna=False) ** 0.5).values

In [18]:
# Display the matched pairs grouped by crime (id, category, time) and then by
# 911 call, sorted on that index.
(
    pairs
    .set_index(['icrime', 'Category_c', 'DateTime_c', 'i911'])
    .sort_index()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,dt_diff,DateTime_9,Category_9,description_9,Latitude_9,Longitude_9,priority_9,Coords_9,Latitude_c,Longitude_c,Weapon_c,Neighborhood_c,In/Outside_c,Coords_c,distance
icrime,Category_c,DateTime_c,i911,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
91050,theft_larceny,2014-12-31 15:20:00,83,10.216667,2015-01-01 01:33:00,disorderly conduct,Disorderly,39.328423,-76.608901,Medium,"(433721.5758121457, 184538.19260919775)",39.32829,-76.60813,,Better Waverly,I,"(433788.1164185589, 184523.7122174631)",68.097974
91051,robbery,2014-12-31 15:00:00,148,11.633333,2015-01-01 02:38:00,assault,Common Assault,39.327858,-76.562455,Medium,"(437726.5426745453, 184493.64293864692)",39.32795,-76.56246,FIREARM,Belair-Edison,O,"(437726.0626077657, 184503.8545607505)",10.222900
91056,assault,2014-12-31 21:30:00,90,4.133333,2015-01-01 01:38:00,assault,Common Assault,39.323183,-76.589056,Medium,"(435435.2709522915, 183963.97267303546)",39.32390,-76.58918,HANDS,Coldstream Homestead Mont,I,"(435424.22041218966, 184043.52470707803)",80.315880
91056,assault,2014-12-31 21:30:00,104,4.400000,2015-01-01 01:54:00,insecurity,Armed Person,39.323183,-76.589056,High,"(435435.2709522915, 183963.97267303546)",39.32390,-76.58918,HANDS,Coldstream Homestead Mont,I,"(435424.22041218966, 184043.52470707803)",80.315880
91060,theft_larceny,2014-12-31 23:30:00,239,5.050000,2015-01-01 04:33:00,insecurity,Injured Person,39.274281,-76.627721,Medium,"(432123.38772287883, 178520.68173704125)",39.27408,-76.62769,,Carroll - Camden Industri,I,"(432126.1536581124, 178498.3782481218)",22.474341
91060,theft_larceny,2014-12-31 23:30:00,247,5.150000,2015-01-01 04:39:00,theft_larceny,Larceny,39.274281,-76.627721,Low,"(432123.38772287883, 178520.68173704125)",39.27408,-76.62769,,Carroll - Camden Industri,I,"(432126.1536581124, 178498.3782481218)",22.474341
91060,theft_larceny,2014-12-31 23:30:00,306,6.566667,2015-01-01 06:04:00,assault,Common Assault,39.274281,-76.627721,Medium,"(432123.38772287883, 178520.68173704125)",39.27408,-76.62769,,Carroll - Camden Industri,I,"(432126.1536581124, 178498.3782481218)",22.474341
91060,theft_larceny,2014-12-31 23:30:00,318,7.100000,2015-01-01 06:36:00,disorderly conduct,Disorderly,39.274281,-76.627721,Medium,"(432123.38772287883, 178520.68173704125)",39.27408,-76.62769,,Carroll - Camden Industri,I,"(432126.1536581124, 178498.3782481218)",22.474341
91063,theft_larceny,2014-12-31 23:00:00,420,11.816667,2015-01-01 10:49:00,theft_larceny,Larceny,39.276717,-76.625313,Low,"(432330.0592148789, 178791.96908203876)",39.27648,-76.62549,,Carroll - Camden Industri,I,"(432314.8947064208, 178765.59538923323)",30.422590
91064,theft_larceny,2014-12-31 23:00:00,314,7.266667,2015-01-01 06:16:00,theft_larceny,Larceny f/Auto,39.305612,-76.588281,Low,"(435510.895813598, 182013.57585186552)",39.30559,-76.58806,,Broadway East,O,"(435529.9680265204, 182011.21945289162)",19.217230


# plot on map

In [11]:

# Development helper: (re)load the local visualisation modules so that edits to
# them are picked up without restarting the kernel, then pull in the map
# helpers. Note that the following cell imports `marker_cluster_fast` again
# from wKit.vis.map, which rebinds that name.
from importlib import reload

import src.vis
reload(src.vis)

import src.vis.map
reload(src.vis.map)

from src.vis.map import marker_cluster, marker_cluster_fast


In [17]:
from wKit.vis.map import marker_cluster_fast

In [13]:
# (latitude, longitude) points per map layer, one layer for the matched crimes
# and one for the matched 911 calls.
# `target_c` / `target_9` carry suffixed column names (`_c` / `_9`) after the
# "add info" step, so the coordinates must be read from `Latitude_c`,
# `Longitude_c`, `Latitude_9` and `Longitude_9`; the unsuffixed attributes do
# not exist on these frames when the notebook is run top to bottom.
name_location = {
    'crimes': list(zip(target_c['Latitude_c'].tolist(), target_c['Longitude_c'].tolist())),
    '911': list(zip(target_9['Latitude_9'].tolist(), target_9['Longitude_9'].tolist())),
}

In [20]:
# Render both layers as clustered markers and write the map to an HTML file.
# lonlat=False: the points above are built as (latitude, longitude) tuples —
# presumably this flag tells the helper about that order; confirm in its docs.
marker_cluster_fast(
    name_location,
    lonlat=False,
    filename='tmp/crime_911.html',
)