In [None]:
# import required packages
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns; sns.set(style="ticks", color_codes=True)
import geopandas as gpd
from geopandas import GeoDataFrame
import json
# load data
# The CSV is read as-is and the 'Date' column is assembled explicitly afterwards.
# The dict form of `parse_dates` (combining several columns) and `keep_date_col`
# are deprecated in pandas 2.2, so they are avoided here.
# NOTE(review): YEAR and MONTH now keep whatever dtype read_csv infers; older
# pandas presumably kept them as strings under keep_date_col - confirm nothing
# downstream relies on string values.
df = pd.read_csv('../../data-mapbox-animation/crime.csv')

# build one timestamp per row from YEAR and MONTH (day fixed to the 1st) and
# insert it as the first column, where the combined column used to be placed
df.insert(
    0,
    'Date',
    pd.to_datetime(pd.DataFrame({'year': df['YEAR'], 'month': df['MONTH'], 'day': 1})),
)

df.head()

In [None]:
# keep only rows whose coordinates are both non-zero
# (a 0 in Latitude or Longitude is treated as an invalid location)
has_coordinates = df['Latitude'].ne(0) & df['Longitude'].ne(0)
df = df.loc[has_coordinates]

# draw a reproducible random subsample of 2000 rows
sample_df = df.sample(n=2000, random_state=42)
sample_df

In [None]:
# neighbourhood polygons used as the base map (reused by later cells)
van_geojson = gpd.read_file("../../data-mapbox-animation/vancouver.geojson")

fig, ax = plt.subplots(figsize=(10, 8))

# draw only the polygon outlines
van_geojson.boundary.plot(ax=ax, edgecolor='black')

# scatter the sampled incidents, coloured by crime type.
# x, y and hue are all taken from `sample_df` by column name so the colour of
# each point comes from the same row as its coordinates; keyword arguments are
# used because recent seaborn releases no longer accept positional x/y.
sns.scatterplot(data=sample_df, x='Longitude', y='Latitude', hue='TYPE',
                marker='o', ax=ax)

# move the legend to the right of the plot
ax.legend(loc='center right', bbox_to_anchor=(1.7, 0.5), ncol=1)

ax.axis('off')
plt.show()

In [None]:
def animate_map(time_col):
    """Show an animated Mapbox scatter of `sample_df`, one frame per `time_col` value.

    Points are positioned by the Latitude/Longitude columns and coloured by TYPE.
    Frames are ordered by the sorted unique values of `time_col`.

    Parameters
    ----------
    time_col : str
        Name of the column in the notebook-level `sample_df` that drives the
        animation frames.
    """
    # explicit frame order so the slider runs through the values in sorted order
    frame_order = list(np.sort(sample_df[time_col].unique()))

    fig = px.scatter_mapbox(
        sample_df,
        lat="Latitude",
        lon="Longitude",
        hover_name="TYPE",
        color="TYPE",
        animation_frame=time_col,
        mapbox_style='carto-positron',
        category_orders={time_col: frame_order},
        zoom=10,
    )
    fig.show()
# animate the sampled incidents with one frame per value of the YEAR column
animate_map('YEAR')

In [None]:
# neighbourhood labels in the crime data that are rewritten to the names
# they are joined on below
neighbourhood_renames = {
    'Arbutus Ridge': 'Arbutus-Ridge',
    'Central Business District': 'Downtown',
    'Musqueam': 'Dunbar-Southlands',
}

# drop Stanley Park rows, then apply the renames
df_cumsum = sample_df.query('NEIGHBOURHOOD != "Stanley Park"').replace(neighbourhood_renames)

# count all crimes within each neighbourhood
df_counts_total = df_cumsum.groupby('NEIGHBOURHOOD').size().reset_index(name='Count')

# merge the count data with the neighbourhood polygons for plotting
# (left join on the polygon name, so every polygon is kept)
polygons_by_name = van_geojson.set_index('name')
counts_by_name = df_counts_total.set_index('NEIGHBOURHOOD')
df_merged_total = polygons_by_name.join(counts_by_name)
df_merged_total

In [None]:
fig, ax = plt.subplots(figsize=(12, 10))

# shade each neighbourhood polygon by its total crime count
df_merged_total.plot(column='Count', cmap='Reds', linewidth=1, ax=ax,
                     edgecolor='0.3', alpha=0.8)

# label anchor for each polygon: a representative point of the geometry
# (not necessarily the geometric centre), stored as an (x, y) tuple
df_merged_total['lbl_pts'] = df_merged_total['geometry'].apply(
    lambda geom: geom.representative_point().coords[0]
)

# annotate polygons with the value of their 'mapid' column.
# The label is passed positionally because the keyword for it was renamed
# from `s` to `text` in matplotlib 3.3.
for _, row in df_merged_total.iterrows():
    ax.annotate(row['mapid'], xy=row['lbl_pts'], ha='center')

# colour scale for the colorbar; Series.min()/max() skip NaN, which the left
# join produces for polygons without any counted rows
count_norm = plt.Normalize(vmin=df_merged_total['Count'].min(),
                           vmax=df_merged_total['Count'].max())
sm = plt.cm.ScalarMappable(cmap='Reds', norm=count_norm)
sm.set_array([])  # older matplotlib requires an array on a stand-alone mappable

ax.axis('off')
# `sm` is not attached to any Axes, so tell the figure which Axes to take
# the colorbar space from
cbar = fig.colorbar(sm, ax=ax)

plt.show()

In [None]:
# number of incidents for every (Date, NEIGHBOURHOOD) pair that occurs in the data
incidents_per_date = df_cumsum.groupby(['Date', 'NEIGHBOURHOOD']).size()
df_counts_rolling = incidents_per_date.reset_index(name='Count')

df_counts_rolling

In [None]:

from itertools import product

# scaffold with every combination of neighbourhood x date, so each animation
# frame later has a row for every neighbourhood
df_crime_final = pd.DataFrame(
    product(df_counts_rolling['NEIGHBOURHOOD'].unique(),
            df_counts_rolling['Date'].unique()),
    columns=['NEIGHBOURHOOD', 'Date'],
)

# cumulative sum of crime counts per neighbourhood over time, for the
# neighbourhood/date combinations that actually occur in the data
df_crime_cumsum = (
    df_counts_rolling
    .groupby(['NEIGHBOURHOOD', 'Date']).sum()
    .groupby(level=0).cumsum()
    .reset_index()
)

# attach the cumulative counts to the scaffold; combinations without any
# incident get NaN for now
df_crime_final = (
    df_crime_final
    .merge(df_crime_cumsum, on=['NEIGHBOURHOOD', 'Date'], how='left')
    .sort_values(['NEIGHBOURHOOD', 'Date'])
)

# within each neighbourhood carry the last known cumulative count forward and
# use 0 before its first incident. Only 'Count' is filled, and the groupby
# ffill keeps the frame's own index - unlike groupby.apply, whose result index
# depends on the pandas version.
df_crime_final['Count'] = (
    df_crime_final.groupby('NEIGHBOURHOOD')['Count'].ffill().fillna(0)
)

df_crime_final

In [None]:
# reformat Date as string for plotly
df_crime_final['Date'] = df_crime_final['Date'].astype(str)

# pin the colour scale to the overall range of the cumulative counts so that
# a given colour means the same count in every animation frame
count_range = (0, df_crime_final['Count'].max())

fig = px.choropleth_mapbox(df_crime_final,
                           geojson=van_geojson,
                           featureidkey='properties.name',  # match polygons on their 'name' property
                           locations='NEIGHBOURHOOD',
                           color='Count',
                           range_color=count_range,
                           hover_name='NEIGHBOURHOOD',
                           hover_data=['Count'],
                           color_continuous_scale='Reds',
                           animation_frame='Date',
                           mapbox_style='carto-positron',
                           title='Cumulative Numbers of Crimes in Vancouver Neighborhoods',
                           center={'lat':49.25, 'lon':-123.13},
                           zoom=11,
                           opacity=0.75,
                           labels={'Count':'Count'},
                           width=1100,
                           height=800
                          )

fig.show()