In [None]:
# read dataset and visualize first rows
import imageio
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from IPython.display import HTML

pd.set_option('max_colwidth', 60)

# load datasets
df = pd.read_csv('crime_csv_all_years.csv')
census = pd.read_csv('BC Development Region, Regional District and Muncipal Population Estimates 2001-2018.csv')
lbnd_df = gpd.read_file('local_area_boundary.shp')

# Build the list of years (2003-2018 per the original note) and an array of months (1-12).
# unique() returns values in order of first appearance, so sort before dropping
# the last entry; otherwise the year removed depends on the row order of the CSV.
years = np.sort(df.YEAR.unique())[:-1]
months = np.arange(1,13)

# Create a new dataframe to hold the frequency of each crime type per year
df_types = pd.DataFrame(df.TYPE.unique())
df_types.columns = ['type']

counts = []  # not read by any cell visible here; kept so the name stays defined
for year in years:
    temp = df[df['YEAR']==year]['TYPE'].value_counts()
    # value_counts() is ordered by frequency, not by the row order of df_types,
    # so look each count up by its type label instead of assigning positionally.
    # A type with no records in a given year gets 0 rather than breaking the
    # assignment with a length mismatch.
    df_types[str(year)] = df_types['type'].map(temp).fillna(0).astype(int).values

# Create change columns in the census dataframe with the relative population
# change from the previous year (e.g. column '2002-03' = pop(2003)/pop(2002) - 1)
for year in years:
    census[(str(year-1)+'-'+str(year)[-2:])] = (census[str(year)]/census[str(year-1)]-1)

In [None]:
#census.head()
#df_types.head()

In [None]:
# get image names
def get_names(myList):
    """Turn a sequence of frame labels into '<label>.png' filenames.

    The first and the last frame are repeated three times so an animation
    built from the result lingers on its start and end. The repetition is
    decided by position, so an interior label that happens to equal the
    first or last label is emitted only once.

    Parameters
    ----------
    myList : sequence
        Frame labels (anything accepted by ``str()``), e.g. years.

    Returns
    -------
    list of str
        Filenames in playback order; empty when ``myList`` is empty.
    """
    last = len(myList) - 1
    filenames = []
    for idx, text in enumerate(myList):
        repeats = 3 if idx in (0, last) else 1
        filenames.extend([str(text) + '.png'] * repeats)
    return filenames
    
# build a .gif with the pictures
def build_gif(filenames, title, duration=1):
    """Read every image in ``filenames`` and write them out as '<title>.gif'.

    ``duration`` is forwarded unchanged to ``imageio.mimwrite``.
    """
    frames = [imageio.imread(path) for path in filenames]
    imageio.mimwrite(title + '.gif', frames, duration=duration)
    
# build a dataframe with the proportions for each type of crime
def get_proportions(df_types, census, area, proportion = 100000, years = None):
    """Scale yearly crime counts by the population of one census area.

    Parameters
    ----------
    df_types : DataFrame
        One row per crime type: a 'type' column plus one count column per
        year, named with the year as a string.
    census : DataFrame
        Must have a 'Name' column and one population column per year, named
        with the year as a string.
    area : str
        Value of ``census['Name']`` to use; the first matching row is taken.
    proportion : number, default 100000
        Multiplier applied to count / population (100000 gives a per-100k rate).
    years : iterable, optional
        Years to process; each is converted with ``str()`` to find its
        column. Defaults to every column of ``df_types`` other than 'type',
        so the function does not depend on a notebook-level variable that
        happens to exist when this cell is run.

    Returns
    -------
    DataFrame
        A 'type' column followed by one rate column per requested year.

    Raises
    ------
    ValueError
        If ``area`` does not appear in ``census['Name']``.
    """
    if years is None:
        years = [col for col in df_types.columns if col != 'type']

    area_rows = census.loc[census['Name'] == area]
    if area_rows.empty:
        raise ValueError("area {!r} not found in census['Name']".format(area))

    new_df = pd.DataFrame({'type': df_types['type'].values})
    for year in years:
        population = area_rows[str(year)].values[0]
        new_df[str(year)] = df_types[str(year)].values / population * proportion

    return new_df

# prepare data for ploting neighbourhood maps
def prepare_data(df, year, crime):
    """Count the records of one crime type per neighbourhood for one year.

    Parameters
    ----------
    df : DataFrame
        Crime records with 'YEAR', 'TYPE' and 'NEIGHBOURHOOD' columns.
    year : int
        Value of 'YEAR' to keep.
    crime : str
        Value of 'TYPE' to keep.

    Returns
    -------
    DataFrame
        Columns 'NEIGHBOURHOOD' and 'TYPE_Count', one row per neighbourhood
        that has at least one matching record. 'Musqueam' and 'Stanley Park'
        are excluded; 'Central Business District' and 'Arbutus Ridge' are
        renamed to 'Downtown' and 'Arbutus-Ridge'.
    """
    nb_list = df[(df['YEAR'] == year) & (df['TYPE'] == crime)].copy()

    #clean
    notin = ['Musqueam', 'Stanley Park']
    # The filter has to be assigned back: boolean indexing returns a new
    # frame, so evaluating it as a bare expression excludes nothing.
    nb_list = nb_list[~nb_list.NEIGHBOURHOOD.isin(notin)]
    nb_list = nb_list.replace('Central Business District','Downtown')
    nb_list = nb_list.replace('Arbutus Ridge','Arbutus-Ridge')

    #group and count
    nb_list = nb_list.groupby(['NEIGHBOURHOOD']).count()
    nb_list = nb_list[['TYPE']].add_suffix('_Count').reset_index()

    return nb_list

In [None]:
# Crime rates relative to the population of each census area
# (get_proportions is called with its default scaling factor).
df_van, df_metro = [get_proportions(df_types, census, area_name)
                    for area_name in ('Vancouver', 'Metro Vancouver')]

In [None]:
#df_van.head()
#df_metro.head()

In [None]:
# One horizontal bar chart per year, saved as '<year>.png', then stitched into a gif.
for year in years:
    fig, ax = plt.subplots(1, figsize=(20, 10))
    fig.patch.set_facecolor('xkcd:white')

    # dashed vertical grid lines, drawn behind the bars
    ax.grid(color='gray', linestyle='dashed', linewidth=1, axis='x')
    ax.set_axisbelow(True)

    # bar chart: one bar per crime type
    y = df_types[str(year)].values
    barcollection = ax.barh(df_types['type'], df_types[str(year)], color='#FE4637')

    # details (fixed x range so the frames of the animation are comparable)
    ax.set_title('Vancouver - BC\nReported Crimes in {}'.format(year), fontsize=20)
    ax.set_xlim(0, 20000)
    ax.tick_params(labelsize=16)
    fig.subplots_adjust(left=0.3)

    # bar values: write the count just to the right of each bar
    for idx, (b, count) in enumerate(zip(barcollection, y)):
        b.set_width(count)
        ax.text(count + 10, idx - 0.15, "{:,d}".format(count), fontsize=14)

    # save
    fig.savefig('{}.png'.format(year), facecolor=fig.get_facecolor(), edgecolor='none')
    plt.close(fig)

build_gif(get_names(years), 'all_crimes')
HTML('<img src="all_crimes.gif">')

In [None]:
# Stacked bars of the first two rows of df_types across all year columns:
# row 1 at the bottom, row 0 stacked on top of it.
fig, ax = plt.subplots(1, figsize=(17, 10))

year_labels = df_types.columns[1:].values
yearly_counts = df_types.iloc[:, 1:].values

ax.bar(year_labels, yearly_counts[1], 0.75)
ax.bar(year_labels, yearly_counts[0], 0.75, bottom=yearly_counts[1])

# legend entries follow the plotting order: row 1 first, then row 0
ax.legend(df_types['type'][1::-1], loc='upper right')
fig.savefig('vehicle.png')

# Panel title -> row position in df_types of the series drawn in that panel.
# NOTE(review): positions assume the row order of df_types - confirm against the data.
type_dict = {
    'Theft from Vehicle': 0,
    'Theft of Vehicle': 1,
    'Other Theft': 2,
    'Break and Enter Residential/Other': 3,
    'Mischief': 4,
    'Theft of bicycle': 6,
}
sub_location = 321  # 3x2 grid, first panel
fig, ax = plt.subplots(1, figsize=(20, 15))
ax.axis('off')  # the full-figure axes only serves as a blank backdrop

for label, value in type_dict.items():
    ax1 = fig.add_subplot(sub_location)
    ax1.plot(df_types.columns[1:].values, df_types.iloc[:, 1:].values[value])
    ax1.set_title(label)
    sub_location += 1
fig.savefig('overall.png')

In [None]:
# Population Changes
fig, ax = plt.subplots(1, figsize=(17, 6))

# Two bar series side by side, read by position from the census frame.
# NOTE(review): rows 0/1 and columns 22:37 are positional; the legend assumes
# row 0 is Metro Vancouver and row 1 is Vancouver - confirm against the file.
n = np.arange(15)
plt.bar(n+0.2, census.iloc[0,22:37].values, width=0.4, align='center', color ='#16A085')
plt.bar(n-0.2, census.iloc[1,22:37].values, width=0.4, align='center', color = '#CD6155')

plt.title('Population change in %')
plt.legend(['metro','van'])
plt.xticks(n, census.iloc[:,22:37].columns)
ax.grid(color='gray', linestyle='dashed', linewidth=1, axis = 'y')
ax.set_axisbelow(True)

# Format the y axis as percentages with a formatter rather than freezing the
# labels of whatever ticks exist right now: fixed labels stop matching the
# ticks as soon as the axis limits change.
ax.yaxis.set_major_formatter(
    matplotlib.ticker.FuncFormatter(lambda val, pos: '{:,.2%}'.format(val)))

plt.savefig('pop_change.png')


# Crime proportions
# One panel per entry of type_dict; each panel overlays the Metro Vancouver
# rate (left y axis) and the Vancouver rate (right y axis) for the same
# df_metro / df_van row.
fig, ax = plt.subplots(1, figsize=(20, 10))
ax.axis('off')  # the full-figure axes only serves as a blank backdrop

sub_location = 321

for label, value in type_dict.items():
    ax1 = fig.add_subplot(sub_location)

    # Plot the row as a plain 1-D float series: no hard-coded length, so the
    # cell keeps working when the number of year columns changes.
    ax1.plot(df_metro.columns[1:], df_metro.iloc[value,1:].values.astype(float), color ='#16A085')
    ax1.set_ylabel('Metro-Vancouver')
    ax1.legend(['metro'], loc = 3)

    # Draw on the twin axes explicitly instead of relying on it having become
    # the implicit "current axes" after twinx().
    ax2 = ax1.twinx()

    ax2.plot(df_van.columns[1:], df_van.iloc[value,1:].values.astype(float), color = '#CD6155')
    ax2.set_ylabel('Vancouver')
    ax2.legend(['van'], loc = 4)

    ax2.set_title(label+' in Vancouver for each 100k habitants')

    sub_location +=1
fig.savefig('types.png')

In [None]:
ctype = 'Mischief'
vmin, vmax = 0, 800

# Loop-invariant inputs. The boundary layer is read once into its own name so
# the notebook-level lbnd_df used by later cells is never replaced by a
# joined, re-indexed frame.
boundaries = gpd.read_file('local_area_boundary.shp')
# Label anchor points: the centroid of each local-area polygon.
centroids = boundaries.set_geometry(boundaries["geometry"].centroid)
cmap = matplotlib.colors.LinearSegmentedColormap.from_list("", ["green","yellow","red"])

# One choropleth per year present in the data, saved as '<year>.png'.
for yr in df['YEAR'].unique():
    nb_list = prepare_data(df, yr, ctype)
    # Left join on the area name: areas without a count keep a missing TYPE_Count.
    joined = boundaries.set_index('NAME').join(nb_list.set_index('NEIGHBOURHOOD'))

    fig, ax = plt.subplots(1, figsize=(22, 16))
    fig.patch.set_facecolor('xkcd:grey')

    joined.plot(cmap=cmap, column = 'TYPE_Count', ax=ax, linewidth=1, edgecolor='0.8',vmin=vmin, vmax=vmax)

    # color bar, built on the same colour scale (vmin..vmax) as the map
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="3%", pad=0.05)
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=vmin, vmax=vmax))
    sm.set_array([])  # public equivalent of assigning the private sm._A
    cbar = fig.colorbar(sm, cax=cax)
    cbar.ax.tick_params(labelsize=14)

    # write each area's MAPID at its centroid
    for x, y, label in zip(centroids.geometry.x, centroids.geometry.y, centroids['MAPID']):
        ax.text(x, y, label, fontsize = 12, color='black')
    ax.text(488800, 5448600, r'VANCOUVER, BC, CANADA', fontsize=20)

    ax.set_title(ctype.upper() +' '+str(yr), fontdict = {
            'fontsize': 32,
            'fontweight' : 5,
            'verticalalignment': 'baseline',
            'horizontalalignment': 'center'})

    ax.axis('off')

    plt.savefig(str(yr) + '.png', facecolor=fig.get_facecolor(), edgecolor='none')
    plt.close(fig)

build_gif(get_names(years), (ctype+'_map'))
HTML('<img src="'+ ctype +'_map.gif">')

In [None]:
# Loop ranges: every year in the data except the last one, and months 1-12.
# unique() returns values in order of first appearance, so sort before dropping
# the last entry; otherwise the year removed depends on the row order of the CSV.
years = np.sort(df.YEAR.unique())[:-1]
months = np.arange(1,13)

In [None]:
# Narrow the loop ranges to the single pair year=2003, month=12.
# Any later cell that iterates over `years` and `months` sees only this pair
# until the two names are reassigned.
years = [2003]
months = [12]

In [None]:
# Narrow the loop ranges to year=2019, months 1 through 5.
# Any later cell that iterates over `years` and `months` sees only these values
# until the two names are reassigned.
years = [2019]
months = [1,2,3,4,5]

In [None]:
# Load the map layers and keep only the records of the crime type being mapped
bnd_df = gpd.read_file('city_boundary.shp')
ps_df = gpd.read_file('public_streets.shp')
non_city_street_df = gpd.read_file('non_city_streets.shp')
df_hex = df[df['TYPE'] == 'Mischief']

cmap = matplotlib.colors.LinearSegmentedColormap.from_list("", ["grey","lightsalmon","coral","orangered","red"])

fontdict = dict(fontsize=32,
                fontweight=100,
                color='white',
                verticalalignment='baseline',
                horizontalalignment='center')

vmin, vmax = 0, 50
# the list of frame names starts with seventeen copies of the first frame
img_list = ['2003_12'] * 17

# One frame per (year, month): a hexbin of the trailing 12 months ending at that month.
for year in years:
    for month in months:
        # trailing window: later months of the previous year plus months up to `month` of this year
        in_previous_year = (df_hex.YEAR == (year - 1)) & (df_hex.MONTH > month)
        in_current_year = (df_hex.YEAR == year) & (df_hex.MONTH <= month)
        df_temp = df_hex[in_previous_year | in_current_year]

        # Setup figure and axis
        fig, ax = plt.subplots(1, figsize=(25, 20))
        ax.set_aspect('equal')

        # plot streets and boundaries
        lbnd_df.plot(linewidth=1, edgecolor='0.1', ax=ax, facecolor='None', alpha=0.6)
        for street_layer in (ps_df, non_city_street_df):
            street_layer.plot(linewidth=1, edgecolor='white', ax=ax, facecolor='None', alpha=0.4)
        bnd_df.plot(linewidth=0.5, edgecolor='0.1', ax=ax, color = 'Blue', alpha=0.6)

        # plot hexbin
        df_temp.plot(kind='hexbin', x='X', y='Y', gridsize=64,
                     extent=[483000,498500,5448900,5463000], edgecolor='0.4',
                     figsize=(16, 12), cmap=cmap, ax=ax, vmin=vmin, vmax=vmax, colorbar=False)

        # plot colorbar on the same colour scale as the hexbin
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="3%", pad=0.05)
        sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=vmin, vmax=vmax))
        sm._A = []
        cbar = fig.colorbar(sm, cax=cax)
        cbar.ax.tick_params(labelsize=14)
        cbar.ax.set_yticklabels(['0', '10', '20', '30', '40', '> 50'], color='white')

        # plot title
        ax.set_title('MISCHIEF', fontdict = fontdict)

        # build text for displaying the month/year range covered by the frame
        if month == 12:
            start = '{:02d}/{:02d} to '.format(month - 11, year)
        else:
            start = '{:02d}/{:02d} to '.format(month + 1, year - 1)
        end = '{:02d}/{}'.format(month, year)

        # Plot Texts
        ax.text(492900, 5461800, start, fontsize=18, fontweight='bold', color='white', alpha=0.5, fontfamily = 'sans-serif')
        ax.text(495750, 5461800, end, fontsize=22, fontweight='heavy', color='white', alpha=0.8, fontfamily = 'sans-serif')
        ax.text(487900, 5448000, r'VANCOUVER, BC, CANADA', fontsize=20, color='white')
        ax.text(499000, 5446000, r'source: https://data.vancouver.ca/datacatalogue/crime-data.htm', fontsize=14, color='white', horizontalalignment='center')
        ax.text(498500, 5462990, r'tbc', fontsize=7, color='white', horizontalalignment='center')

        # Append the frame name to the list
        img_list.append('{}_{}'.format(year, month))

        # remove axis, save and close plot figure
        ax.axis('off')
        fig.savefig('{}_{}.png'.format(year, month), facecolor='grey', edgecolor='none')
        plt.close(fig)



#Build and display gif
#build_gif(get_names(img_list), 'Theft_of_Vehicle_map_hex', 0.3)
#HTML('<img src="Theft_of_Vehicle_map_hex.gif">')

In [None]:
# Manually build the list of frame names (without the '.png' extension).
# The range of years and months is spelled out here instead of being read
# from the notebook-level `years` / `months`: those names are overwritten by
# the scratch cells above, and with years == [2019] the slice years[1:] is
# empty, which silently drops every frame between the two endpoints.
img_list = ['2003_12'] * 9                     # linger on the opening frame
for year in range(2004, 2019):                 # 2004 .. 2018
    for month in range(1, 13):                 # 1 .. 12
        img_list.append(str(year) + '_' + str(month))
img_list.extend(['2019_1', '2019_2', '2019_3', '2019_4'])
img_list.extend(['2019_5'] * 8)                # linger on the closing frame

In [None]:
import moviepy.editor as mp
#clip = mp.VideoFileClip("Theft_of_Vehicle_map_hex.gif")
# Each frame name becomes a PNG path; the sequence is written as a video at 8 frames per second.
clip_list = ['{}.png'.format(frame_name) for frame_name in img_list]

clip = mp.ImageSequenceClip(clip_list, fps=8)
clip.write_videofile("myvideo.mp4")

In [None]:
# Same thing as before but without the loop
# Used this to try different styles and build the 2003 and 2019 images


bnd_df = gpd.read_file('city_boundary.shp')
ps_df = gpd.read_file('public_streets.shp')
non_city_street_df = gpd.read_file('non_city_streets.shp')
# The crime type is named once and reused for the title, so the heading can
# never disagree with the records that are actually plotted.
hex_crime_type = 'Break and Enter Commercial'
df_hex = df.loc[df['TYPE']==hex_crime_type]

cmap = matplotlib.colors.LinearSegmentedColormap.from_list("", ["grey","lightsalmon","coral","orangered","red"])

fontdict = {'fontsize': 32, 
            'fontweight' : 100, 
            'color' : 'white',
            'verticalalignment': 'baseline',
            'horizontalalignment': 'center'}

vmin=0
vmax=10
img_list = []

year = 2019
month = 5

# trailing 12-month window ending at (year, month)
df_temp = df_hex.copy()
df_temp = df_temp[(((df_temp.YEAR==(year-1)) & (df_temp.MONTH>month)) | 
                ((df_temp.YEAR==year) & (df_temp.MONTH<=month)))]

# Setup figure and axis
fig, ax = plt.subplots(1, figsize=(25, 20), facecolor='grey')
ax.set_aspect('equal')
# plot streets and boundaries
lbnd_df.plot(linewidth=1, edgecolor='0.1', ax=ax, facecolor='None', alpha=0.6)
ps_df.plot(linewidth=1, edgecolor='white', ax=ax, facecolor='None', alpha=0.4)
non_city_street_df.plot(linewidth=1, edgecolor='white', ax=ax, facecolor='None', alpha=0.4)
bnd_df.plot(linewidth=0.5, edgecolor='0.1', ax=ax, color = 'Blue', alpha=0.6)
# plot hexbin
df_temp.plot(kind='hexbin',x='X',y='Y',gridsize=64, extent=[483000,498500,5448900,5463000], edgecolor='0.4', 
            figsize=(16, 12), cmap=cmap, ax=ax, vmin=vmin, vmax=vmax, colorbar=False)
# plot colorbar
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="3%", pad=0.05)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=vmin, vmax=vmax))
sm.set_array([])  # public equivalent of assigning the private sm._A
cbar = fig.colorbar(sm, cax=cax)
cbar.ax.tick_params(labelsize=14)
# Pin six evenly spaced ticks and derive their labels from vmin/vmax, so the
# labels always line up with the colour scale; the top tick is shown as
# '> vmax' because the colour scale is capped at vmax.
cbar_ticks = np.linspace(vmin, vmax, 6)
cbar.set_ticks(cbar_ticks)
cbar_labels = ['{:g}'.format(tick) for tick in cbar_ticks[:-1]] + ['> {:g}'.format(vmax)]
cbar.ax.set_yticklabels(cbar_labels, color='white')
# plot title
ax.set_title(hex_crime_type.upper(), fontdict = fontdict)
# build text for displaying the month/year range covered by the figure
if (month==12):
    start = ("{:02d}".format(month-11))+'/'+("{:02d}".format(year))+' to '
else:
    start = ("{:02d}".format(month+1))+'/'+("{:02d}".format(year-1))+' to '
end = ("{:02d}".format(month))+'/'+str(year)
# Plot Texts
ax.text(492900, 5461800, (start), fontsize=18, fontweight='bold', color='white', alpha=0.5, fontfamily = 'sans-serif')
ax.text(495750, 5461800, (end), fontsize=22, fontweight='heavy', color='white', alpha=0.8, fontfamily = 'sans-serif')
ax.text(487900, 5448000, r'VANCOUVER, BC, CANADA', fontsize=20, color='white')
ax.text(498800, 5446000, r'source: https://data.vancouver.ca/datacatalogue/crime-data.htm', fontsize=14, color='white', horizontalalignment='center')
ax.text(498500, 5462990, r'tbc', fontsize=7, color='white', horizontalalignment='center')
# remove axis and save the figure as '<year>_<month>.png'
ax.axis('off')
plt.savefig(str(year) + '_' + str(month) + '.png', facecolor='grey', edgecolor='none')