# Team Project

In [1]:
# libraries to import
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="darkgrid")
%matplotlib inline

In [2]:
# Load the two raw inputs from the working directory.
# The file names are kept in variables so the data sources are easy to spot.
crime_csv = "Neighbourhood_Crime_Rates.csv"
neighbourhood_csv = "neighbourhood-profiles-2016-140-model.csv"

# Crime figures, one row per neighbourhood.
crime_raw = pd.read_csv(crime_csv)

# Neighbourhood profile data (unpivoted and filtered further down).
neighbourhood_raw = pd.read_csv(neighbourhood_csv)


In [3]:
# The neighbourhood profile file holds 2016 data, so only the 2016 crime
# columns are kept: the neighbourhood name plus the count and rate column of
# each crime type.
columns_2016 = [
    'Neighbourhood',
    'Assault_2016', 'Assault_Rate2016',
    'AutoTheft_2016', 'AutoTheft_Rate2016',
    'BreakAndEnter_2016', 'BreakAndEnter_Rate2016',
    'Robbery_2016', 'Robbery_Rate2016',
    'TheftOver_2016', 'TheftOver_Rate2016',
    'Homicide_2016', 'Homicide_Rate2016',
    'Shootings_2016', 'Shootings_Rate2016',
]
crime_2016 = crime_raw.loc[:, columns_2016]

In [4]:
%%capture --no-display
# Neighbourhood boundaries used for the choropleth ("heat") maps.
tor_map = "Neighbourhoods.shp"

# Read the shapefile and derive a name column that the crime data can be joined on.
regions = gpd.read_file(tor_map)

# Remove the parenthesised suffix from FIELD_7 (pattern: a space followed by "(...)").
# The pattern is a raw string and regex=True is passed explicitly: pandas >= 2.0
# treats str.replace patterns as literal text by default, which would leave the
# names untouched and make the later name-based joins miss.
regions['neighbourhood'] = regions['FIELD_7'].str.replace(r' \(.+\)', '', regex=True)

# Keep the coordinates, polygons and neighbourhood name only.
regions_cleaned = regions[['FIELD_11', 'FIELD_12', 'geometry', 'neighbourhood']]
# 'longitute' (sic) is kept unchanged because the plotting cells read the column under this name.
regions_cleaned = regions_cleaned.rename(columns={'FIELD_11':'longitute', 'FIELD_12':'latitude'})


In [5]:
# Attach the 2016 crime figures to the neighbourhood polygons by matching on
# the neighbourhood name, then replace every missing value with 0.
crime_regions = (
    regions_cleaned
    .set_index('neighbourhood')
    .join(crime_2016.set_index('Neighbourhood'))
    .reset_index()
    .fillna(0)
)

# Total crimes per neighbourhood = sum of the seven 2016 count columns.
# Missing values were already filled above, so a row-wise sum gives the same
# result as adding the columns one by one.
count_columns_2016 = [
    'Assault_2016',
    'AutoTheft_2016',
    'BreakAndEnter_2016',
    'Robbery_2016',
    'TheftOver_2016',
    'Homicide_2016',
    'Shootings_2016',
]
crime_regions['TotalCrimes_2016'] = crime_regions[count_columns_2016].sum(axis=1)


In [6]:
# Reshape the profile table from wide to long: the four descriptive columns
# stay as identifiers and every other column becomes a (variable, value) row.
neighbourhood = neighbourhood_raw.melt(
    id_vars=['Category', 'Topic', 'Data Source', 'Characteristic'],
)

In [7]:
# Clean the long-format table so that it only contains rows belonging to a
# neighbourhood. After the melt, 'variable' holds the former column names,
# two of which are not neighbourhoods: '_id' and 'City of Toronto'.
non_neighbourhood_labels = ['_id', 'City of Toronto']
is_neighbourhood_row = ~neighbourhood["variable"].isin(non_neighbourhood_labels)
neighbourhood = neighbourhood[is_neighbourhood_row]

# From here on, every row of `neighbourhood` refers to a neighbourhood.

In [8]:
# Adjust the column names: lower-case / snake_case for the identifier columns,
# and 'variable' (produced by the melt) becomes 'neighbourhood'.
column_renames = {
    'Category': 'category',
    'Topic': 'topic',
    'Data Source': 'data_source',
    'Characteristic': 'characteristic',
    'variable': 'neighbourhood',
}
neighbourhood = neighbourhood.rename(columns=column_renames)


In [9]:
def neighbourhoodDataframeValueFiltered(column, filter, data=None):
    """Return one characteristic of the long-format profile table as integers.

    Parameters
    ----------
    column : str
        Name given to the value column of the returned frame.
    filter : str
        Exact text of the ``characteristic`` to select. (The parameter name
        shadows the built-in ``filter``; it is kept so existing keyword
        callers keep working.)
    data : pandas.DataFrame, optional
        Long-format table with ``characteristic``, ``neighbourhood`` and
        ``value`` columns. When omitted, the notebook-level ``neighbourhood``
        frame is used, which is the original behaviour.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``neighbourhood`` and ``column``, with one row per
        selected input row (original index preserved). Thousands separators
        (``,``) are removed from the values before they are cast to ``int``.

    Raises
    ------
    ValueError
        If a selected value cannot be converted to an integer.
    """
    source = neighbourhood if data is None else data

    # Select with a boolean mask instead of a string-built .query() expression:
    # the comparison is exact and keeps working when `filter` itself contains
    # quote characters.
    matches = source['characteristic'] == filter
    result = source.loc[matches, ['neighbourhood', 'value']].rename(columns={'value': column})

    # Values arrive as text such as "14,349"; strip the separators, then cast.
    result[column] = result[column].str.replace(',', '', regex=False).astype(int)
    return result

In [10]:
# Add the population of each neighbourhood to crime_regions
# (profile characteristic 'Population, 2016').
# The merge uses pandas' default inner join on the neighbourhood name, so
# rows without a matching name on both sides are not kept.
population_by_neighbourhood = neighbourhoodDataframeValueFiltered('population', 'Population, 2016')
crime_regions = crime_regions.merge(population_by_neighbourhood, on='neighbourhood')


In [11]:
# Add the number of children to crime_regions
# (profile characteristic 'Children (0-14 years)').
children_by_neighbourhood = neighbourhoodDataframeValueFiltered('children', 'Children (0-14 years)')
crime_regions = crime_regions.merge(children_by_neighbourhood, on='neighbourhood')


In [12]:
# Add the number of youth to crime_regions
# (profile characteristic 'Youth (15-24 years)').
youth_by_neighbourhood = neighbourhoodDataframeValueFiltered('youth', 'Youth (15-24 years)')
crime_regions = crime_regions.merge(youth_by_neighbourhood, on='neighbourhood')


In [13]:
# Add the working-age population to crime_regions
# (profile characteristic 'Working Age (25-54 years)').
working_age_by_neighbourhood = neighbourhoodDataframeValueFiltered('workingAge', 'Working Age (25-54 years)')
crime_regions = crime_regions.merge(working_age_by_neighbourhood, on='neighbourhood')


In [14]:
# Add the pre-retirement population to crime_regions
# (profile characteristic 'Pre-retirement (55-64 years)').
pre_retirement_by_neighbourhood = neighbourhoodDataframeValueFiltered('preRetirement', 'Pre-retirement (55-64 years)')
crime_regions = crime_regions.merge(pre_retirement_by_neighbourhood, on='neighbourhood')


In [15]:
# Add the number of seniors to crime_regions
# (profile characteristic 'Seniors (65+ years)').
seniors_by_neighbourhood = neighbourhoodDataframeValueFiltered('seniors', 'Seniors (65+ years)')
crime_regions = crime_regions.merge(seniors_by_neighbourhood, on='neighbourhood')


In [16]:
# Add the number of older seniors to crime_regions
# (profile characteristic 'Older Seniors (85+ years)').
older_seniors_by_neighbourhood = neighbourhoodDataframeValueFiltered('olderSeniors', 'Older Seniors (85+ years)')
crime_regions = crime_regions.merge(older_seniors_by_neighbourhood, on='neighbourhood')


In [17]:
# Add the number of seniors living alone to crime_regions
# (profile characteristic 'Persons age 65+ living alone (total)').
seniors_alone_by_neighbourhood = neighbourhoodDataframeValueFiltered('seniors_alone', 'Persons age 65+ living alone (total)')
crime_regions = crime_regions.merge(seniors_alone_by_neighbourhood, on='neighbourhood')


In [18]:
# Add the average total income to crime_regions
# (profile characteristic 'Total income: Average amount ($)').
income_by_neighbourhood = neighbourhoodDataframeValueFiltered('income', 'Total income: Average amount ($)')
crime_regions = crime_regions.merge(income_by_neighbourhood, on='neighbourhood')


In [19]:
# Add the education column to crime_regions, taken from the profile
# characteristic 'Total - Highest certificate, diploma or degree for the
# population aged 15 years and over in private households - 25% sample data'.
# NOTE(review): judging by its label this looks like the "Total" row of the
# education table rather than an attainment measure - confirm it is the
# intended value for an 'education' column.
education_by_neighbourhood = neighbourhoodDataframeValueFiltered('education', 'Total - Highest certificate, diploma or degree for the population aged 15 years and over in private households - 25% sample data')
crime_regions = crime_regions.merge(education_by_neighbourhood, on='neighbourhood')


In [20]:
# Display the merged frame (geometry, 2016 crime figures and the profile columns added above).
crime_regions

Unnamed: 0,neighbourhood,longitute,latitude,geometry,Assault_2016,Assault_Rate2016,AutoTheft_2016,AutoTheft_Rate2016,BreakAndEnter_2016,BreakAndEnter_Rate2016,...,population,children,youth,workingAge,preRetirement,seniors,olderSeniors,seniors_alone,income,education
0,Wychwood,-79.425515,43.676919,"POLYGON ((-79.43592 43.68015, -79.43492 43.680...",80.0,540.5771,18.0,121.62980,30.0,202.71640,...,14349,1860,1320,6420,1595,3150,880,690,54460,11385
1,Yonge-Eglinton,-79.403590,43.704689,"POLYGON ((-79.41096 43.70408, -79.40962 43.704...",67.0,547.5646,4.0,32.69042,19.0,155.27950,...,11817,1800,1225,5860,1325,1600,165,595,89330,10000
2,Yonge-St.Clair,-79.397871,43.687859,"POLYGON ((-79.39119 43.68108, -79.39141 43.680...",34.0,262.5482,7.0,54.05405,12.0,92.66409,...,12528,1210,920,5960,1540,2905,470,1025,114174,11100
3,York University Heights,-79.488883,43.765736,"POLYGON ((-79.50529 43.75987, -79.50488 43.759...",363.0,1269.3200,106.0,370.65530,98.0,342.68130,...,27593,4045,4750,12290,2965,3530,400,720,29958,23520
4,Yorkdale-Glen Park,-79.457108,43.714672,"POLYGON ((-79.43969 43.70561, -79.44011 43.705...",175.0,1149.1990,41.0,269.24090,66.0,433.41210,...,14804,1960,1870,5860,1810,3295,775,610,38527,12065
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131,Kennedy Park,-79.260382,43.725556,"POLYGON ((-79.24549 43.73060, -79.24555 43.730...",212.0,1201.4050,12.0,68.00408,46.0,260.68230,...,17123,2775,2175,7120,2510,2550,330,645,30974,14325
132,Kensington-Chinatown,-79.397240,43.653554,"POLYGON ((-79.40401 43.64719, -79.40419 43.647...",377.0,2017.0140,31.0,165.85520,81.0,433.36360,...,17945,1315,3350,8760,1795,2705,480,1040,37422,16155
133,Kingsview Village-The Westway,-79.547863,43.698993,"POLYGON ((-79.55236 43.70947, -79.55229 43.709...",128.0,564.6478,42.0,185.27500,34.0,149.98460,...,22000,4240,3020,8635,2550,3585,575,805,36674,17735
134,Kingsway South,-79.510577,43.653520,"POLYGON ((-79.51703 43.64611, -79.51717 43.646...",29.0,305.3274,9.0,94.75679,27.0,284.27040,...,9271,1580,1100,3245,1345,1995,325,460,144642,7580


In [None]:
# Heat Map of Assaults by Neighbourhood in Toronto
assault_column = 'Assault_2016'
label_threshold = 250  # only neighbourhoods with more assaults than this get a name label

# make the plot
fig, ax = plt.subplots(1, figsize=(40, 20))
ax.axis('off')
ax.set_title('Heat Map of Assaults by Neighbourhood in Toronto, Ontario', fontdict={'fontsize': '40', 'fontweight' : '3'})

color = 'Oranges'
# Take the colour range from the data and give the SAME range to both the
# colourbar and the map. A fixed 0-231 colourbar next to a map coloured on its
# own data range makes the legend disagree with the polygons (the data contains
# assault counts well above 231).
vmin, vmax = 0, crime_regions[assault_column].max()
sm = plt.cm.ScalarMappable(cmap=color, norm=plt.Normalize(vmin=vmin, vmax=vmax))
sm.set_array([])  # public API instead of assigning the private sm._A
# A stand-alone ScalarMappable is not attached to any Axes, so tell the figure
# which Axes the colourbar belongs to.
cbar = fig.colorbar(sm, ax=ax)
cbar.ax.tick_params(labelsize=20)

# figsize is not passed here: the figure size is already fixed by plt.subplots above.
crime_regions.plot(assault_column, cmap=color, vmin=vmin, vmax=vmax, linewidth=0.8, ax=ax, edgecolor='0.8')

# Label only the neighbourhoods above the threshold, at their stored coordinates.
for _, row in crime_regions[crime_regions[assault_column] > label_threshold].iterrows():
    ax.annotate(row['neighbourhood'], xy=(row['longitute'], row['latitude']),
                horizontalalignment='center', fontsize='large', color='black', wrap=True)
plt.show()

In [None]:
# Heat Map of Crimes by Neighbourhood in Toronto
total_column = 'TotalCrimes_2016'

fig, ax = plt.subplots(1, figsize=(40, 20))
ax.axis('off')
ax.set_title('Heat Map of Crimes by Neighbourhood in Toronto, Ontario', fontdict={'fontsize': '40', 'fontweight' : '3'})

color = 'Reds'
# Use one colour range (0 .. largest total) for both the colourbar and the map
# so that the legend matches the polygon colours.
vmin, vmax = 0, crime_regions[total_column].max()
sm = plt.cm.ScalarMappable(cmap=color, norm=plt.Normalize(vmin=vmin, vmax=vmax))
sm.set_array([])  # public API instead of assigning the private sm._A
# A stand-alone ScalarMappable is not attached to any Axes, so tell the figure
# which Axes the colourbar belongs to.
cbar = fig.colorbar(sm, ax=ax)
cbar.ax.tick_params(labelsize=20)

# figsize is not passed here: the figure size is already fixed by plt.subplots above.
crime_regions.plot(total_column, cmap=color, vmin=vmin, vmax=vmax, linewidth=0.8, ax=ax, edgecolor='0.8')

# Label the neighbourhoods whose total is above the city-wide mean; the mean is
# computed once instead of once per row.
mean_total = crime_regions[total_column].mean()
for _, row in crime_regions[crime_regions[total_column] > mean_total].iterrows():
    ax.annotate(row['neighbourhood'], xy=(row['longitute'], row['latitude']),
                horizontalalignment='center', fontsize='large', color='black', wrap=True)
plt.show()

In [None]:
# Bar chart of the ten neighbourhoods with the most assaults in 2016.
# crime_regions itself is re-ordered here (descending by assault count).
crime_regions = crime_regions.sort_values(by=['Assault_2016'], ascending=False)
top_assault = crime_regions.head(10)

fig, ax = plt.subplots()
ax.bar(top_assault['neighbourhood'], top_assault['Assault_2016'])
ax.set(ylabel='Assaults', title='Number of assaults by neighbourhood (top 10)')

plt.show()

In [None]:
# Bar chart of the ten neighbourhoods with the most homicides in 2016.
# crime_regions itself is re-ordered here (descending by homicide count).
crime_regions = crime_regions.sort_values(['Homicide_2016'], ascending=False)

fig, ax = plt.subplots()
ax.bar(crime_regions['neighbourhood'].iloc[:10], crime_regions['Homicide_2016'].iloc[:10])
# The y axis shows homicide counts, so label it accordingly.
ax.set_ylabel('Homicides')
ax.set_title('Number of homicides by neighbourhood (top 10)')

plt.show()

In [None]:
# Grouped bar chart: assault vs. break-and-enter counts for the first five rows
# of crime_regions.
# NOTE(review): which five neighbourhoods these are depends on the current row
# order of crime_regions (the preceding cell sorts it by Homicide_2016,
# descending) - confirm that this is the intended selection.
first_five = crime_regions.iloc[:5]

x = np.arange(len(first_five))  # the label locations
width = 0.35  # the width of the bars

# Size this figure locally rather than through plt.rc(...), so the setting does
# not leak into other figures when cells are re-run.
fig, ax = plt.subplots(figsize=(12, 4))

# One bar per crime type, placed left and right of each label location.
assault = ax.bar(x - width/2, first_five['Assault_2016'], width, label='Assault')
break_and_enter = ax.bar(x + width/2, first_five['BreakAndEnter_2016'], width, label='BreakAndEnter')

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Crimes')
ax.set_title('Assault/Break and Enter by neighbourhood')
ax.set_xticks(x, first_five['neighbourhood'])
ax.legend()

# Print the value on top of every bar.
ax.bar_label(assault, padding=3)
ax.bar_label(break_and_enter, padding=3)

fig.tight_layout()

plt.show()

In [None]:
# Display crime_regions again; the chart cells above reassign it with a sorted row order.
crime_regions