# Team Project

In [1]:
# libraries to import
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="darkgrid")
%matplotlib inline

In [2]:
## read datas

# crime
crime_raw = pd.read_csv("Neighbourhood_Crime_Rates.csv")

# neighbourhood data
neighbourhood_raw = pd.read_csv("neighbourhood-profiles-2016-140-model.csv")


In [3]:
# keep relevant variables for analysis. since the other csv contains data from 2016, only focus on 2016 data
crime_2016 = crime_raw[['Neighbourhood', 'Assault_2016', 'Assault_Rate2016', 'AutoTheft_2016', 'AutoTheft_Rate2016',
                      'BreakAndEnter_2016', 'BreakAndEnter_Rate2016', 'Robbery_2016', 'Robbery_Rate2016',
                      'TheftOver_2016', 'TheftOver_Rate2016', 'Homicide_2016', 'Homicide_Rate2016',
                      'Shootings_2016', 'Shootings_Rate2016']]

In [4]:
%%capture --no-display
# David's heat map stuff
tor_map = "Neighbourhoods.shp"
# read .shp file and make a common joining factor
regions = gpd.read_file(tor_map)
regions['neighbourhood'] = regions['FIELD_7'].str.replace(' \(.+\)', '')
# keep coordinates, polygons, and neighbourhood name
regions_cleaned = regions[['FIELD_11', 'FIELD_12', 'geometry', 'neighbourhood']]
regions_cleaned = regions_cleaned.rename(columns={'FIELD_11':'longitute', 'FIELD_12':'latitude'})


In [5]:
# join crime data and heat map data
crime_regions = regions_cleaned.set_index('neighbourhood').join(crime_2016.set_index('Neighbourhood'))
crime_regions = crime_regions.reset_index()
crime_regions = crime_regions.fillna(0)

# create a total crimes
crime_regions['TotalCrimes_2016'] = crime_regions['Assault_2016']+ \
                            crime_regions['AutoTheft_2016']+ \
                            crime_regions['BreakAndEnter_2016']+ \
                            crime_regions['Robbery_2016']+ \
                            crime_regions['TheftOver_2016']+ \
                            crime_regions['Homicide_2016']+ \
                            crime_regions['Shootings_2016']


In [6]:
# Unpivot the neighbourhood dataframe
neighbourhood = pd.melt(neighbourhood_raw, id_vars=['Category','Topic','Data Source','Characteristic'])

In [7]:
# cleaning neighbourhood
# remove rows that are not neighbourhood

# remove rows whith variable = '_id'
neighbourhood = neighbourhood.drop(
    neighbourhood[neighbourhood["variable"] == '_id'].index
)

# remove rows whith variable = 'City of Toronto'
neighbourhood = neighbourhood.drop(
    neighbourhood[neighbourhood["variable"] == 'City of Toronto'].index
)

# now in the neighbourhood we have only neighbourhood

In [8]:
# ajust column names
neighbourhood = neighbourhood.rename(columns={'Category': 'category', 'Topic': 'topic', 'Data Source': 'data_source', 'Characteristic':'characteristic', 'variable': 'neighbourhood'})


In [9]:
# function too add a new column to original dataframe by filtering characteristic of neighbourhood dataframe
def addColumnDataframe(column, characteristic, dataFrame):
    newDataframe = neighbourhood[['characteristic','neighbourhood','value']].query('characteristic == "'+characteristic+'"')
    
    # adjust column name
    newDataframe = newDataframe.rename(columns={'value': column})

    # adjust type of column to int
    newDataframe[column] = newDataframe[column].str.replace(',', '')
    newDataframe = newDataframe.astype({column: int})
    newDataframe = newDataframe.drop('characteristic', axis=1)
    newDataframe = dataFrame.set_index('neighbourhood').join(newDataframe.set_index('neighbourhood'))
    
    # return dataFrame with nw column
    return newDataframe

In [11]:
# add population to crime_regions
crime_regions = addColumnDataframe('population', 'Population, 2016',crime_regions)

In [12]:
crime_regions

Unnamed: 0_level_0,longitute,latitude,geometry,Assault_2016,Assault_Rate2016,AutoTheft_2016,AutoTheft_Rate2016,BreakAndEnter_2016,BreakAndEnter_Rate2016,Robbery_2016,Robbery_Rate2016,TheftOver_2016,TheftOver_Rate2016,Homicide_2016,Homicide_Rate2016,Shootings_2016,Shootings_Rate2016,TotalCrimes_2016,population
neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Wychwood,-79.425515,43.676919,"POLYGON ((-79.43592 43.68015, -79.43492 43.680...",80.0,540.5771,18.0,121.62980,30.0,202.71640,9.0,60.81492,4.0,27.028850,0.0,0.000000,0.0,0.000000,141.0,14349.0
Yonge-Eglinton,-79.403590,43.704689,"POLYGON ((-79.41096 43.70408, -79.40962 43.704...",67.0,547.5646,4.0,32.69042,19.0,155.27950,14.0,114.41650,2.0,16.345210,0.0,0.000000,0.0,0.000000,106.0,11817.0
Yonge-St.Clair,-79.397871,43.687859,"POLYGON ((-79.39119 43.68108, -79.39141 43.680...",34.0,262.5482,7.0,54.05405,12.0,92.66409,6.0,46.33205,7.0,54.054050,0.0,0.000000,1.0,0.772201,67.0,12528.0
York University Heights,-79.488883,43.765736,"POLYGON ((-79.50529 43.75987, -79.50488 43.759...",363.0,1269.3200,106.0,370.65530,98.0,342.68130,70.0,244.77240,38.0,132.876400,2.0,0.699350,4.0,1.398699,681.0,27593.0
Yorkdale-Glen Park,-79.457108,43.714672,"POLYGON ((-79.43969 43.70561, -79.44011 43.705...",175.0,1149.1990,41.0,269.24090,66.0,433.41210,24.0,157.60440,26.0,170.738100,1.0,0.656685,3.0,1.970055,336.0,14804.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Kennedy Park,-79.260382,43.725556,"POLYGON ((-79.24549 43.73060, -79.24555 43.730...",212.0,1201.4050,12.0,68.00408,46.0,260.68230,27.0,153.00920,5.0,28.335030,0.0,0.000000,2.0,1.133401,304.0,17123.0
Kensington-Chinatown,-79.397240,43.653554,"POLYGON ((-79.40401 43.64719, -79.40419 43.647...",377.0,2017.0140,31.0,165.85520,81.0,433.36360,62.0,331.71040,20.0,107.003400,5.0,2.675084,5.0,2.675084,581.0,17945.0
Kingsview Village-The Westway,-79.547863,43.698993,"POLYGON ((-79.55236 43.70947, -79.55229 43.709...",128.0,564.6478,42.0,185.27500,34.0,149.98460,27.0,119.10540,5.0,22.056550,2.0,0.882262,14.0,6.175835,252.0,22000.0
Kingsway South,-79.510577,43.653520,"POLYGON ((-79.51703 43.64611, -79.51717 43.646...",29.0,305.3274,9.0,94.75679,27.0,284.27040,13.0,136.87090,2.0,21.057060,0.0,0.000000,0.0,0.000000,80.0,9271.0


In [None]:
# add Senior to crime_regions
# I used the 'Seniors (65+ years)' characteristic

# add population to crime_regions
crime_regions = addColumnDataframe('seniors', 'Seniors (65+ years)',crime_regions)

In [None]:
#seniors living alone dataframe
# I used the 'Persons age 65+ living alone (total)' characteristic

seniors_alone = neighbourhood[['characteristic','neighbourhood','value']].query('characteristic == "Persons age 65+ living alone (total)"')

# adjust column name
seniors_alone = seniors_alone.rename(columns={'value': 'seniors_alone'})

# adjust type of column to int
seniors_alone['seniors_alone'] = seniors_alone['seniors_alone'].str.replace(',', '')
seniors_alone = seniors_alone.astype({'seniors_alone': int})


# add population to crime_regions dataframe
crime_regions = crime_regions.set_index('neighbourhood').join(seniors_alone[['neighbourhood','seniors_alone']].set_index('neighbourhood'))
crime_regions = crime_regions.reset_index()

In [None]:
# income dataframe 
# I used the 'Total income: Average amount ($)' characteristic

income = neighbourhood[['characteristic','neighbourhood','value']].query('characteristic == "Total income: Average amount ($)"')

# adjust column name
income = income.rename(columns={'value': 'income'})

# adjust type of column to int
income['income'] = income['income'].str.replace(',', '')
income = income.astype({'income': int})


# add population to crime_regions dataframe
crime_regions = crime_regions.set_index('neighbourhood').join(income[['neighbourhood','income']].set_index('neighbourhood'))
crime_regions = crime_regions.reset_index()

In [None]:
# eduaction  dataframe 
# I used the 'Total - Highest certificate, diploma or degree for the population aged 15 years and over in private households - 25% sample data' characteristic

eduaction = neighbourhood[['characteristic','neighbourhood','value']].query('characteristic == "Total - Highest certificate, diploma or degree for the population aged 15 years and over in private households - 25% sample data"')

# adjust column name
eduaction = eduaction.rename(columns={'value': 'eduaction'})

# adjust type of column to int
eduaction['eduaction'] = eduaction['eduaction'].str.replace(',', '')
eduaction = eduaction.astype({'eduaction': int})


# add population to crime_regions dataframe
crime_regions = crime_regions.set_index('neighbourhood').join(eduaction[['neighbourhood','eduaction']].set_index('neighbourhood'))
crime_regions = crime_regions.reset_index()

In [None]:
# Heat Map of Assaults by Neighbourhood in Toronto
# make the plot
fig, ax = plt.subplots(1, figsize=(40, 20))
ax.axis('off')
ax.set_title('Heat Map of Assaults by Neighbourhood in Toronto, Ontario', fontdict={'fontsize': '40', 'fontweight' : '3'})

color = 'Oranges'
vmin, vmax = 0, 231
sm = plt.cm.ScalarMappable(cmap=color, norm=plt.Normalize(vmin=vmin, vmax=vmax))
sm._A = []
cbar = fig.colorbar(sm)
cbar.ax.tick_params(labelsize=20)

crime_regions.plot('Assault_2016', cmap=color, linewidth=0.8, ax=ax, edgecolor='0.8', figsize=(40,20))
for idx, row in crime_regions.iterrows():
    if(row['Assault_2016'] > 250):
        plt.annotate(text=row['neighbourhood'], xy=(row['longitute'], row['latitude']),
                 horizontalalignment='center', fontsize='large', color='black', wrap=True)
plt.show()

In [None]:
# Heat Map of Crimes by Neighbourhood in Toronto
fig, ax = plt.subplots(1, figsize=(40, 20))
ax.axis('off')
ax.set_title('Heat Map of Crimes by Neighbourhood in Toronto, Ontario', fontdict={'fontsize': '40', 'fontweight' : '3'})

#color = 'Oranges'
color = 'Reds'
vmin, vmax = 0, crime_regions['TotalCrimes_2016'].max()
sm = plt.cm.ScalarMappable(cmap=color, norm=plt.Normalize(vmin=vmin, vmax=vmax))
sm._A = []
cbar = fig.colorbar(sm)
cbar.ax.tick_params(labelsize=20)

crime_regions.plot('TotalCrimes_2016', cmap=color, linewidth=0.8, ax=ax, edgecolor='0.8', figsize=(40,20))
for idx, row in crime_regions.iterrows():
    if(row['TotalCrimes_2016'] > crime_regions['TotalCrimes_2016'].mean()):
        plt.annotate(text=row['neighbourhood'], xy=(row['longitute'], row['latitude']),
                 horizontalalignment='center', fontsize='large', color='black', wrap=True)
plt.show()

In [None]:
# assaults top 10 bar graph
crime_regions = crime_regions.sort_values(['Assault_2016'], ascending=False)

fig, ax = plt.subplots()

ax.bar(crime_regions['neighbourhood'].iloc[:10], crime_regions['Assault_2016'].iloc[:10])

ax.set_ylabel('Assaults')
ax.set_title('Number of assaults by neighbourhood (top 10)')

plt.show()

In [None]:
# homicides
crime_regions = crime_regions.sort_values(['Homicide_2016'], ascending=False)

fig, ax = plt.subplots()
ax.bar(crime_regions['neighbourhood'].iloc[:10], crime_regions['Homicide_2016'].iloc[:10])
ax.set_ylabel('Assaults')
ax.set_title('Number of homicidee by neighbourhood (top 10)')

plt.show()

In [None]:
plt.rc('figure', figsize=(12, 4))

x = np.arange(len(crime_regions['neighbourhood'].iloc[:5]))  # the label locations
width = 0.35  # the width of the bars

fig, ax = plt.subplots()

#rects1 = ax.bar(x - width/2, men_means, width, label='Men')
assault = ax.bar(x - width/2, crime_regions['Assault_2016'].iloc[:5], width, label='Assault')
homicide = ax.bar(x + width/2, crime_regions['BreakAndEnter_2016'].iloc[:5], width, label='BreakAndEnter')
#rects2 = ax.bar(x + width/2, women_means, width, label='Women')

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Crimes')
ax.set_title('Assault/Break and Enter by neighbourhood')
ax.set_xticks(x, crime_regions['neighbourhood'].iloc[:5])
ax.legend()

ax.bar_label(assault, padding=3)
ax.bar_label(homicide, padding=3)

fig.tight_layout()

plt.show()

In [None]:
crime_regions