# Capstone project notebook

This notebook supports the [Coursera Applied Data Science Capstone](https://www.coursera.org/learn/applied-data-science-capstone) project.

In [1]:
import pandas as pd
import numpy as np
import folium

import requests

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

from geopy.geocoders import Nominatim # Nominatim is the open acces geolocation service that geopy uses.


In [2]:
# Greeting cell for the capstone course notebook.
print("Hello Capstone Project Course!")

Hello Capstone Project Course!


## Create a map of Toronto

Start by loading the previously prepared `.csv` file on Toronto neighbourhood locations.

In [3]:
# Location of the previously prepared neighbourhood table
path = '~/Documents/Projects/Coursera-Capstone/Neighbourhoods.csv'

# The '.csv' was saved together with its index, which is read back as an
# unnamed column; selecting only the named columns drops it.
location_columns = ['Postal Code', 'Borough', 'Neighbourhood', 'Latitude', 'Longitude']
toronto_nbhs = pd.read_csv(path).loc[:, location_columns]

In [4]:
# Obtain the coordinates of Toronto; they are used as the centre of the maps.
address = 'Toronto, Ontario'

# user_agent identifies this application to the Nominatim service.
geolocator = Nominatim(user_agent="coursera_capstone")
location = geolocator.geocode(address)

# geocode() returns None when no match is found; fail with a clear message
# here rather than with an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude

In [5]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per neighbourhood, with a "<neighbourhood>, <borough>" popup.
neighbourhood_rows = zip(
    toronto_nbhs['Latitude'],
    toronto_nbhs['Longitude'],
    toronto_nbhs['Borough'],
    toronto_nbhs['Neighbourhood'],
)
for lat, lng, borough, neighbourhood in neighbourhood_rows:
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

# Last expression of the cell, so the notebook renders the map.
map_toronto

### Load the previously prepared cluster data

In [6]:
# Previously prepared neighbourhood table that includes the cluster labels
path = '~/Documents/Projects/Coursera-Capstone/nbh_clusters.csv'
toronto_merged = pd.read_csv(filepath_or_buffer=path)

In [7]:
# Number of clusters; determines how many distinct marker colours are generated.
kclusters = 5
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# One hex colour per cluster, evenly spaced along matplotlib's rainbow colormap.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# Add one circle marker per neighbourhood, coloured by its cluster label.
cluster_rows = zip(
    toronto_merged['Latitude'],
    toronto_merged['Longitude'],
    toronto_merged['Neighbourhood'],
    toronto_merged['Cluster Labels'],
)
for lat, lon, poi, cluster in cluster_rows:
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # The palette is indexed with cluster - 1, so a label of 0 selects the
    # last colour (index -1).
    # NOTE(review): assumes integer labels in 0..kclusters-1 — confirm.
    cluster_color = rainbow[cluster - 1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7,
    ).add_to(map_clusters)

# Last expression of the cell, so the notebook renders the map.
map_clusters

## Import the Toronto crime data

In [8]:
# Neighbourhood crime rates table, read from the local project folder
path = '~/Documents/Projects/Coursera-Capstone/Toronto crime/Neighbourhood Crime Rates.csv'
toronto_crime = pd.read_csv(filepath_or_buffer=path)

In [9]:
# Print the (rows, columns) shape, then preview the leading rows.
print(toronto_crime.shape)
toronto_crime.head(n=5)

(140, 62)


Unnamed: 0,_id,OBJECTID,Neighbourhood,Hood_ID,Population,Assault_2014,Assault_2015,Assault_2016,Assault_2017,Assault_2018,...,TheftOver_2016,TheftOver_2017,TheftOver_2018,TheftOver_2019,TheftOver_AVG,TheftOver_CHG,TheftOver_Rate_2019,Shape__Area,Shape__Length,geometry
0,1,16,South Parkdale,85,21849,202,226,231,229,220,...,9,10,9,22,10.0,1.44,100.7,2286974.0,10802.83216,"{u'type': u'Polygon', u'coordinates': (((-79.4..."
1,2,17,South Riverdale,70,27876,215,207,236,243,304,...,22,27,24,21,21.3,-0.13,75.3,10964570.0,43080.724701,"{u'type': u'Polygon', u'coordinates': (((-79.3..."
2,3,18,St.Andrew-Windfields,40,17812,53,41,48,45,55,...,8,7,6,6,8.5,0.0,33.7,7299580.0,13025.997456,"{u'type': u'Polygon', u'coordinates': (((-79.3..."
3,4,19,Taylor-Massey,61,15683,127,92,97,107,123,...,5,2,4,3,3.5,-0.25,19.1,1062970.0,5940.70005,"{u'type': u'Polygon', u'coordinates': (((-79.2..."
4,5,20,Humber Summit,21,12416,76,89,118,116,109,...,18,18,15,22,17.3,0.47,177.2,7966905.0,12608.573118,"{u'type': u'Polygon', u'coordinates': (((-79.5..."


In [10]:
# GeoJSON file passed as geo_data to the choropleth; read from the working directory.
# The commented-out URL looks like the download source for this file — TODO confirm.
# url = 'https://ckan0.cf.opendata.inter.prod-toronto.ca/download_resource/3d556fc2-ddab-4aa0-97e1-227707580ec6?format=geojson&projection=4326'
toronto_geo = 'Neighbourhood Crime Rates.geojson'  # alternative: f'{url}'
toronto_geo


'Neighbourhood Crime Rates.geojson'

In [11]:
# Overlay a choropleth of the Toronto crime data on the cluster map.
# Rows of toronto_crime are matched to GeoJSON features by neighbourhood name.
# NOTE(review): the legend text is generic while the plotted column is
# Homicide_Rate_2019 — consider a more specific legend.
# NOTE(review): map_clusters is modified in place, so re-running this cell
# presumably stacks another layer on the same map — confirm.
crime_layer = folium.Choropleth(
    geo_data=toronto_geo,
    data=toronto_crime,
    columns=['Neighbourhood', 'Homicide_Rate_2019'],  # [key column, value column]
    key_on='feature.properties.Neighbourhood',
    fill_color='BuPu',
    fill_opacity=0.1,
    line_opacity=1,
    legend_name='Neighbourhood crime',
)
crime_layer.add_to(map_clusters)
# folium.LayerControl().add_to(map_clusters)

# Last expression of the cell, so the notebook renders the updated map.
map_clusters

In [12]:
# Show the record id next to the 2019 assault rate column.
toronto_crime.loc[:, ['_id', 'Assault_Rate_2019']]

Unnamed: 0,_id,Assault_Rate_2019
0,1,1148.8
1,2,936.3
2,3,325.6
3,4,777.9
4,5,950.4
...,...,...
135,136,320.2
136,137,504.9
137,138,786.3
138,139,593.5
