# Part 1

### Importing Libraries...

In [1]:
import pandas as pd 
import numpy as np
import html5lib

!pip install lxml
import lxml  

import requests

!pip install beautifulsoup4
from bs4 import BeautifulSoup

print('All Imported')

All Imported


### Loading Wiki table in a df

In [2]:
Toronto = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")

soup = BeautifulSoup(Toronto.content,'lxml')

table = soup.find_all('table')[0]

df = pd.read_html(str(table))[0]

df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### Dropping Not Assigned borough and resetting df

In [3]:
df["Borough"].replace('Not assigned', np.nan, inplace=True)
df.dropna(subset=["Borough"], axis=0, inplace=True)
df.reset_index(drop=True, inplace=True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### Grouping Neighborhoods

In [4]:
df.groupby(['Postal Code' , 'Borough'])['Neighbourhood'].apply(','.join)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### Replacing Not Assigned Neighborhoods by Borough

In [5]:
df['Neighbourhood'] = np.where(df['Neighbourhood']== 'Not assigned', df['Borough'], df['Neighbourhood'])
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### Printing shape

In [6]:
df.shape

(103, 3)

# Part 2

### Downloading Geoloc data and merging with original dataframe

In [7]:
!wget -q -O 'Geospatial_Coordinates.csv' http://cocl.us/Geospatial_data
print('Data downloaded!')

Data downloaded!


In [8]:
geoloc_df = pd.read_csv('Geospatial_Coordinates.csv')
geoloc_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [9]:
toronto_df = df.merge(geoloc_df, how = 'inner', on = ['Postal Code', 'Postal Code'])
toronto_df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


# Part 3

### Making a map of Toronto using Folium and pin pointing the boroughs onto that map

In [15]:
!conda install -c conda-forge folium=0.11.0 --yes
import folium

print('Folium installed and imported!')

Solving environment: - ^C
failed

CondaError: KeyboardInterrupt



ModuleNotFoundError: No module named 'folium'

In [11]:
latitude = 43.651070
longitude = -79.347015
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

for lat, lng, borough, neighbourhood in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Borough'], toronto_df['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
    
map_toronto

In [10]:
import csv
!wget -q -O 'MCI_2014_to_2019.csv' https://opendata.arcgis.com/datasets/0c5fa2b642214e8baf0601405abccf30_0.csv?outSR=%7B%22latestWkid%22%3A3857%2C%22wkid%22%3A102100%7D
print('Data downloaded!')

Data downloaded!


In [11]:
crimes_df = pd.read_csv('MCI_2014_to_2019.csv')
crimes_df.head()

Unnamed: 0,X,Y,Index_,event_unique_id,occurrencedate,reporteddate,premisetype,ucr_code,ucr_ext,offence,...,occurrencedayofyear,occurrencedayofweek,occurrencehour,MCI,Division,Hood_ID,Neighbourhood,Long,Lat,ObjectId
0,-8816401.0,5434587.0,701,GO-20141756319,2014/03/24 00:00:00+00,2014/03/24 00:00:00+00,Commercial,1430,100,Assault,...,83.0,Monday,1,Assault,D42,132,Malvern (132),-79.199081,43.800281,1
1,-8837252.0,5413357.0,901,GO-20143006885,2014/09/27 00:00:00+00,2014/09/29 00:00:00+00,Other,2120,200,B&E,...,270.0,Saturday,16,Break and Enter,D52,76,Bay Street Corridor (76),-79.386383,43.662472,2
2,-8862433.0,5422276.0,702,GO-20141756802,2014/03/24 00:00:00+00,2014/03/24 00:00:00+00,Commercial,2120,200,B&E,...,83.0,Monday,6,Break and Enter,D23,1,West Humber-Clairville (1),-79.612595,43.720406,3
3,-8833104.0,5431887.0,703,GO-20141760570,2014/03/24 00:00:00+00,2014/03/24 00:00:00+00,Apartment,2120,200,B&E,...,83.0,Monday,15,Break and Enter,D33,47,Don Valley Village (47),-79.349121,43.782772,4
4,-8845311.0,5413667.0,902,GO-20142004859,2014/05/03 00:00:00+00,2014/05/03 00:00:00+00,Commercial,1610,210,Robbery - Business,...,123.0,Saturday,2,Robbery,D11,90,Junction Area (90),-79.458778,43.66449,5


In [14]:
crimes2_df = crimes_df[['MCI', 'occurrenceyear', 'Lat', 'Long', 'Neighbourhood']]
crimes2_df.tail(5)

Unnamed: 0,MCI,occurrenceyear,Lat,Long,Neighbourhood
206430,Auto Theft,2019.0,43.71069,-79.61132,West Humber-Clairville (1)
206431,Auto Theft,2019.0,43.610126,-79.556908,Islington-City Centre West (14)
206432,Auto Theft,2019.0,43.67128,-79.37542,North St.James Town (74)
206433,Auto Theft,2019.0,43.700653,-79.497994,Brookhaven-Amesbury (30)
206434,Auto Theft,2019.0,43.668201,-79.487038,Rockcliffe-Smythe (111)


In [13]:
crimes2_df['occurrenceyear'] = crimes2_df['occurrenceyear'].astype(int)

ValueError: Cannot convert non-finite values (NA or inf) to integer

In [54]:
crimes2_df.iloc['occurrenceyear'] = 2019.0
crimes2_df.head()

ValueError: Can only index by location with a [integer, integer slice (START point is INCLUDED, END point is EXCLUDED), listlike of integers, boolean array]

In [33]:
year = crimes2_df[ (crimes2_df['occurrenceyear'] <= 2018.5)].index
df.drop(year , inplace=True)

KeyError: '[   103    104    105 ... 205917 205918 205919] not found in axis'

In [24]:
lastyear_df = df[(df['occurrenceyear'] !== '2019.0')
                 

(206435, 5)

In [None]:
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

In [15]:
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-2.0.0                |     pyh9f0ad1d_0          63 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          97 KB

The following NEW packages will be INSTALLED:

    geographiclib: 1.50-py_0          conda-forge
    geopy:         2.0.0-pyh9f0ad1d_0 conda-forge


Downloading and Extracting Packages
geographiclib-1.50   | 34 KB     | ##################################### | 100% 
geopy-2.0.0          | 63 KB     | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: done


In [16]:
from pandas.io.json import json_normalize
print('All imported')

All imported


In [17]:
from IPython.display import Image 
from IPython.core.display import HTML

In [39]:
CLIENT_ID = 'XSFJHKKTDE3HAFUSTBC0LNZ2ODEUADQZTVCADBTUDHVZAPSV'
CLIENT_SECRET = 'O55ZJHLXQ43GSSMZCYUHBHWXEGHZBRQQPWAWJPWPEOG0PCMO'
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: XSFJHKKTDE3HAFUSTBC0LNZ2ODEUADQZTVCADBTUDHVZAPSV
CLIENT_SECRET:O55ZJHLXQ43GSSMZCYUHBHWXEGHZBRQQPWAWJPWPEOG0PCMO


In [40]:
address = '88 McNabb St, ON'

geolocator = Nominatim(user_agent="coursera-capstone-project")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

46.5219473 -84.3233836


In [49]:
#french_restaurant_id = '4bf58dd8d48988d10c941735'
french_restaurant_id = 'French'
radius = 20000
print(french_restaurant_id + ' .... OK!')

French .... OK!


In [50]:
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, french_restaurant_id, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/search?client_id=XSFJHKKTDE3HAFUSTBC0LNZ2ODEUADQZTVCADBTUDHVZAPSV&client_secret=O55ZJHLXQ43GSSMZCYUHBHWXEGHZBRQQPWAWJPWPEOG0PCMO&ll=46.5219473,-84.3233836&v=20180604&query=French&radius=20000&limit=30'

In [51]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5f52e25930e7ef00415d4802'},
 'response': {'venues': [{'id': '4bdef8436198c9b6467d15ff',
    'name': "St. Mary's French Immersion",
    'location': {'lat': 46.527943,
     'lng': -84.259557,
     'labeledLatLngs': [{'label': 'display',
       'lat': 46.527943,
       'lng': -84.259557}],
     'distance': 4933,
     'cc': 'CA',
     'country': 'Canada',
     'formattedAddress': ['Canada']},
    'categories': [{'id': '4bf58dd8d48988d1a8941735',
      'name': 'General College & University',
      'pluralName': 'General Colleges & Universities',
      'shortName': 'Education',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/education/other_',
       'suffix': '.png'},
      'primary': True}],
    'referralId': 'v-1599267125',
    'hasPerk': False},
   {'id': '5377a21b498ec1ca19af1caf',
    'name': 'Frenchies Fries',
    'location': {'lat': 46.56907613199578,
     'lng': -84.31996855287444,
     'labeledLatLngs': [{'label': 'display',
   

In [48]:
venues = results['response']['venues']

dataframe = pd.json_normalize(venues)
dataframe.head()

AttributeError: module 'pandas' has no attribute 'json_normalize'

In [22]:
address2 = 'Clermont-Ferrant, France'

geolocator = Nominatim(user_agent="coursera-capstone-project")
location = geolocator.geocode(address2)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

50.4274894 3.0654023


In [None]:
pip install folium==0.5.0

!conda install -c conda-forge geopy --yes 

# The end