In [18]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup as bs

## Scraping data 

In [19]:
url = "https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=969510672"
html_data = requests.get(url).text # get html data
soup = bs(html_data,"html.parser") # convert html to bs4 object

In [20]:
data = soup.find("table", attrs={"class":"wikitable sortable"}) # Extract the table from the bs4 object

In [21]:
duplicated_df = pd.DataFrame(pd.read_html(str(data))[0]) #create dataFrame from the html table
duplicated_df.columns = ["PostalCode", "Borough", "Neighbourhood"] # Change columns names to match course documents
duplicated_df = duplicated_df.loc[duplicated_df["Borough"] != 'Not assigned'] # remove data where "Borough" is "Not Assigned"
duplicated_df.reset_index(inplace=True, drop=True) #reset index

In [22]:
df = pd.DataFrame(columns=["PostalCode", "Borough", "Neighbourhood"]) #create the data frame that won't have duplications

for i in duplicated_df.index: #loop using index
    #retrive the values for each column
    postal = duplicated_df.iloc[i,0]
    borough = duplicated_df.iloc[i,1]
    neighbour = duplicated_df.iloc[i,2]
    
    if postal in df["PostalCode"].values: #if the postal code already exist in the df adjust the neighbourhood name
        old_val = df["Neighbourhood"].loc[df["PostalCode"]==postal].values[0] #get the old neighbourhood name
        neighbour = old_val + ", " + neighbour # make the new neighbourhood name
        df = df.replace(old_val, neighbour) #replace neighbourhood name
        
    else: #if the postal code not in the df
        df = df.append([{"PostalCode":postal, "Borough":borough, "Neighbourhood":neighbour}]) # add the new data to the dataset
        
df.reset_index(inplace=True, drop=True) # reset index
df.head(12) #show headers

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


### Shape results for grade

In [23]:
df.shape

(103, 3)

## Getting Longitude and Latitude

###  First approach using "geocoder" liberary

In [24]:
# import geocoder # import geocoder

# df["Longitude"] = np.nan
# df["Latitude"] = np.nan
# for i in df.index:
#     postal_code =  df.loc[i,"PostalCode"]
#     lat_lng_coords = None
#     while(lat_lng_coords is None):
#         g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
#         lat_lng_coords = g.latlng
#     print(i)
#     df.loc[i,"Latitude"] = lat_lng_coords[0]
#     df.loc[i,"Longitude"] = lat_lng_coords[1]
# df.head(11)

This methode takes forever

### Second approach using the provided csv file

In [29]:
long_lat_df = pd.read_csv("Geospatial_Coordinates.csv") #load the dataset
# create two columns
df["Latitude"] = np.nan
df["Longitude"] = np.nan

for i in df.index: #loop using the index
    postal = long_lat_df["Postal Code"].iloc[i] # get the postal code crospondeing to each longitude and latitude
    # Stack the logitude and latitude with their corrospondeing postal code
    df["Longitude"].loc[df["PostalCode"]==postal] = long_lat_df["Longitude"].iloc[i]
    df["Latitude"].loc[df["PostalCode"]==postal] = long_lat_df["Latitude"].iloc[i]

df.to_csv("Toronto_neighbourhood.csv") #save the data
df.head(11)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


## Toronto Neighbourhood analyzing

importing liberaries

In [30]:
import random # library for random number generation
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values
from IPython.display import Image 
from IPython.core.display import HTML 
from pandas.io.json import json_normalize
import folium # plotting library

Getting Foursquare ID, secretcode, and token

In [31]:
CLIENT_ID = 'T3R3TQXS5UYZX15AUJOZLFZDBEPIWNU00LMJHPSZLBICQAH0' # your Foursquare ID
CLIENT_SECRET = 'V4RHTWCYJPYBTOKK0R1PYGE3Y4NU2WBI5RHZVAIRDGEVWHMN' # your Foursquare Secret
ACCESS_TOKEN = 'SK4JDDL5B03EUDTZTY5TONH3M03JU4S1RDEK3FQ3WDHYD2ZJ' # your FourSquare Access Token
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: T3R3TQXS5UYZX15AUJOZLFZDBEPIWNU00LMJHPSZLBICQAH0
CLIENT_SECRET:V4RHTWCYJPYBTOKK0R1PYGE3Y4NU2WBI5RHZVAIRDGEVWHMN


In [54]:
# I want to have Indian food near Don Mills
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    latitude,
    longitude,
    radius,
    LIMIT)
results = requests.get(url).json['response']
results

<bound method Response.json of <Response [200]>>