# This a project for coursera capstone
## modify By YUJIN
## DATE TIME:2021-3-13

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import geocoder # import geocoder
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
# import k-means from clustering stage
from sklearn.cluster import KMeans
import folium 

### BeautifulSoup scrape the Wikipedia page

In [2]:
url="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

In [3]:
data  = requests.get(url).text 

In [4]:
soup = BeautifulSoup(data,"html5lib")  # create a soup object using the variable 'data'

In [5]:
tag_object=soup.title
tag_object

<title>List of postal codes of Canada: M - Wikipedia</title>

In [6]:
Toronto_data = pd.DataFrame(columns=["PostalCode", "Borough", "Neighborhood"])
for row in soup.find("tbody").find_all("tr"):
    col = row.find_all("td")
    if len(col)>=3:
        post=col[0].getText().strip()
        boro=col[1].getText().strip()
        neib=col[2].getText().strip()
        Toronto_data = Toronto_data.append({"PostalCode":post, "Borough":boro, "Neighborhood":neib}, ignore_index=True)

### Wikipedia data to dataframe
#### Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.

In [7]:
Toronto_data=Toronto_data[~(Toronto_data['Borough'].str.contains('Not assigne'))]

#### If a cell has a borough but a Not assigned  neighborhood, then the neighborhood will be the same as the borough.

In [8]:
Toronto_data.loc[Toronto_data['Neighborhood'].str.contains('Not assigne'),'Neighborhood']=Toronto_data['Borough']

In [9]:
Toronto_data

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [10]:
Toronto_data.shape

(103, 3)

### Try Google Maps Geocoding API to get the latitude and the longitude 

In [13]:
for index, row in Toronto_data.iterrows():
    postal_code=row['PostalCode']
    lat_lng_coords = None
    while(lat_lng_coords is None):
        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng
        print(postal_code,lat_lng_coords)
    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]

M3A None


TypeError: 'NoneType' object is not subscriptable

### use the csv data

In [11]:
latlon=pd.read_csv('./Geospatial_Coordinates.csv')

In [12]:
latlon

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [13]:
data=pd.merge(Toronto_data, latlon, how='inner',on='PostalCode')

In [14]:
data

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


In [17]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude,longitude)

43.6534817 -79.3839347


In [19]:

# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(data['Latitude'], data['Longitude'], data['Borough'], data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto