# Question 1

In [1]:
# Start by creating a new Notebook for this assignment.
# setup import
import pandas as pd 
import wikipedia as wp
from bs4 import BeautifulSoup

# Scrape https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M and load
# its table of postal codes into a pandas dataframe.
# wp.page() is given the article title (the h1 element of the page).
postal_code_page = wp.page("List of postal codes of Canada: M")
html = postal_code_page.html().encode("UTF-8")

# read_html returns a list with one dataframe per table found in the HTML;
# index 0 is the table used here, with its first row taken as the column header.
page_tables = pd.read_html(html, header=0)
df = page_tables[0]

# Keep only the rows that have an assigned borough; rows whose borough is
# 'Not assigned' are dropped.
has_borough = df['Borough'] != 'Not assigned'
df = df[has_borough]

# One postal code can appear on several rows, one per neighborhood (e.g. M5A with
# Harbourfront and Regent Park). Collapse them to a single row per
# (postal code, borough) with the neighborhood names joined by ', '.
neighborhoods_by_code = df.groupby(['Postal Code', 'Borough'])['Neighborhood']
joined_neighborhoods = neighborhoods_by_code.apply(', '.join)
df = joined_neighborhoods.to_frame().reset_index()

# If a row has a borough but a 'Not assigned' neighborhood, the neighborhood takes
# the borough's name (e.g. Queen's Park ends up in both columns).
# This is done with a boolean-mask .loc assignment: the rows yielded by
# df.iterrows() are copies, so assigning to them inside a loop leaves df unchanged.
neighborhood_missing = df['Neighborhood'] == 'Not assigned'
df.loc[neighborhood_missing, 'Neighborhood'] = df.loc[neighborhood_missing, 'Borough']

# Display the cleaned dataframe.
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [2]:
# Sanity check: (number of rows, number of columns) of the cleaned dataframe.
df.shape

(103, 3)

# Question 2

In [3]:
# in order to utilize the Foursquare location data, we need to get the latitude and the longitude coordinates of each neighborhood.
import requests
import io

# Download the CSV of postal-code coordinates. The timeout keeps the cell from
# hanging forever on a stalled connection, and raise_for_status() stops on an
# HTTP error instead of letting an error page be decoded and parsed as CSV.
url = "http://cocl.us/Geospatial_data"
response = requests.get(url, timeout=30)
response.raise_for_status()
s = response.content
c = pd.read_csv(io.StringIO(s.decode('utf-8')))

# Rename the columns so the postal-code column carries the same name as in df,
# which allows the two dataframes to be merged on it.
c.columns = ['Postal Code', 'Latitude', 'Longitude']
df = pd.merge(c, df, on='Postal Code')

# Reorder the columns and show the dataframe.
df = df[['Postal Code', 'Borough', 'Neighborhood', 'Latitude', 'Longitude']]
df

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


# Question 3

In [12]:
# Keep only the rows whose borough name contains 'Toronto'.
in_toronto = df['Borough'].str.contains('Toronto')
Toronto = df[in_toronto]
Toronto

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Coordinates
37,M4E,East Toronto,The Beaches,43.676357,-79.293031,"(-79.2930312, 43.67635739999999)"
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,"(-79.352188, 43.6795571)"
42,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572,"(-79.31557159999998, 43.6689985)"
43,M4M,East Toronto,Studio District,43.659526,-79.340923,"(-79.340923, 43.6595255)"
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,"(-79.3887901, 43.7280205)"
45,M4P,Central Toronto,Davisville North,43.712751,-79.390197,"(-79.3901975, 43.7127511)"
46,M4R,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678,"(-79.40567840000001, 43.7153834)"
47,M4S,Central Toronto,Davisville,43.704324,-79.38879,"(-79.3887901, 43.7043244)"
48,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,"(-79.38315990000001, 43.6895743)"
49,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,"(-79.4000493, 43.68641229999999)"


In [14]:
from geopy.geocoders import Nominatim
import folium

# Geocode the city name so the map can be centered on it.
address = 'Toronto'
geolocator = Nominatim(user_agent="Toronto_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on location.latitude below.
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude

Toronto_map = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row of Toronto, with a "<neighborhood>, <borough>" popup.
for lat, lng, borough, neighborhood in zip(Toronto['Latitude'], Toronto['Longitude'],
                                           Toronto['Borough'], Toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    # parse_html is a Popup option (set on the line above), so it is not
    # repeated on the CircleMarker.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(Toronto_map)

Toronto_map