# Week 1 - Exercise

# Coursera Capstone Project

#### This project uses the Foursquare API to explore neighborhoods and find a location for a new apartment

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Print the course greeting message.
print('Hello Capstone Project Course!')

Hello Capstone Project Course!


# Week 3 - Exercise

In [3]:
!pip install beautifulsoup4



In [4]:
import requests
from bs4 import BeautifulSoup

In [5]:
# Getting Wikipedia HTML content
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
req = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,
)
# Stop here on an HTTP error (4xx/5xx) instead of parsing an error page later.
req.raise_for_status()

In [6]:
#Create a soup
# Parse the text of the downloaded page with the lxml parser.
soup = BeautifulSoup(markup=req.text, features='lxml')

In [7]:
#Extract table body
# Walk from the 'mw-parser-output' <div> to its first <table>, then to that
# table's <tbody>.
table_body = (
    soup.find('div', class_='mw-parser-output')
        .find('table')
        .find('tbody')
)

In [8]:
#Extract all of the table cells
# find_all is the current Beautiful Soup name; findAll is a deprecated alias.
table_lines = table_body.find_all('td')

In [9]:
#Extracting the postal code, borough and neighborhoods data from the soup
# Each <td> is expected to hold a bold postal code and a <span> that either
# reads 'Not assigned' or contains links: the first link is taken as the
# borough and the remaining links as its neighborhoods.
lst = []
for element in table_lines:
    span = element.find('span')
    code_tag = element.find('b')
    # Skip cells without the expected markup instead of failing on None.
    if span is None or code_tag is None:
        continue
    if span.text == 'Not assigned':
        continue

    link_texts = [link.text for link in element.find_all('a')]
    if not link_texts:
        # No borough link in this cell: skip it rather than silently reusing
        # the borough left over from the previously processed cell.
        continue

    postcode = code_tag.text
    borough, areas = link_texts[0], link_texts[1:]
    lst.append({
        'Postalcode': postcode,
        'Borough': borough,
        'Neighborhood': ','.join(areas)})

In [10]:
#Convert output to the dataframe
# Naming the columns fixes their order and keeps them present even when no
# rows were scraped, so the column selection in the next step cannot KeyError.
df = pd.DataFrame(lst, columns=['Postalcode', 'Borough', 'Neighborhood'])

In [21]:
#Restructure the format
# .copy() makes df_copy an independent frame, so the cleaning steps that follow
# modify it without SettingWithCopyWarning and without any link back to df.
df_copy = df[['Postalcode', 'Borough', 'Neighborhood']].copy()

In [22]:
#Replace blanks with NaN
# Rebind the result instead of using inplace=True: df_copy is derived from df,
# so in-place mutation can raise SettingWithCopyWarning. Rebinding also keeps
# the cell safe to re-run.
df_copy = df_copy.replace(r'^\s*$', np.nan, regex=True)

In [23]:
#Fill neighborhood with borough if not assigned
# Write the filled column back explicitly. fillna(inplace=True) on the
# df_copy.Neighborhood accessor works on an intermediate Series and is not
# guaranteed to update df_copy (it does not under pandas copy-on-write).
# The borough is read from df_copy itself so the fill uses the same cleaned
# frame that is being updated.
df_copy = df_copy.assign(
    Neighborhood=df_copy['Neighborhood'].fillna(df_copy['Borough'])
)

In [25]:
# The code was removed by Watson Studio for sharing.

In [26]:
# Show (rows, columns) of the cleaned postal-code table as a quick size check.
df_copy.shape

(102, 3)

In [27]:
# Preview the first rows only; rendering the whole table bloats the notebook.
df_copy.head(10)

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park,Harbourfront"
3,M6A,North York,"Lawrence Manor,Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park,Ontario Provincial Government"
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern,Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill,Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District,Ryerson"


# Geocoding Exercise

In [17]:
#unable to use geocoder so using alternative library
import pgeocode

# One pgeocode.Nominatim instance per country code, created on first use and
# then reused, so the geocoder is not rebuilt for every single lookup.
_NOMINATIM_CACHE = {}


def _get_nominatim(country='ca'):
    """Return the cached pgeocode.Nominatim for `country`, creating it once."""
    if country not in _NOMINATIM_CACHE:
        _NOMINATIM_CACHE[country] = pgeocode.Nominatim(country)
    return _NOMINATIM_CACHE[country]


def latitude(zipcode):
    """Return the 'latitude' field pgeocode reports for a Canadian postal code.

    Parameters
    ----------
    zipcode : postal code (or prefix such as 'M5A') passed to
        Nominatim.query_postal_code.

    Returns
    -------
    The 'latitude' entry of the query result, returned unchanged.
    """
    return _get_nominatim().query_postal_code(zipcode)['latitude']


def longitude(zipcode):
    """Return the 'longitude' field pgeocode reports for a Canadian postal code.

    Parameters
    ----------
    zipcode : postal code (or prefix such as 'M5A') passed to
        Nominatim.query_postal_code.

    Returns
    -------
    The 'longitude' entry of the query result, returned unchanged.
    """
    return _get_nominatim().query_postal_code(zipcode)['longitude']

In [28]:
#Get the latitude and longitude data using pgeocode
# The helpers are passed to map directly (no lambda wrapper needed). assign
# builds a new frame, so nothing is written into a frame derived from df and
# the cell can be re-run safely.
df_copy = df_copy.assign(
    Latitude=df_copy['Postalcode'].map(latitude),
    Longitude=df_copy['Postalcode'].map(longitude),
)

In [80]:
#Extract the boroughs which are located within the Toronto area
# na=False counts a missing borough as "no match"; without it a NaN in the
# mask makes the boolean indexing raise. regex=False matches the literal text.
# .copy() yields an independent frame that is safe to modify later.
df_toronto = df_copy[
    df_copy['Borough'].str.contains("Toronto", na=False, regex=False)
].copy()
# Preview the first rows only instead of rendering the whole table.
df_toronto.head(10)

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park,Harbourfront",43.6555,-79.3626
4,M7A,Downtown Toronto,"Queen's Park,Ontario Provincial Government",43.6641,-79.3889
9,M5B,Downtown Toronto,"Garden District,Ryerson",43.6572,-79.3783
15,M5C,Downtown Toronto,St. James Town,43.6513,-79.3756
20,M5E,Downtown Toronto,Downtown Toronto,43.6456,-79.3754
24,M5G,Downtown Toronto,Bay Street,43.6564,-79.386
25,M6G,Downtown Toronto,Downtown Toronto,43.6683,-79.4205
30,M5H,Downtown Toronto,"Richmond,King",43.6496,-79.3833
36,M5J,Downtown Toronto,"Harbourfront,Union Station,Toronto Islands",43.623,-79.3936
42,M5K,Downtown Toronto,"Toronto Dominion Centre,Design Exchange",43.6469,-79.3823


### Map of Toronto neighborhoods

In [79]:
import folium

# create map of Toronto using latitude and longitude values
# The map is centred on the coordinates listed for postal code M5A in the
# Toronto table.
TORONTO_CENTER = [43.6555, -79.3626]
map_toronto = folium.Map(location=TORONTO_CENTER, zoom_start=11)
# Alias kept so any later cell that still refers to the old name keeps working.
map_manhattan = map_toronto

# add markers to map
# Rows without coordinates are dropped first: a postal code the geocoder could
# not resolve yields NaN, which cannot be placed on the map.
located = df_toronto.dropna(subset=['Latitude', 'Longitude'])
for lat, lng, name in zip(located['Latitude'], located['Longitude'], located['Neighborhood']):
    # parse_html is a Popup option; it is not passed to CircleMarker, where it
    # is not a documented argument.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(name, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

map_toronto