In [1]:
import pandas as pd #provides high-performance & creates data frames
import requests # to read from html using Requests API.
from bs4 import BeautifulSoup #Getting data out of HTML, XML, and other markup languages.

In [2]:
# --- Download the Wikipedia page and locate the postal-code table -----------
# A timeout keeps the cell from hanging indefinitely, and raise_for_status()
# stops us from parsing an HTTP error page as if it were the article.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
website_url = response.text  # despite the name, this holds the page's HTML text
soup = BeautifulSoup(website_url, 'lxml')  # parse the HTML page
My_table = soup.find('table', {'class': 'wikitable sortable'})  # select only the table
if My_table is None:
    # Fail with a readable message instead of an AttributeError on None below.
    raise ValueError("No 'wikitable sortable' table found on the page; its layout may have changed")

# --- Extract the three text columns from every data row ---------------------
Postcode = []
Borough = []
Neighbourhood = []

for row in My_table.find_all('tr')[1:]:  # [1:] skips the first (header) row
    col = row.find_all('td')
    if len(col) < 3:
        continue  # row without three data cells: nothing to extract
    Postcode.append(col[0].text.strip())
    Borough.append(col[1].text.strip())
    Neighbourhood.append(col[2].text.strip())

columns = {'Postcode': Postcode, 'Borough': Borough, 'Neighbourhood': Neighbourhood}  # column name -> values
# Create a dataframe from the columns variable
df_full = pd.DataFrame(columns)

# --- Clean ------------------------------------------------------------------
# Replace 'Not assigned' neighbourhoods with the borough name. A single .loc
# assignment is used instead of chained indexing (df['col'][mask] = ...), which
# pandas may apply to a temporary copy and which triggers SettingWithCopyWarning.
not_assigned = df_full['Neighbourhood'] == 'Not assigned'
df_full.loc[not_assigned, 'Neighbourhood'] = df_full.loc[not_assigned, 'Borough']

# Remove rows whose borough is 'Not assigned' and renumber the index. Chaining
# returns a fresh frame rather than resetting the index in place on a slice.
df_mid = df_full[df_full['Borough'] != 'Not assigned'].reset_index(drop=True)

# --- Aggregate: one row per postcode ----------------------------------------
# Join the distinct values of each column with commas. pd.unique keeps the
# order of first appearance, so the result is the same on every run (joining
# a set() produced an arbitrary, run-dependent order).
df2_final = (
    df_mid.groupby('Postcode')
    .agg(lambda x: ','.join(pd.unique(x)))
    .reset_index()
)
print(df2_final.shape)
df2_final.head(10)

(103, 3)


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Rouge Hill,Port Union,Highland Creek"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park,Ionview,East Birchmount Park"
7,M1L,Scarborough,"Golden Mile,Clairlea,Oakridge"
8,M1M,Scarborough,"Cliffside,Scarborough Village West,Cliffcrest"
9,M1N,Scarborough,"Birch Cliff,Cliffside West"


# Module 2

**Install Geocoder package**

In [3]:
# Install the geocoder package from within the notebook ("!" runs the line as a shell command).
! pip install geocoder 

Requirement not upgraded as not directly required: geocoder in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages
Requirement not upgraded as not directly required: six in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from geocoder)
Requirement not upgraded as not directly required: ratelim in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from geocoder)
Requirement not upgraded as not directly required: click in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from geocoder)
Requirement not upgraded as not directly required: future in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from geocoder)
Requirement not upgraded as not directly required: requests in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from geocoder)
Requirement not upgraded as not directly required: decorator in /opt/conda/envs/DSX-Python35/lib/python3.5/site-packages (from ratelim->geocoder)
Requirement not upgraded as not directly required: chardet<3.1.0,

**Import the package to get the value**

In [4]:
# Smoke-test the geocoder package with a sample address before using it on the data.
import geocoder
# Query geocoder's Google provider; in the recorded run this came back as
# OVER_QUERY_LIMIT with an empty result.
g = geocoder.google('Mountain View, CA')
g  # bare last expression so the notebook displays the result's repr

<[OVER_QUERY_LIMIT] Google - Geocode [empty]>

**Since the geocoder request failed (OVER_QUERY_LIMIT), the coordinates are loaded from the CSV file instead**

In [5]:
# Load the coordinates CSV and rename its "Postal Code" column to "Postcode"
# so that it matches the key column of the first dataframe.
# index=str additionally converts the row labels to strings.
df_cord = pd.read_csv("http://cocl.us/Geospatial_data").rename(
    index=str, columns={"Postal Code": "Postcode"}
)
df_cord.head(10)

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


**Merging both dataframes on `Postcode`**

In [6]:
# Join the neighbourhood table with the coordinates table on their shared
# 'Postcode' column (pandas' default inner join), then preview the first rows.
df_merge = df2_final.merge(df_cord, on='Postcode')
df_merge.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill,Port Union,Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
