# Neighbourhoods in Toronto

In [1]:
import requests
import lxml.html as lh
import pandas as pd

In [3]:
import folium

In [5]:
# Wikipedia page "List of postal codes of Canada: M" that is scraped below
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [6]:
# Download the page. The timeout keeps the cell from hanging forever on a
# stalled connection, and raise_for_status() stops the notebook on an HTTP
# error instead of letting an error page be parsed as if it were the table.
page = requests.get(url, timeout=30)
page.raise_for_status()

In [8]:
# Parse the raw bytes of the downloaded response with lxml.html
parsed = lh.fromstring(page.content)

In [9]:
# Select every <tr> element in the document; the XPath is not limited to one
# particular table, so rows of any other table on the page are included too
table_rows = parsed.xpath('//tr')

The postal code table has three data cells in each row. Next, we determine the column names from the header row.

In [10]:
# One (column title, empty list of values) pair per cell of the first row
col = [(header_cell.text_content().strip(), []) for header_cell in table_rows[0]]

In [11]:
# Display the (title, values) pairs; the value lists are still empty here
col

[('Postcode', []), ('Borough', []), ('Neighbourhood', [])]

Now we populate the column lists with the data from the table rows.

In [12]:
# The first row is the header, so the data starts at the second row
for row in table_rows[1:]:

    # Stop at the first row that does not have exactly 3 data cells
    if len(row) != 3:
        break

    # Append the stripped text of each cell to the value list of the column
    # at the same position
    for i, td in enumerate(row.iterchildren()):
        col[i][1].append(td.text_content().strip())

Next, we check the result: every column should contain the same number of values.

In [13]:
# Number of collected values per column
[len(values) for (_title, values) in col]

[288, 288, 288]

Next, we build a DataFrame from the collected columns.

In [14]:
# Turn the (title, values) pairs into a title -> values mapping and build the
# frame from that mapping
Dict = dict(col)
df = pd.DataFrame(Dict)

In [15]:
# Preview the first rows of the scraped table
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


Drop the rows whose borough is 'Not assigned'.

In [16]:
# Keep only the rows whose borough is something other than 'Not assigned'
has_borough = df['Borough'] != 'Not assigned'
df2 = df[has_borough]

Group according to postal code.

In [17]:
# Group the remaining rows by postal code; df3 is iterated later as
# (postal code, rows of that code) pairs
df3=df2.groupby('Postcode')

We need one more DataFrame, so we create an empty one.

In [18]:
# Start from an empty frame that only declares the three output columns
df4 = pd.DataFrame(data=None, columns=['Postcode', 'Borough', 'Neighbourhood'])

Now we can combine the neighbourhoods that share a postal code into a single row.

In [19]:
# Build one record per postal code: the code itself, the borough of the first
# row in the group, and the list of all neighbourhoods that share the code.
# The records are collected first and the frame is created once, instead of
# growing df4 row by row with .loc. Rebuilding df4 from scratch also makes
# this cell give the same result when it is run again.
records = [
    {
        'Postcode': code,
        'Borough': group['Borough'].iloc[0],
        'Neighbourhood': list(group['Neighbourhood']),
    }
    for code, group in df3
]
df4 = pd.DataFrame(records, columns=['Postcode', 'Borough', 'Neighbourhood'])

In [20]:
# Show the first 25 rows of the grouped table
df4.head(25)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"[Rouge, Malvern]"
1,M1C,Scarborough,"[Highland Creek, Rouge Hill, Port Union]"
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]"
3,M1G,Scarborough,[Woburn]
4,M1H,Scarborough,[Cedarbrae]
5,M1J,Scarborough,[Scarborough Village]
6,M1K,Scarborough,"[East Birchmount Park, Ionview, Kennedy Park]"
7,M1L,Scarborough,"[Clairlea, Golden Mile, Oakridge]"
8,M1M,Scarborough,"[Cliffcrest, Cliffside, Scarborough Village West]"
9,M1N,Scarborough,"[Birch Cliff, Cliffside West]"


The last step is to replace 'Not assigned' entries in the Neighbourhood column with the name of the borough.

In [21]:
# Replace a leading 'Not assigned' neighbourhood with the borough name.
# Every row is visited: the earlier range(0, df4.shape[0]-1) stopped one row
# early and never checked the last postal code.
# Each `names` list is the same list object that is stored in df4, so
# assigning to its first element updates the frame in place.
for names, borough in zip(df4['Neighbourhood'], df4['Borough']):
    if names and names[0] == 'Not assigned':
        names[0] = borough

In [22]:
# (number of rows, number of columns) of the grouped table
df4.shape

(103, 3)

In [23]:
# Read the coordinates table from a CSV file in the working directory
latlong=pd.read_csv('Geospatial_Coordinates.csv')

In [25]:
# Show the first 25 rows of the coordinates table
latlong.head(25)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


After visually checking that the postal codes appear in the same order in both tables, we can add the two coordinate columns to the `df4` DataFrame.

In [26]:
# Look up each row's latitude by its postal code instead of copying the column
# by row position, so the values stay correct even if the two tables are
# ordered differently or one of them contains extra rows.
df4['Latitude'] = df4['Postcode'].map(latlong.set_index('Postal Code')['Latitude'])

In [27]:
# Look up each row's longitude by its postal code instead of copying the column
# by row position, so the values stay correct even if the two tables are
# ordered differently or one of them contains extra rows.
df4['Longitude'] = df4['Postcode'].map(latlong.set_index('Postal Code')['Longitude'])

In [28]:
# Preview the table with the coordinate columns added
df4.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"[Rouge, Malvern]",43.806686,-79.194353
1,M1C,Scarborough,"[Highland Creek, Rouge Hill, Port Union]",43.784535,-79.160497
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]",43.763573,-79.188711
3,M1G,Scarborough,[Woburn],43.770992,-79.216917
4,M1H,Scarborough,[Cedarbrae],43.773136,-79.239476


## Map of Toronto

In [29]:
# Create the base map. The location passed here is the latitude/longitude pair
# listed for postal code M1B in the table above.
toronto_map=folium.Map(location=(43.806686,-79.194353),zoom_start=10)

In [30]:
# Display the base map
toronto_map

Then we add the neighbourhoods to the map. A GeoJSON file can be found on the internet; there are several sources of geographical information.

In [31]:
# Path of the GeoJSON file, relative to the working directory
toronto_geo = 'Toronto.geojson'

In [36]:
# Draw the GeoJSON shapes on the map. Map.choropleth() is deprecated in folium
# in favour of the Choropleth class, which is added to the map as a layer.
# The trailing semicolon keeps the cell from echoing the returned layer object.
folium.Choropleth(geo_data=toronto_geo).add_to(toronto_map);

In [37]:
# Display the map again after the GeoJSON layer has been added
toronto_map