## Segmenting and clustering neighbourhoods in the city of Toronto

In [1]:
# Libraries needed
import numpy as np 
import requests
import pandas as pd 
from bs4 import BeautifulSoup # To web-scrape the data
import json # Library to handle JSON files
#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # To transform an address into geographical coordinates
# NOTE(review): pandas.io.json.json_normalize is deprecated since pandas 1.0 in favour of
# pandas.json_normalize -- confirm this import still works with the installed pandas version.
from pandas.io.json import json_normalize # To transform a JSON file into a pandas DataFrame

# Matplotlib for data visualization
import matplotlib.cm as cm
import matplotlib.colors as colors

# Import k-means for the clustering stage
from sklearn.cluster import KMeans

print('Libraries downloaded.')

Libraries downloaded.


In [2]:
# Wikipedia page to scrape (list of Canadian postal codes beginning with M)
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [3]:
# Download the page HTML.
# A timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error (4xx/5xx) into an exception here instead
# of letting an error page be parsed as if it were the postal-code table.
response = requests.get(url, timeout=30)
response.raise_for_status()
result = response.text

In [4]:
# Parse the downloaded HTML with BeautifulSoup, using Python's built-in HTML parser
soup = BeautifulSoup(markup=result, features='html.parser')

In [5]:
# Three independent, empty accumulators for the parsed table columns
postalCode, borough, neighborhood = [], [], []

In [6]:
# Parse the first table on the page into the three column lists.
# Empty the accumulators first so that re-running this cell does not append
# the same rows a second time.
postalCode.clear()
borough.clear()
neighborhood.clear()

for row in soup.find('table').find_all('tr'):
    cells = row.find_all('td')
    # Rows without <td> cells (e.g. a header row) are skipped; requiring all three
    # columns also avoids an IndexError on a short or malformed row.
    if len(cells) >= 3:
        postalCode.append(cells[0].text)
        borough.append(cells[1].text)
        # Only the trailing newline is removed here, as before
        neighborhood.append(cells[2].text.rstrip('\n'))

In [7]:
# Assemble the three parsed lists into a single DataFrame, one column per list
toronto = pd.DataFrame(
    data={
        "PostalCode": postalCode,
        "Borough": borough,
        "Neighborhood": neighborhood,
    }
)

In [8]:
# Preview the first rows only instead of dumping the whole DataFrame
toronto.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A\n,Not assigned\n,Not assigned
1,M2A\n,Not assigned\n,Not assigned
2,M3A\n,North York\n,Parkwoods
3,M4A\n,North York\n,Victoria Village
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront"
5,M6A\n,North York\n,"Lawrence Manor, Lawrence Heights"
6,M7A\n,Downtown Toronto\n,"Queen's Park, Ontario Provincial Government"
7,M8A\n,Not assigned\n,Not assigned
8,M9A\n,Etobicoke\n,"Islington Avenue, Humber Valley Village"
9,M1B\n,Scarborough\n,"Malvern, Rouge"


In [9]:
# Remove the surrounding whitespace and the trailing '\n' left over from parsing.
# The vectorised .str accessor replaces the per-element lambda.
toronto['PostalCode'] = toronto['PostalCode'].str.strip()
toronto['Borough'] = toronto['Borough'].str.strip()

In [10]:
# Keep only the rows whose borough has been assigned, then renumber the index from 0
toronto = toronto.loc[toronto['Borough'].ne('Not assigned')].reset_index(drop=True)

In [11]:
# Preview the first rows only instead of dumping the whole DataFrame
toronto.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [12]:
# Collapse rows that share a postal code and borough into one row,
# joining their neighbourhood names with ", "
toronto_grouped = (
    toronto
    .groupby(["PostalCode", "Borough"], as_index=False)
    .agg(", ".join)
)
# Preview the first rows only instead of dumping the whole DataFrame
toronto_grouped.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [13]:
# Where a neighbourhood is "Not assigned", fall back to the borough name of the same row.
# A boolean mask with .loc writes into the DataFrame itself; assigning to the row
# yielded by iterrows() would only change a temporary copy and leave the frame untouched.
unassigned = toronto_grouped["Neighborhood"] == "Not assigned"
toronto_grouped.loc[unassigned, "Neighborhood"] = toronto_grouped.loc[unassigned, "Borough"]

In [14]:
# Check the shape (rows, columns) of the grouped DataFrame
toronto_grouped.shape

(103, 3)

In [15]:
# Load the CSV at this URL; the columns used downstream are "Postal Code", "Latitude" and "Longitude"
data = pd.read_csv('https://cocl.us/Geospatial_data')

In [16]:
# Rename the key column to "PostalCode" so it matches the column name used in toronto_grouped.
# Reassigning (rather than inplace=True) keeps the rename explicit and chainable.
data = data.rename(columns={"Postal Code": "PostalCode"})


In [17]:
# Attach latitude/longitude to each postal code. The left join keeps every row of
# toronto_grouped, so a postal code that is missing from `data` ends up with NaN coordinates.
new_df = toronto_grouped.merge(data, on="PostalCode", how="left")
# Preview the first rows only instead of dumping the whole DataFrame
new_df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [18]:
#Installing Folium
!pip install folium



In [19]:
# Folium is imported here, after the install step
import folium

# Base map centred on the given [latitude, longitude] at zoom level 10
map_center = [43.6532, -79.3832]
torontomap = folium.Map(location=map_center, zoom_start=10)
torontomap

In [20]:
# Pull the coordinate columns and turn them into a list of [latitude, longitude] pairs
coordinate_columns = ['Latitude', 'Longitude']
locations = new_df[coordinate_columns]
locationlist = locations.values.tolist()
len(locationlist)

103

In [21]:
# Add one marker per row of new_df, using the borough name as the popup text
for coords, borough_name in zip(locationlist, new_df['Borough']):
    folium.Marker(coords, popup=borough_name).add_to(torontomap)
torontomap