# Segmenting and Clustering

### Import necessary Libraries

In [1]:
import pandas as pd 
import numpy as np 
import requests
from bs4 import BeautifulSoup

In [2]:
path='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
raw= requests.get(path).text   # obtaining the data 

soup = BeautifulSoup(raw,'xml') #parsing the data

table = soup.find('table',{'class':'wikitable sortable'}) #table
table_rows = table.find_all('tr')


In [3]:
data = []
for row in table_rows:
    data.append([t.text.strip() for t in row.find_all('td')])

df = pd.DataFrame(data, columns=['PostalCode', 'Borough', 'Neighbourhood'])  #making dataframe
df = df[~df['PostalCode'].isnull()]
df #printing the dataframe

Unnamed: 0,PostalCode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Downtown Toronto,Queen's Park
9,M8A,Not assigned,Not assigned
10,M9A,Queen's Park,Not assigned


In [4]:
df.shape

(287, 3)

In [5]:
df.drop(df[df['Borough']=="Not assigned"].index, axis=0, inplace=True)

### Making new dataframe 

In [6]:
df1= df.groupby('PostalCode').agg(lambda x: ','.join(x)) #grouping data
df_new=df1.reset_index() #reordeing new dataframe
df_new

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,"Scarborough,Scarborough","Rouge,Malvern"
1,M1C,"Scarborough,Scarborough,Scarborough","Highland Creek,Rouge Hill,Port Union"
2,M1E,"Scarborough,Scarborough,Scarborough","Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,"Scarborough,Scarborough,Scarborough","East Birchmount Park,Ionview,Kennedy Park"
7,M1L,"Scarborough,Scarborough,Scarborough","Clairlea,Golden Mile,Oakridge"
8,M1M,"Scarborough,Scarborough,Scarborough","Cliffcrest,Cliffside,Scarborough Village West"
9,M1N,"Scarborough,Scarborough","Birch Cliff,Cliffside West"


In [7]:
df_new.shape

(103, 3)

# Second part

### Import necessary Libraries

In [8]:
import random 

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim  # module to convert an address into latitude and longitude values



Solving environment: done

# All requested packages already installed.



####  Getting a csv file that has the geographical coordinates of each postal code

In [9]:
path='http://cocl.us/Geospatial_data'
data=pd.read_csv(path)  

data.head() #printing data       

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [10]:
import pandas as pd #importing libraries
import numpy as np 

data.columns = ['PostalCode', 'Latitude', 'Longitude'] #Changing Columns Names
data.head()  #printing data


Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [11]:
alldata= pd.merge(df_new,data, on='PostalCode') #merge dataframes to get new dataframe
alldata.head() #printing data

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,"Scarborough,Scarborough","Rouge,Malvern",43.806686,-79.194353
1,M1C,"Scarborough,Scarborough,Scarborough","Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,"Scarborough,Scarborough,Scarborough","Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


#### Creating a csv file for merged data


In [12]:
alldata.to_csv(r'geo_data.csv')

# Third part

### Import necessary Libraries

In [13]:
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

import matplotlib.pyplot as plt # plotting library
# backend for rendering plots within the browser

import matplotlib.cm as cm
import matplotlib.colors as colors

%matplotlib inline 

from sklearn.cluster import KMeans 
from sklearn.datasets.samples_generator import make_blobs

print('Libraries imported.')

Solving environment: done

# All requested packages already installed.

Solving environment: done

# All requested packages already installed.

Libraries imported.


In [14]:
new_data=pd.read_csv('geo_data.csv', index_col=0) #loading data 
new_data.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,"Scarborough,Scarborough","Rouge,Malvern",43.806686,-79.194353
1,M1C,"Scarborough,Scarborough,Scarborough","Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,"Scarborough,Scarborough,Scarborough","Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


#### Finding Toronto Coordinates

In [15]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="can_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The Coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The Coordinate of Toronto are 43.653963, -79.387207.


#### Creating Map of Canada

In [16]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat, lng, borough, neighborhood in zip(new_data['Latitude'], new_data['Longitude'], new_data['Borough'], new_data['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto
