# CAPSTONE PROJECT - COVID MOBILITY

Importing Liberaries

In [1]:
import pandas as pd 
import json
from geopy.geocoders import Nominatim 
import requests 
from pandas.io.json import json_normalize 
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium 

Importing Data

In [2]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

df=pd.read_html(url,header=0)
df = df[0]
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


Dropping the Rows where Borough is 'Not assigned' and converting 'Not assigned' in Neighborhood to similar to Boroughs

In [3]:
df = df[df.Borough != "Not assigned"].reset_index(drop=True)
df.Neighborhood.replace("Not assigned", df.Borough, inplace=True)
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


Replacing the '/' with ',' to divide the Neighborhood

In [4]:
df['Neighborhood']=df.Neighborhood.str.replace('/',',')
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"


Finding the shape

In [5]:
df.shape

(103, 3)

Importing the Geographical data

In [6]:
df_geo=pd.read_csv('https://cocl.us/Geospatial_data')
df_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Adjusting the Column names and merging the 2 databases to one

In [7]:
df_geo.rename(columns={'Postal Code':'Postal code'},inplace=True)
df_canada=pd.merge(df,df_geo,on=['Postal code'])
df_canada.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494


Selecting only boroughs that contain the word Toronto

In [8]:
Toronto_data = df_canada[df_canada['Borough'].str.contains('Toronto')].reset_index(drop=True)
Toronto_data.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


Getting a Look at how the datapoints are located on the map 

In [9]:
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [10]:
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=15)

for lat, lng, label in zip(Toronto_data['Latitude'], Toronto_data['Longitude'], Toronto_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.8,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

Using Foursquare to Explore the Neighborhoods in Toronto

In [11]:
CLIENT_ID = 'BMCPFLRXKSSQSMCOBF3M3NFRELEGS35ACDJYAO1L4KWA2L2H' 
CLIENT_SECRET = 'K2TJGI5D1XMRHEXLFHUDUEGVHC4TH5LGN514BGGBKF1YKLMP' 
VERSION = '20180605'

In [12]:
def getNearbyVenues(names, latitudes, longitudes, radius=600):
    LIMIT=100
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

toronto_venues = getNearbyVenues(names=Toronto_data['Neighborhood'],
                                   latitudes=Toronto_data['Latitude'],
                                   longitudes=Toronto_data['Longitude'])

Regent Park , Harbourfront
Queen's Park , Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond , Adelaide , King
Dufferin , Dovercourt Village
Harbourfront East , Union Station , Toronto Islands
Little Portugal , Trinity
The Danforth West , Riverdale
Toronto Dominion Centre , Design Exchange
Brockton , Parkdale Village , Exhibition Place
India Bazaar , The Beaches West
Commerce Court , Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West
High Park , The Junction South
North Toronto West
The Annex , North Midtown , Yorkville
Parkdale , Roncesvalles
Davisville
University of Toronto , Harbord
Runnymede , Swansea
Moore Park , Summerhill East
Kensington Market , Chinatown , Grange Park
Summerhill West , Rathnelly , South Hill , Forest Hill SE , Deer Park
CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst  Quay , South Niagara , Island airport
Rosed

In [13]:
print(toronto_venues.shape)
toronto_venues.head()

(2128, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park , Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park , Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park , Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park , Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,"Regent Park , Harbourfront",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot


Grouping the Database on Neighborhood and veryfing how many venues are returned

In [14]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,89,89,89,89,89,89
"Brockton , Parkdale Village , Exhibition Place",41,41,41,41,41,41
Business reply mail Processing CentrE,25,25,25,25,25,25
"CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst Quay , South Niagara , Island airport",18,18,18,18,18,18
Central Bay Street,100,100,100,100,100,100
Christie,18,18,18,18,18,18
Church and Wellesley,100,100,100,100,100,100
"Commerce Court , Victoria Hotel",100,100,100,100,100,100
Davisville,48,48,48,48,48,48
Davisville North,14,14,14,14,14,14


In [15]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 256 uniques categories.


# Analyzing each Neighborhood

In [16]:
Tor_oh = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

Tor_oh['Neighborhood'] = toronto_venues['Neighborhood'] 

fixed_columns = [Tor_oh.columns[-1]] + list(Tor_oh.columns[:-1])
Tor_oh = Tor_oh[fixed_columns]

print('Tor_oh shape:', Tor_oh.shape)
Tor_oh.head()

Tor_oh shape: (2128, 256)


Unnamed: 0,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Amphitheater,Animal Shelter,...,Train Station,Tram Station,Udon Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
Tor_group = Tor_oh.groupby('Neighborhood').mean().reset_index()
print('Tor_group shape:', Tor_group.shape)
Tor_group

Tor_group shape: (39, 256)


Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Amphitheater,...,Train Station,Tram Station,Udon Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011236,0.0,...,0.0,0.0,0.0,0.011236,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton , Parkdale Village , Exhibition Place",0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Business reply mail Processing CentrE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower , King and Spadina , Railway Lands , ...",0.0,0.0,0.055556,0.055556,0.111111,0.166667,0.111111,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,...,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0
7,"Commerce Court , Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,...,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


So lets check all the Venues that returned, so that we can select the Right coloumn names to create the required Database

In [18]:
for col in Tor_group.columns:
    print(col)

Neighborhood
Yoga Studio
Afghan Restaurant
Airport
Airport Food Court
Airport Lounge
Airport Service
Airport Terminal
American Restaurant
Amphitheater
Animal Shelter
Antique Shop
Aquarium
Arepa Restaurant
Art Gallery
Arts & Crafts Store
Asian Restaurant
Athletics & Sports
BBQ Joint
Baby Store
Bagel Shop
Bakery
Bank
Bar
Baseball Stadium
Basketball Stadium
Beach
Beer Bar
Beer Store
Belgian Restaurant
Bike Rental / Bike Share
Bistro
Boat or Ferry
Bookstore
Boutique
Bowling Alley
Brazilian Restaurant
Breakfast Spot
Brewery
Bubble Tea Shop
Building
Burger Joint
Burrito Place
Bus Line
Bus Stop
Butcher
Café
Cajun / Creole Restaurant
Camera Store
Candy Store
Cantonese Restaurant
Caribbean Restaurant
Cheese Shop
Chinese Restaurant
Chiropractor
Chocolate Shop
Church
Climbing Gym
Clothing Store
Cocktail Bar
Coffee Shop
College Arts Building
College Auditorium
College Cafeteria
College Gym
College Quad
College Rec Center
Colombian Restaurant
Comfort Food Restaurant
Comic Shop
Concert Hall
Convenie

Now, we need the Hospitals and pahramacies that are located in these Neighborhods
along with it the Essential groceries,for which I have selected the Farmers Market, so that farmers can directly sell here and middlemen are eliminated,
as the more number of people are involved the higher the chances of Virus spread.

In [19]:
Tor_Covid = Tor_group[["Neighborhood","Hospital","Pharmacy","Farmers Market"]]
Tor_Covid.head()

Unnamed: 0,Neighborhood,Hospital,Pharmacy,Farmers Market
0,Berczy Park,0.0,0.0,0.022472
1,"Brockton , Parkdale Village , Exhibition Place",0.0,0.02439,0.0
2,Business reply mail Processing CentrE,0.0,0.0,0.04
3,"CN Tower , King and Spadina , Railway Lands , ...",0.0,0.0,0.0
4,Central Bay Street,0.0,0.01,0.0


*Cluster Neighborhoods*

Run k-means to cluster the neighborhoods in Toronto into 3 clusters.

In [20]:
kclusters = 3

Tor_grouped_clustering = Tor_Covid.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Tor_grouped_clustering)

kmeans.labels_[0:10] 

array([0, 1, 0, 1, 1, 1, 1, 1, 0, 2], dtype=int32)

In [21]:
Tor_Covid.insert(0, 'ClusterLabels', kmeans.labels_)

In [22]:
Toronto_merge = Toronto_data

Toronto_merge = Toronto_merge.join(Tor_Covid.set_index('Neighborhood'), on='Neighborhood',how = 'right')
Toronto_merge.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude,ClusterLabels,Hospital,Pharmacy,Farmers Market
0,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,0,0.0,0.0,0.017544
1,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494,1,0.0,0.016949,0.0
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,1,0.0,0.0,0.0
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,0.0,0.0,0.02
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,1,0.0,0.0,0.0


Plotting Cluster Neighborhods on the Map for better analysis

In [23]:
import numpy as np

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merge['Latitude'], Toronto_merge['Longitude'], Toronto_merge['Neighborhood'], Toronto_merge['ClusterLabels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
    
       
map_clusters

Examining The Clusters

In [24]:
Toronto_merge.loc[Toronto_merge['ClusterLabels'] == 0, Toronto_merge.columns[[1] + list(range(5, Toronto_merge.shape[1]))]]

Unnamed: 0,Borough,ClusterLabels,Hospital,Pharmacy,Farmers Market
0,Downtown Toronto,0,0.0,0.0,0.017544
3,Downtown Toronto,0,0.0,0.0,0.02
5,Downtown Toronto,0,0.0,0.0,0.022472
15,East Toronto,0,0.0,0.0,0.03125
26,Central Toronto,0,0.0,0.020833,0.020833
30,Downtown Toronto,0,0.01,0.0,0.02
34,Downtown Toronto,0,0.0,0.0,0.02
38,East Toronto,0,0.0,0.0,0.04


In [25]:
Toronto_merge.loc[Toronto_merge['ClusterLabels'] == 1, Toronto_merge.columns[[1] + list(range(5, Toronto_merge.shape[1]))]]

Unnamed: 0,Borough,ClusterLabels,Hospital,Pharmacy,Farmers Market
1,Downtown Toronto,1,0.0,0.016949,0.0
2,Downtown Toronto,1,0.0,0.0,0.0
4,East Toronto,1,0.0,0.0,0.0
6,Downtown Toronto,1,0.0,0.01,0.0
7,Downtown Toronto,1,0.0,0.0,0.0
8,Downtown Toronto,1,0.0,0.0,0.0
10,Downtown Toronto,1,0.0,0.0,0.0
11,West Toronto,1,0.0,0.0,0.0
12,East Toronto,1,0.0,0.0,0.0
13,Downtown Toronto,1,0.0,0.0,0.0


In [26]:
Toronto_merge.loc[Toronto_merge['ClusterLabels'] == 2, Toronto_merge.columns[[1] + list(range(5, Toronto_merge.shape[1]))]]

Unnamed: 0,Borough,ClusterLabels,Hospital,Pharmacy,Farmers Market
9,West Toronto,2,0.0,0.095238,0.0
20,Central Toronto,2,0.0,0.071429,0.0


As we analyse the clusters it is clear that Cluster 1 has many Farmers Market, so that incase of lockdowns, all essential basic food items can be supplied through them, also in these clusters wherever there is shortage of Pharmacy, essential medicines should also be made available for sale from the same.
In Cluster 2 and 3, there is shortage of Farmers markets, so these pharmacies should be utilised for sale of essential food items.
There is huge shortage of Hospitals, except for in cluster 1 in both Clusters 2 & 3, so here also to overcome this Pharmacies should be equiped with necessary First aid kits and staff.
Further basing on the visual map it is clear that as you move away from the coast the clusters expand thereby showcasing the need to establish sufficient number of  camps to overcome the COVID Difficulties.