In [5]:
from bs4 import BeautifulSoup
import requests
import numpy as np
import pandas as pd
import random
import folium
from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim

After we import all of the packages we're gonna need for our analysis, we'll start by scraping an online table of neighborhoods from Toronto then set it to a pandas dataframe, df.

In [9]:
data = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text
soup = BeautifulSoup(data,"html5lib")

In [10]:
contents = []
table = soup.find('table')
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['Postal Code'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        contents.append(cell)

In [11]:
df = pd.DataFrame(contents)
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East TorontoBusiness reply mail Processing Cen...,Enclave of M4L
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


Now, since we want to be able to use Foursquare data later on, we want to be able to associate each neighborhood with a set of coordinates. I've downloaded a csv file to my computer of all coordinates of each postal code in Toronto. So we'll go ahead and merge this data in with our neighborhoods dataframe and call it, DF. 

In [12]:
r = pd.read_csv(r'C:\Users\adardick\Downloads\Geospatial_Coordinates.csv')

In [60]:
DF = pd.merge(df, r, on='Postal Code')
DF.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


At this point, we want to be able to get a collection of all venues within a certain radius of all of our neighborhoods. We'll choose 500 meters since that's roughly walking distance.

In [14]:
client_ID = 'N1S1NLEIDD2O4YC10EZ2YONJX5NADGZDM2MDPDHDKWW2NZRH'
secret = 'QDUA3YVZJFXD0OZEV14PS42OW0MJ2BGNAELYKFXVUDAI4YHZ'
version = '20180604'
limit = 30

In [15]:
def getnearbyVenues(neighborhood, lats, lons, radius=500):
    venues = []
    for name, lat, lng in zip(neighborhood, lats, lons):
        
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            client_ID,
            secret,
            version,
            lat,
            lng,
            radius,
            limit)
        
        results = requests.get(url).json()["response"]['groups'][0]['items']
    
        venues.append([(
            name,
            lat,
            lng,
            i['venue']['name'],
            i['venue']['location']['lat'],
            i['venue']['location']['lng'],
            i['venue']['categories'][0]['name']) for i in results])
        
    nearby_venues = pd.DataFrame([item for venue in venues for item in venue])
    nearby_venues.columns = ['Neighborhood',
                  'Neighborhood Latitude',
                  'Neighborhood Longitude',
                  'Venue Name',
                  'Venue Latitude',
                  'Venue Longitude',
                  'Venue Category']
    
     
    return (nearby_venues)

In [16]:
toronto_venues = getnearbyVenues(DF['Neighborhood'], DF['Latitude'], DF['Longitude'])

In [121]:
toronto_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Name,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurant
2,Parkwoods,43.753259,-79.329656,GTA Restoration,43.753396,-79.333477,Fireworks Store
3,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
4,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
5,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
6,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
7,Victoria Village,43.725882,-79.315572,Eglinton Ave E & Sloane Ave/Bermondsey Rd,43.726086,-79.31362,Intersection
8,Victoria Village,43.725882,-79.315572,Pizza Nova,43.725824,-79.31286,Pizza Place
9,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery


Now, we want to be able to analyze what venues we would consider to be benefitial to having around our future coffee shop, and what venues would be considered detrimental to the success of our future coffee shop if they were nearby. After analyzing the list of venues, I created a list of good venues, "good_venues", and a list of bad venues, "bad_venues".

In [17]:
toronto_venues['Venue Category'].unique()

array(['Park', 'Fast Food Restaurant', 'Fireworks Store',
       'Food & Drink Shop', 'Hockey Arena', 'Coffee Shop',
       'Portuguese Restaurant', 'Intersection', 'Pizza Place', 'Bakery',
       'Distribution Center', 'Restaurant', 'Spa', 'Pub',
       'Gym / Fitness Center', 'Historic Site', 'Breakfast Spot',
       'Chocolate Shop', 'Performing Arts Venue', 'Farmers Market',
       'Dessert Shop', 'French Restaurant', 'Mexican Restaurant',
       'Theater', 'Yoga Studio', 'Event Space', 'Café', 'Boutique',
       'Furniture / Home Store', 'Vietnamese Restaurant',
       'Clothing Store', 'Accessories Store', 'Miscellaneous Shop',
       'Italian Restaurant', 'Beer Bar', 'Sushi Restaurant', 'Creperie',
       'Fried Chicken Joint', 'Hobby Shop', 'Burrito Place', 'Diner',
       'Japanese Restaurant', 'Smoothie Shop', 'Bank', 'Sandwich Place',
       'Gym', 'College Auditorium', 'Bar', 'Caribbean Restaurant',
       'Baseball Field', 'Athletics & Sports', 'Gastropub', 'Pharmacy',
   

In [157]:
good_venues = ['Office', 'Airport', 'College Arts Building', 'College Gym', 'Coworking Space', 'Metro Station', 'Bus Station', 'Train Station', 'IT Services', 'Dance Studio', 'Business Service', 'Art Museum', 'Museum', 'Shopping Plaza', 'Shopping Mall', 'Construction & Landscaping', 'Plaza', 'College Auditorium', 'Theater', 'Bookstore', 'Stationery Store', 'Truck Stop', 'History Museum', 'Auto Garage', 'Breakfast Spot']

In [34]:
bad_venues = ['Coffee Shop', 'Café', 'Tea Room', 'Lounge', 'Food Court', 'Airport Food Court']

At this point, we want to one-hot encode our category attribute in order to quantify which neighborhoods have good venues, and which ones have bad. We'll go ahead and group each neighborhood up and count how much of each venues they have individually. 

In [22]:
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood']
                                               
columnss = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
                                               
toronto_onehot = toronto_onehot[columnss]

In [33]:
toronto_groups = toronto_onehot.groupby('Neighborhood').sum().reset_index()
toronto_groups

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Trail,Train Station,Truck Stop,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,Agincourt,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Alderwood, Long Branch",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Bathurst Manor, Wilson Heights, Downsview North",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Bayview Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Bedford Park, Lawrence Manor East",0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Willowdale West,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
96,"Willowdale, Newtonbrook",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
97,Woburn,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
98,Woodbine Heights,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


We'll now create a new dataframe to hold the information we're really looking for, the number of good venues nearby, the number of bad venues nearby, and the ratio of good venues to bad venues. We'll call it toronto_coffee

In [36]:
col = ['Neighborhood', '# of Good Venues', '# of Bad Venues', 'Ratio of good/bad venues']
toronto_coffee = pd.DataFrame(columns=col)
toronto_coffee['Neighborhood'] = toronto_groups['Neighborhood']

We'll create an algorithm to count all of the numbers in a row, under certain categories. So if created "get_sum", we can input the row number and a list of categories, and it'll add up all entries for that row for the given list of categories. Afterwards, we'll fill in our new dataframe with the sums of good and bad venues around each neighborhood.

In [44]:
def get_sum(row, cat):
    s = []
    for i in range(len(cat)):
        s.append(row[cat[i]])
    return np.sum(s)

In [45]:
get_sum(toronto_groups.iloc[0,:], good_venues)

1

In [46]:
for k in np.arange(toronto_groups.shape[0]):
    toronto_coffee.iloc[k, 1] = get_sum(toronto_groups.iloc[k,:], good_venues)
    toronto_coffee.iloc[k, 2] = get_sum(toronto_groups.iloc[k,:], bad_venues)
    
toronto_coffee.head()    

Unnamed: 0,Neighborhood,# of Good Venues,# of Bad Venues,Ratio of good/bad venues
0,Agincourt,1,1,
1,"Alderwood, Long Branch",0,1,
2,"Bathurst Manor, Wilson Heights, Downsview North",1,2,
3,Bayview Village,0,1,
4,"Bedford Park, Lawrence Manor East",0,3,


At this point, now we just have to create a function that divided the number of good venues by the bad venues in order to find our last column.

In [47]:
def special_div(a, b):
    if b == 0:
        return a
    else:
        return a/b

In [49]:
for k in np.arange(toronto_groups.shape[0]):
    toronto_coffee.iloc[k, 3] = special_div(toronto_coffee.iloc[k, 1], toronto_coffee.iloc[k, 2])

toronto_coffee.head()

Unnamed: 0,Neighborhood,# of Good Venues,# of Bad Venues,Ratio of good/bad venues
0,Agincourt,1,1,1.0
1,"Alderwood, Long Branch",0,1,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",1,2,0.5
3,Bayview Village,0,1,0.0
4,"Bedford Park, Lawrence Manor East",0,3,0.0


Now that we have the information we're looking for, we're gona use kclustering to group our neighborhoods up based on these 3 categories. We'll then add the cluster label and our toronto_coffee dataframe to our original DF dataframe with out location data, so we can start to get a bigger picture of the data.

In [59]:
kclusters = 3

toronto_cluster = toronto_coffee.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=42).fit(toronto_cluster)

In [61]:
toronto_coffee['Cluster Label'] = kmeans.labels_

complete_coffee = DF.join(toronto_coffee.set_index('Neighborhood'), on='Neighborhood')

complete_coffee.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,# of Good Venues,# of Bad Venues,Ratio of good/bad venues,Cluster Label
0,M3A,North York,Parkwoods,43.753259,-79.329656,0,0,0.0,0.0
1,M4A,North York,Victoria Village,43.725882,-79.315572,0,1,0.0,0.0
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,3,7,0.428571,1.0
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0,1,0.0,0.0
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494,2,8,0.25,1.0


First, we have to drop all rows that contain null values.

In [69]:
completed_coffee = complete_coffee.dropna(0)
completed_coffee['# of Good Venues'].isna().sum()

0

Now, we're gonna go ahead and map all of our neighborhoods using a folium map and colorcoat them by their cluster number.

In [63]:
address = '770 Don Mills Road North York, ON, Canada'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

In [104]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

rainbow = ['blue', 'black', 'green']

for lat, lon, poi, cluster in zip(completed_coffee['Latitude'], completed_coffee['Longitude'], completed_coffee['Neighborhood'], completed_coffee['Cluster Label']):
    label = folium.Popup(str(poi) + 'Cluster' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat,lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color = rainbow[int(cluster-1)],
        fill_opacity = 0.7).add_to(map_clusters)

map_clusters

Let's look at the data for each of our clusters.

In [95]:
pd.set_option('display.max_rows', None)
one_clust = completed_coffee[completed_coffee['Cluster Label']==0].reset_index()
two_clust = completed_coffee[completed_coffee['Cluster Label']==1].reset_index()
three_clust = completed_coffee[completed_coffee['Cluster Label']==2].reset_index()
one_clust

Unnamed: 0,index,Postal Code,Borough,Neighborhood,Latitude,Longitude,# of Good Venues,# of Bad Venues,Ratio of good/bad venues,Cluster Label
0,0,M3A,North York,Parkwoods,43.753259,-79.329656,0,0,0,0.0
1,1,M4A,North York,Victoria Village,43.725882,-79.315572,0,1,0,0.0
2,3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0,1,0,0.0
3,6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,0,0,0,0.0
4,7,M3B,North York,Don Mills North,43.745906,-79.352188,0,1,0,0.0
5,8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,1,1,1,0.0
6,10,M6B,North York,Glencairn,43.709577,-79.445073,0,0,0,0.0
7,11,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.650943,-79.554724,0,0,0,0.0
8,12,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,1,0,1,0.0
9,14,M4C,East York,Woodbine Heights,43.695344,-79.318389,1,0,1,0.0


In [96]:
two_clust

Unnamed: 0,index,Postal Code,Borough,Neighborhood,Latitude,Longitude,# of Good Venues,# of Bad Venues,Ratio of good/bad venues,Cluster Label
0,2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,3,7,0.428571,1.0
1,4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494,2,8,0.25,1.0
2,9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,3,6,0.5,1.0
3,24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,2,10,0.2,1.0
4,30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,1,7,0.142857,1.0
5,33,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556,2,5,0.4,1.0
6,36,M5J,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752,4,4,1.0,1.0
7,42,M5K,Downtown Toronto,"Toronto Dominion Centre, Design Exchange",43.647177,-79.381576,2,7,0.285714,1.0
8,43,M6K,West Toronto,"Brockton, Parkdale Village, Exhibition Place",43.636847,-79.428191,2,5,0.4,1.0
9,48,M5L,Downtown Toronto,"Commerce Court, Victoria Hotel",43.648198,-79.379817,1,6,0.166667,1.0


In [97]:
three_clust

Unnamed: 0,index,Postal Code,Borough,Neighborhood,Latitude,Longitude,# of Good Venues,# of Bad Venues,Ratio of good/bad venues,Cluster Label
0,13,M3C,North York,Don Mills South,43.7259,-79.340923,1,2,0.5,2.0
1,15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,1,4,0.25,2.0
2,17,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.643515,-79.577201,1,2,0.5,2.0
3,22,M1G,Scarborough,Woburn,43.770992,-79.216917,0,2,0.0,2.0
4,23,M4G,East York,Leaside,43.70906,-79.363452,2,3,0.666667,2.0
5,25,M6G,Downtown Toronto,Christie,43.669542,-79.422564,0,4,0.0,2.0
6,28,M3H,North York,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259,1,2,0.5,2.0
7,37,M6J,West Toronto,"Little Portugal, Trinity",43.647927,-79.41975,1,2,0.5,2.0
8,41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,1,2,0.5,2.0
9,55,M5M,North York,"Bedford Park, Lawrence Manor East",43.733283,-79.41975,0,3,0.0,2.0


After reviewing the clusters, it's clear the cluster 2 has a very high amount of bad venues per neighborhood, cluster 3 has a fewer amount of bad venues per neighborhood (and also a smaller amount of good venues), and cluster 1 has the smallest amount of bad venues per neighborhood. Therefore let's go ahead and take cluster 1 and order it by the number of good venues per venue.

In [100]:
sorted_neigh = one_clust.sort_values(by=['# of Good Venues'], ascending=False)
sorted_neigh

Unnamed: 0,index,Postal Code,Borough,Neighborhood,Latitude,Longitude,# of Good Venues,# of Bad Venues,Ratio of good/bad venues,Cluster Label
48,75,M6R,West Toronto,"Parkdale, Roncesvalles",43.64896,-79.456325,3,1,3,0.0
13,20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,2,1,2,0.0
43,67,M4P,Central Toronto,Davisville North,43.712751,-79.390197,2,0,2,0.0
46,71,M1R,Scarborough,"Wexford, Maryvale",43.750072,-79.295849,2,0,2,0.0
25,44,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577,2,0,2,0.0
24,40,M3K,North York,Downsview East,43.737473,-79.464763,2,0,2,0.0
19,32,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,1,0,1,0.0
39,63,M6N,York,"Runnymede, The Junction North",43.673185,-79.487262,1,0,1,0.0
28,49,M6L,North York,"North Park, Maple Leaf Park, Upwood Park",43.713756,-79.490074,1,0,1,0.0
26,46,M3L,North York,Downsview West,43.739015,-79.506944,1,0,1,0.0


In [102]:
best_neigh = sorted_neigh.head(6).reset_index()
best_neigh

Unnamed: 0,level_0,index,Postal Code,Borough,Neighborhood,Latitude,Longitude,# of Good Venues,# of Bad Venues,Ratio of good/bad venues,Cluster Label
0,48,75,M6R,West Toronto,"Parkdale, Roncesvalles",43.64896,-79.456325,3,1,3,0.0
1,13,20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,2,1,2,0.0
2,43,67,M4P,Central Toronto,Davisville North,43.712751,-79.390197,2,0,2,0.0
3,46,71,M1R,Scarborough,"Wexford, Maryvale",43.750072,-79.295849,2,0,2,0.0
4,25,44,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577,2,0,2,0.0
5,24,40,M3K,North York,Downsview East,43.737473,-79.464763,2,0,2,0.0


We can see that there are only 6 neighborhoods that have at least 2 good venues within walking distance, so we're gonna take a closer look at these 6 neighborhoods. We're gonna find all the venues around the neighborhoods first.

In [108]:
good_neighbors = best_neigh["Neighborhood"].tolist()
good_neighbors

['Parkdale, Roncesvalles',
 'Berczy Park',
 'Davisville North',
 'Wexford, Maryvale',
 'Golden Mile, Clairlea, Oakridge',
 'Downsview East']

In [151]:
good_venue = pd.DataFrame(columns=['Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue Name', 'Venue Latitude', 'Venue Longitude', 'Venue Category'])

good_venue = toronto_venues[toronto_venues['Neighborhood']=='Parkdale, Roncesvalles']

In [152]:
good_venue.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Name,Venue Latitude,Venue Longitude,Venue Category
933,"Parkdale, Roncesvalles",43.64896,-79.456325,Offleash Dog Trail - High Park,43.645485,-79.458747,Dog Run
934,"Parkdale, Roncesvalles",43.64896,-79.456325,The Chocolateria,43.649928,-79.450437,Dessert Shop
935,"Parkdale, Roncesvalles",43.64896,-79.456325,La Cubana,43.650912,-79.450909,Cuban Restaurant
936,"Parkdale, Roncesvalles",43.64896,-79.456325,Inter Steer,43.649796,-79.45031,Eastern European Restaurant
937,"Parkdale, Roncesvalles",43.64896,-79.456325,Revue Cinema,43.651112,-79.450961,Movie Theater


In [153]:
for j in range(5):
    good_venue = good_venues.append(toronto_venues[toronto_venues['Neighborhood']==good_neighbors[j+1]])
    
good_venue

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Name,Venue Latitude,Venue Longitude,Venue Category
933,"Parkdale, Roncesvalles",43.64896,-79.456325,Offleash Dog Trail - High Park,43.645485,-79.458747,Dog Run
934,"Parkdale, Roncesvalles",43.64896,-79.456325,The Chocolateria,43.649928,-79.450437,Dessert Shop
935,"Parkdale, Roncesvalles",43.64896,-79.456325,La Cubana,43.650912,-79.450909,Cuban Restaurant
936,"Parkdale, Roncesvalles",43.64896,-79.456325,Inter Steer,43.649796,-79.45031,Eastern European Restaurant
937,"Parkdale, Roncesvalles",43.64896,-79.456325,Revue Cinema,43.651112,-79.450961,Movie Theater
938,"Parkdale, Roncesvalles",43.64896,-79.456325,Domani Restaurant & Wine Bar,43.649235,-79.450229,Italian Restaurant
939,"Parkdale, Roncesvalles",43.64896,-79.456325,Cider House,43.650688,-79.450685,Restaurant
940,"Parkdale, Roncesvalles",43.64896,-79.456325,Reunion Island Coffee Bar,43.650463,-79.45061,Coffee Shop
941,"Parkdale, Roncesvalles",43.64896,-79.456325,Scout,43.65097,-79.450866,Gift Shop
942,"Parkdale, Roncesvalles",43.64896,-79.456325,Likely General,43.650622,-79.450635,Gift Shop


Now, we're gonna seperate out the good venues around these 6 neighborhoods, as well as the bad ones, and set them to their own dataframe.

In [158]:
g_venue = pd.DataFrame(columns=['Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue Name', 'Venue Latitude', 'Venue Longitude', 'Venue Category'])

for j in range(len(good_venues)):
    g_venue = g_venue.append(good_venue[good_venue['Venue Category'] == good_venues[j]])

g_venue

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Name,Venue Latitude,Venue Longitude,Venue Category
550,Downsview East,43.737473,-79.464763,Toronto Downsview Airport (YZD),43.738883,-79.470111,Airport
550,Downsview East,43.737473,-79.464763,Toronto Downsview Airport (YZD),43.738883,-79.470111,Airport
639,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577,Warden Subway Station,43.711229,-79.279602,Metro Station
640,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577,Warden Station Bus Loop,43.711241,-79.279576,Bus Station
229,Berczy Park,43.644771,-79.373306,Hockey Hall Of Fame (Hockey Hall of Fame),43.646974,-79.377323,Museum
884,"Wexford, Maryvale",43.750072,-79.295849,Wexford Heights Plaza,43.746136,-79.293782,Shopping Mall
841,Davisville North,43.712751,-79.390197,Windowrama by Paul,43.712185,-79.395317,Construction & Landscaping
944,"Parkdale, Roncesvalles",43.64896,-79.456325,A Good Read,43.64947,-79.450339,Bookstore
885,"Wexford, Maryvale",43.750072,-79.295849,Scarborough Garage Door Repair,43.751288,-79.301508,Auto Garage
945,"Parkdale, Roncesvalles",43.64896,-79.456325,Aris Grill,43.650091,-79.450396,Breakfast Spot


In [160]:
b_venue = pd.DataFrame(columns=['Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue Name', 'Venue Latitude', 'Venue Longitude', 'Venue Category'])

for i in range(len(bad_venues)):
    b_venue = b_venue.append(good_venue[good_venue['Venue Category'] == bad_venues[i]])

b_venue

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Name,Venue Latitude,Venue Longitude,Venue Category
940,"Parkdale, Roncesvalles",43.64896,-79.456325,Reunion Island Coffee Bar,43.650463,-79.45061,Coffee Shop
253,Berczy Park,43.644771,-79.373306,Mos Mos,43.64164,-79.377552,Coffee Shop


Now, let's map our 6 neighborhoods to a new folium map. We'll then map the good venues in green and the bad venues in red, so it's a bit easy to read and interpret.

In [163]:
neigh_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, lon, cat in zip(g_venue['Venue Latitude'], g_venue['Venue Longitude'], g_venue['Venue Category']):
    label = folium.Popup(str(cat), parse_html=True)
    folium.CircleMarker(
        [lat,lon],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color = 'green',
        fill_opacity = 0.7).add_to(neigh_clusters)

for lat, lon, cat in zip(b_venue['Venue Latitude'], b_venue['Venue Longitude'], b_venue['Venue Category']):
    label = folium.Popup(str(cat), parse_html=True)
    folium.CircleMarker(
        [lat,lon],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color = 'red',
        fill_opacity = 0.7).add_to(neigh_clusters)
    
for lat, lon, neigh in zip(best_neigh['Latitude'], best_neigh['Longitude'], best_neigh['Neighborhood']):
    label = folium.Popup(str(neigh), parse_html=True)
    folium.CircleMarker(
        [lat,lon],
        radius=5,
        popup=label,
        color='black',
        fill=True,
        fill_color = 'black',
        fill_opacity = 0.7).add_to(neigh_clusters)

neigh_clusters