# Notebook for Capstone project: Battle of Neighborhoods


In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-2.0.0                |     pyh9f0ad1d_0          63 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    certifi-2020.6.20          |   py36h9f0ad1d_0         151 KB  conda-forge
    ca-certificates-2020.6.20  |       hecda079_0         145 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0          conda-forge
    geopy:           

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M" 
data = pd.read_html(url, header = 0)
data2 = data[0]
NA = data2[ data2['Borough'] == "Not assigned" ].index
 
# Delete these row from dataFrame
data2.drop(NA , inplace=True)
data2.reset_index(drop=True, inplace=True)
data2.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [3]:
#import geodata with geographical cordinates and add to the Toronto Neigborhood data frame
geo_data = pd.read_csv("http://cocl.us/Geospatial_data")
neighborhoods = pd.merge(data2, geo_data, how = "inner", on = "Postal Code")
neighborhoods.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


#### Use geopy library to get the latitude and longitude values of Toronto.

In [5]:
address = 'Toronto'

geolocator = Nominatim(user_agent="tr_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


#### Create a map of Toronto with neighborhoods superimposed on top.

In [6]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### There are 13 Priority Investment Neighborhoods (PIN) in Toronto.

#### We will narrow down our options from previous research to these neighborhoods and explore these 13 neighborhhods to find out which may be the best for Launching Department store.
#### They are as follows:

- Eglinton East-Kennedy Park</a>
- Crescent Town</a>
- Dorset Park</a>
- Flemmingdon Park-Victoria Village</a>
- Jamestown</a>
- Jane-Finch</a>
- Kingston-Galloway</a>
- Lawrence Heights</a>
- Malvern</a>
- Scarborough Village</a>
- Steeles-L'Amoreaux</a>
- Westminster-Brownson</a>
- Weston-Mt.Dennis</a>

See url : http://3cities.neighbourhoodchange.ca/files/2011/05/2006-Torontos-13-Priority-Nhoods-Method-for-selecting-presentation.pdf

### Creating a Dataframe of the Priority Investment Neighborhood (PIN) with Postal Codes

In [7]:
pin_dict = {"Postal Code": ["M1K","M4C","M1P","M3C","M4X","M3N","M1E","M6A","M1B","M1J","M1W","M6R","M9N",], 
            "Neighborhood" :["Eglinton East-Kennedy Park","Crescent Town","Dorset Park","Flemingdon Park-Victoria Village",
                            "St.Jamestown","Jane-Finch","Kingston-Galloway","Lawrence Heights","Malvern","Scarborough Village",
                            "Steeles-L'Amoreaux","Westminster-Brownson","Weston-Mt.Dennis"]} 
        

In [8]:
df = pd.DataFrame (pin_dict, columns = ['Postal Code','Neighborhood'])
df

Unnamed: 0,Postal Code,Neighborhood
0,M1K,Eglinton East-Kennedy Park
1,M4C,Crescent Town
2,M1P,Dorset Park
3,M3C,Flemingdon Park-Victoria Village
4,M4X,St.Jamestown
5,M3N,Jane-Finch
6,M1E,Kingston-Galloway
7,M6A,Lawrence Heights
8,M1B,Malvern
9,M1J,Scarborough Village


### Merge Neighborhoods data frame with PIN Dataframe

In [9]:
neighborhoods.drop(columns = "Neighborhood", axis = 1, inplace = True)
neighborhoods

Unnamed: 0,Postal Code,Borough,Latitude,Longitude
0,M3A,North York,43.753259,-79.329656
1,M4A,North York,43.725882,-79.315572
2,M5A,Downtown Toronto,43.65426,-79.360636
3,M6A,North York,43.718518,-79.464763
4,M7A,Downtown Toronto,43.662301,-79.389494
5,M9A,Etobicoke,43.667856,-79.532242
6,M1B,Scarborough,43.806686,-79.194353
7,M3B,North York,43.745906,-79.352188
8,M4B,East York,43.706397,-79.309937
9,M5B,Downtown Toronto,43.657162,-79.378937


In [10]:
pin_data = pd.merge(df, neighborhoods, how = "inner", on = "Postal Code")
pin_data

Unnamed: 0,Postal Code,Neighborhood,Borough,Latitude,Longitude
0,M1K,Eglinton East-Kennedy Park,Scarborough,43.727929,-79.262029
1,M4C,Crescent Town,East York,43.695344,-79.318389
2,M1P,Dorset Park,Scarborough,43.75741,-79.273304
3,M3C,Flemingdon Park-Victoria Village,North York,43.7259,-79.340923
4,M4X,St.Jamestown,Downtown Toronto,43.667967,-79.367675
5,M3N,Jane-Finch,North York,43.761631,-79.520999
6,M1E,Kingston-Galloway,Scarborough,43.763573,-79.188711
7,M6A,Lawrence Heights,North York,43.718518,-79.464763
8,M1B,Malvern,Scarborough,43.806686,-79.194353
9,M1J,Scarborough Village,Scarborough,43.744734,-79.239476


#### Define Foursquare Credentials and Version

In [11]:
# Use foursquare APIcrednetials to enable access to venues data from the four square database
CLIENT_ID = 'OF35QXMM5TXNQ0BXPCP42UBURIZXZMCWFQ34O0W0HKPNVLUV' # Foursquare ID
CLIENT_SECRET = '2ROJOXMVFYBNWT5KZT4MAAMIKJ2OGC32A1OYZRPF52TKDPVH' # Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: OF35QXMM5TXNQ0BXPCP42UBURIZXZMCWFQ34O0W0HKPNVLUV
CLIENT_SECRET:2ROJOXMVFYBNWT5KZT4MAAMIKJ2OGC32A1OYZRPF52TKDPVH


#### Explore the 13 PINs in our dataframe.

In [12]:
# Neighborhood 1 - Eglinton East-Kennedy Park
Kennedy_Park_Lat = 43.727929
Kennedy_Park_Lng = 79.262029

In [13]:
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    Kennedy_Park_Lat, 
    Kennedy_Park_Lng)

In [14]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5f01e0245947492e3e387695'},
 'response': {'suggestedRadius': 30000,
  'headerLocation': 'Current map view',
  'headerFullLocation': 'Current map view',
  'headerLocationGranularity': 'unknown',
  'totalResults': 5,
  'suggestedBounds': {'ne': {'lat': 43.795160718106665,
    'lng': 79.49670143127442},
   'sw': {'lat': 43.46582710424381, 'lng': 79.19765739440918}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5392a89f498eb0e46ce7b303',
       'name': 'Нурниса',
       'location': {'lat': 43.78019100838563,
        'lng': 79.45421799230449,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.78019100838563,
          'lng': 79.45421799230449}],
        'distance': 16512,
        'cc': 'KZ',
        'country': 'Қазақста

In [15]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [16]:
# clean up json file and structure as a dataframe

venues = results['response']['groups'][0]['items']
    
all_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
all_venues =all_venues.loc[:, filtered_columns]

# filter the category for each row
all_venues['venue.categories'] = all_venues.apply(get_category_type, axis=1)

# clean columns
all_venues.columns = [col.split(".")[-1] for col in all_venues.columns]

all_venues

Unnamed: 0,name,categories,lat,lng
0,Нурниса,Cafeteria,43.780191,79.454218
1,Таскарасу,Train Station,43.737984,79.483109
2,Река Чарын,River,43.51868,79.251251
3,У Гули,Café,43.539589,79.445572
4,Чарынский Каньен,Nature Preserve,43.480797,79.21125


### Lets create a function that returns all venues for each Neighborhood in our PIN DF

In [17]:
# Coordinates
toronto_Lat = 43.6534817
toronto_Lng = -79.3839347

In [18]:
LIMIT = 500 # limit of number of venues returned by Foursquare API

radius = 2000 # define radius
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    toronto_Lat, 
    toronto_Lng, 
    radius, 
    LIMIT)

In [19]:
venue_results = requests.get(url).json()

In [20]:
# clean up json file and structure as a dataframe

venues_raw = venue_results['response']['groups'][0]['items']
    
tr_venues = json_normalize(venues_raw) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
tr_venues =tr_venues.loc[:, filtered_columns]

# filter the category for each row
tr_venues['venue.categories'] = tr_venues.apply(get_category_type, axis=1)

# clean columns
tr_venues.columns = [col.split(".")[-1] for col in tr_venues.columns]

tr_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Downtown Toronto,Neighborhood,43.653232,-79.385296
1,Nathan Phillips Square,Plaza,43.65227,-79.383516
2,Elgin And Winter Garden Theatres,Theater,43.653394,-79.378507
3,Indigo,Bookstore,43.653515,-79.380696
4,UNIQLO ユニクロ,Clothing Store,43.65591,-79.380641


In [47]:
tr_venues.shape

(100, 4)

In [22]:
#define a function that will repeat the process above for PIN neighborhood in Toronto
def getPINVenues(names, latitudes, longitudes, radius=2000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    pin_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    pin_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(pin_venues)

In [23]:
pin_venues_df = getPINVenues(names=pin_data['Neighborhood'],
                                   latitudes=pin_data['Latitude'],
                                   longitudes=pin_data['Longitude']
                                  )

Eglinton East-Kennedy Park
Crescent Town
Dorset Park
Flemingdon Park-Victoria Village
St.Jamestown
Jane-Finch
Kingston-Galloway
Lawrence Heights
Malvern
Scarborough Village
Steeles-L'Amoreaux
Westminster-Brownson
Weston-Mt.Dennis


In [24]:
pin_venues_df.shape

(996, 7)

In [25]:
# Number of venues returned for each PIN Neighborhood
pin_venues_df.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Crescent Town,100,100,100,100,100,100
Dorset Park,95,95,95,95,95,95
Eglinton East-Kennedy Park,58,58,58,58,58,58
Flemingdon Park-Victoria Village,95,95,95,95,95,95
Jane-Finch,48,48,48,48,48,48
Kingston-Galloway,39,39,39,39,39,39
Lawrence Heights,100,100,100,100,100,100
Malvern,49,49,49,49,49,49
Scarborough Village,77,77,77,77,77,77
St.Jamestown,100,100,100,100,100,100


# Analysing the Neighborhoods by Venues type

In [26]:
# one hot encoding
pin_onehot = pd.get_dummies(pin_venues_df[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
pin_onehot['Neighborhood'] = pin_venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [pin_onehot.columns[-1]] + list(pin_onehot.columns[:-1])
pin_onehot = pin_onehot[fixed_columns]

pin_onehot

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,American Restaurant,Amphitheater,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Automotive Shop,BBQ Joint,Badminton Court,Bakery,Bank,Bar,Baseball Field,Beach,Beer Bar,Beer Store,Big Box Store,Bike Shop,Bookstore,Botanical Garden,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Butcher,Café,Cantonese Restaurant,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Clothing Store,Cocktail Bar,Coffee Shop,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Creperie,Cuban Restaurant,Curling Ice,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Service,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Financial or Legal Service,Fish & Chips Shop,Fish Market,Flower Shop,Food & Drink Shop,Food Court,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gaming Cafe,Garden,Gas Station,Gastropub,Gay Bar,Gift Shop,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gymnastics Gym,Hardware Store,Historic Site,History Museum,Hockey Arena,Hookah Bar,Hotel,Hotpot Restaurant,Hungarian Restaurant,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kitchen Supply Store,Korean Restaurant,Laser Tag,Latin American Restaurant,Leather Goods Store,Light Rail Station,Liquor Store,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Movie Theater,Music Store,New American Restaurant,Noodle House,Optical Shop,Other Great Outdoors,Paintball Field,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Playground,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Restaurant,Rock Climbing Spot,Salon / Barbershop,Sandwich Place,Scenic Lookout,Science Museum,Seafood Restaurant,Shopping Mall,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Soccer Stadium,Soup Place,Spa,Sporting Goods Shop,Sports Bar,Sri Lankan Restaurant,Stables,Steakhouse,Supermarket,Sushi Restaurant,Taiwanese Restaurant,Tapas Restaurant,Tattoo Parlor,Tea Room,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Theme Park Ride / Attraction,Theme Restaurant,Thrift / Vintage Store,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Turkish Restaurant,Video Game Store,Vietnamese Restaurant,Volleyball Court,Warehouse Store,Wine Shop,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Zoo,Zoo Exhibit
0,Eglinton East-Kennedy Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
1,Eglinton East-Kennedy Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Eglinton East-Kennedy Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Eglinton East-Kennedy Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Eglinton East-Kennedy Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,Eglinton East-Kennedy Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
6,Eglinton East-Kennedy Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,Eglinton East-Kennedy Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,Eglinton East-Kennedy Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,Eglinton East-Kennedy Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [27]:
pin_onehot.shape

(996, 189)

# Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [28]:
pin_group = pin_onehot.groupby('Neighborhood').mean().reset_index()
pin_group

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,American Restaurant,Amphitheater,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Automotive Shop,BBQ Joint,Badminton Court,Bakery,Bank,Bar,Baseball Field,Beach,Beer Bar,Beer Store,Big Box Store,Bike Shop,Bookstore,Botanical Garden,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Butcher,Café,Cantonese Restaurant,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Clothing Store,Cocktail Bar,Coffee Shop,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Creperie,Cuban Restaurant,Curling Ice,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Donut Shop,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Service,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Financial or Legal Service,Fish & Chips Shop,Fish Market,Flower Shop,Food & Drink Shop,Food Court,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gaming Cafe,Garden,Gas Station,Gastropub,Gay Bar,Gift Shop,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gymnastics Gym,Hardware Store,Historic Site,History Museum,Hockey Arena,Hookah Bar,Hotel,Hotpot Restaurant,Hungarian Restaurant,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kitchen Supply Store,Korean Restaurant,Laser Tag,Latin American Restaurant,Leather Goods Store,Light Rail Station,Liquor Store,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Movie Theater,Music Store,New American Restaurant,Noodle House,Optical Shop,Other Great Outdoors,Paintball Field,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Playground,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Restaurant,Rock Climbing Spot,Salon / Barbershop,Sandwich Place,Scenic Lookout,Science Museum,Seafood Restaurant,Shopping Mall,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Soccer Stadium,Soup Place,Spa,Sporting Goods Shop,Sports Bar,Sri Lankan Restaurant,Stables,Steakhouse,Supermarket,Sushi Restaurant,Taiwanese Restaurant,Tapas Restaurant,Tattoo Parlor,Tea Room,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Theme Park Ride / Attraction,Theme Restaurant,Thrift / Vintage Store,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Turkish Restaurant,Video Game Store,Vietnamese Restaurant,Volleyball Court,Warehouse Store,Wine Shop,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Zoo,Zoo Exhibit
0,Crescent Town,0.0,0.0,0.02,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.02,0.02,0.02,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.04,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.06,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.04,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.01,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.01,0.01,0.0,0.03,0.04,0.01,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Dorset Park,0.0,0.0,0.010526,0.0,0.0,0.0,0.021053,0.0,0.0,0.0,0.010526,0.010526,0.031579,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.021053,0.0,0.0,0.010526,0.031579,0.0,0.0,0.010526,0.021053,0.0,0.0,0.0,0.0,0.010526,0.0,0.010526,0.0,0.021053,0.0,0.0,0.0,0.084211,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.073684,0.010526,0.0,0.0,0.010526,0.0,0.010526,0.0,0.0,0.010526,0.0,0.010526,0.0,0.0,0.031579,0.0,0.0,0.0,0.0,0.0,0.0,0.021053,0.010526,0.0,0.010526,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.010526,0.021053,0.021053,0.010526,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.031579,0.0,0.0,0.031579,0.031579,0.042105,0.0,0.0,0.010526,0.0,0.0,0.031579,0.0,0.0,0.042105,0.0,0.0,0.010526,0.010526,0.0,0.0,0.0,0.0,0.021053,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.021053,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.010526,0.0,0.021053,0.0,0.0,0.0,0.0,0.0
2,Eglinton East-Kennedy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.017241,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.017241,0.017241,0.0,0.0,0.017241,0.017241,0.017241,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.068966,0.0,0.0,0.0,0.068966,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.017241,0.017241,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068966,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.017241,0.068966,0.017241,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.017241,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.017241,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.017241,0.051724,0.017241,0.0,0.0,0.0,0.017241,0.0,0.017241,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.017241,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.017241,0.0,0.0,0.0,0.017241,0.0,0.017241,0.0,0.0,0.0,0.0,0.0
3,Flemingdon Park-Victoria Village,0.0,0.010526,0.010526,0.0,0.010526,0.0,0.021053,0.0,0.0,0.010526,0.0,0.010526,0.021053,0.0,0.0,0.0,0.0,0.021053,0.0,0.010526,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.010526,0.010526,0.0,0.0,0.010526,0.010526,0.010526,0.0,0.073684,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.010526,0.0,0.010526,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.021053,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.010526,0.0,0.010526,0.031579,0.010526,0.0,0.0,0.0,0.010526,0.0,0.0,0.010526,0.0,0.0,0.010526,0.010526,0.021053,0.021053,0.042105,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.010526,0.031579,0.0,0.021053,0.0,0.010526,0.0,0.0,0.010526,0.0,0.010526,0.010526,0.042105,0.0,0.0,0.0,0.021053,0.031579,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.031579,0.0,0.010526,0.0,0.021053,0.0,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.010526,0.0,0.0,0.010526,0.0,0.021053,0.010526,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021053,0.0,0.0,0.010526,0.0,0.0,0.0,0.010526,0.0,0.0,0.0,0.0
4,Jane-Finch,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.020833,0.0,0.166667,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.020833,0.0,0.0,0.020833,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.020833,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.020833,0.020833,0.0,0.020833,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Kingston-Galloway,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.025641,0.025641,0.0,0.0,0.0,0.051282,0.025641,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051282,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051282,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.025641,0.0,0.025641,0.025641,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051282,0.0,0.0,0.0,0.025641,0.128205,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Lawrence Heights,0.01,0.0,0.02,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.09,0.0,0.05,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.01,0.04,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.01,0.02,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.02,0.01,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.02,0.0,0.04,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0
7,Malvern,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.020408,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.040816,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.061224,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.040816,0.0,0.0,0.020408,0.0,0.0,0.020408,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.020408,0.0,0.0,0.0,0.0,0.040816,0.0,0.0,0.0,0.020408,0.0,0.040816,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.040816,0.346939
8,Scarborough Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025974,0.012987,0.0,0.0,0.0,0.025974,0.025974,0.0,0.012987,0.0,0.0,0.012987,0.025974,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.012987,0.0,0.0,0.0,0.012987,0.0,0.103896,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.025974,0.012987,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.103896,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.012987,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.051948,0.025974,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.025974,0.012987,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025974,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.012987,0.012987,0.0,0.0,0.0,0.051948,0.064935,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.051948,0.0,0.0,0.012987,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.012987,0.0,0.012987,0.0,0.0,0.0
9,St.Jamestown,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.06,0.02,0.01,0.0,0.0,0.01,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.03,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.01,0.0,0.03,0.03,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.02,0.02,0.03,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.03,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0


# Print each neighborhood along with the top 10 most common venue

In [29]:
pin_group.columns

Index(['Neighborhood', 'Accessories Store', 'Afghan Restaurant',
       'American Restaurant', 'Amphitheater', 'Art Gallery',
       'Arts & Crafts Store', 'Asian Restaurant', 'Athletics & Sports',
       'Automotive Shop',
       ...
       'Vietnamese Restaurant', 'Volleyball Court', 'Warehouse Store',
       'Wine Shop', 'Wings Joint', 'Women's Store', 'Xinjiang Restaurant',
       'Yoga Studio', 'Zoo', 'Zoo Exhibit'],
      dtype='object', length=189)

In [33]:
top_venues = 10

for hood in pin_group['Neighborhood']:
    print("----"+hood+"----")
    temp = pin_group[pin_group['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(top_venues))
    print('\n')

----Crescent Town----
                  venue  freq
0                  Park  0.06
1           Coffee Shop  0.06
2             Gastropub  0.04
3           Pizza Place  0.04
4                  Café  0.04
5  Ethiopian Restaurant  0.03
6       Thai Restaurant  0.03
7        Ice Cream Shop  0.03
8              Pharmacy  0.03
9        Breakfast Spot  0.02


----Dorset Park----
                  venue  freq
0           Coffee Shop  0.08
1  Fast Food Restaurant  0.07
2           Pizza Place  0.04
3        Sandwich Place  0.04
4                  Bank  0.03
5           Gas Station  0.03
6             Pet Store  0.03
7        Breakfast Spot  0.03
8              Pharmacy  0.03
9                  Park  0.03


----Eglinton East-Kennedy Park----
                  venue  freq
0         Grocery Store  0.07
1    Chinese Restaurant  0.07
2           Coffee Shop  0.07
3  Fast Food Restaurant  0.07
4              Pharmacy  0.05
5            Beer Store  0.03
6        Sandwich Place  0.03
7        Discount S

# Putting the Most Common venues in a Data Frame

In [34]:
# define a function to sort the venues in descending order
def return_most_common_venues(row, top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:top_venues]

In [35]:
# this function creates the new dataframe and display the top 10 venues for each neighborhood
top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
pin_venues_sorted = pd.DataFrame(columns=columns)
pin_venues_sorted['Neighborhood'] = pin_group['Neighborhood']

for ind in np.arange(pin_group.shape[0]):
    pin_venues_sorted.iloc[ind, 1:] = return_most_common_venues(pin_group.iloc[ind, :], top_venues)

pin_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Crescent Town,Park,Coffee Shop,Gastropub,Café,Pizza Place,Thai Restaurant,Ice Cream Shop,Ethiopian Restaurant,Pharmacy,Bar
1,Dorset Park,Coffee Shop,Fast Food Restaurant,Sandwich Place,Pizza Place,Restaurant,Breakfast Spot,Bank,Gas Station,Pharmacy,Pet Store
2,Eglinton East-Kennedy Park,Coffee Shop,Fast Food Restaurant,Grocery Store,Chinese Restaurant,Pharmacy,Beer Store,Bank,Sandwich Place,Discount Store,Pizza Place
3,Flemingdon Park-Victoria Village,Coffee Shop,Restaurant,Japanese Restaurant,Park,Gym,Middle Eastern Restaurant,Pizza Place,Sandwich Place,Pharmacy,Bank
4,Jane-Finch,Coffee Shop,Pizza Place,Fast Food Restaurant,Gas Station,Hotel,Grocery Store,Pharmacy,Smoothie Shop,Tea Room,Kitchen Supply Store
5,Kingston-Galloway,Pizza Place,Breakfast Spot,Fast Food Restaurant,Bank,Coffee Shop,Park,Greek Restaurant,Fried Chicken Joint,Sandwich Place,Juice Bar
6,Lawrence Heights,Clothing Store,Coffee Shop,Fast Food Restaurant,Furniture / Home Store,Vietnamese Restaurant,Grocery Store,Pizza Place,Jewelry Store,Pharmacy,Pet Store
7,Malvern,Zoo Exhibit,Fast Food Restaurant,Pizza Place,Zoo,Gas Station,Coffee Shop,Restaurant,Greek Restaurant,Caribbean Restaurant,Skating Rink
8,Scarborough Village,Coffee Shop,Fast Food Restaurant,Pizza Place,Sandwich Place,Grocery Store,Pharmacy,Big Box Store,Beer Store,Ice Cream Shop,Bank
9,St.Jamestown,Park,Coffee Shop,Café,Bakery,Diner,Japanese Restaurant,Restaurant,Thai Restaurant,Italian Restaurant,Gastropub


# Cluster Neighborhoods

In [36]:
# Run K-means to cluster neighborhhods into 3

# set number of clusters
kclusters = 3

pin_cluster = pin_group.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(pin_cluster)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:5] 

array([0, 1, 1, 0, 1], dtype=int32)

#### Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

In [39]:
# add clustering labels

pin_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [42]:
# merge original data with Cordinates to the data frame

pin_clustered = pd.merge(pin_data, pin_venues_sorted, how = "inner", on = "Neighborhood")
pin_clustered

Unnamed: 0,Postal Code,Neighborhood,Borough,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1K,Eglinton East-Kennedy Park,Scarborough,43.727929,-79.262029,1,Coffee Shop,Fast Food Restaurant,Grocery Store,Chinese Restaurant,Pharmacy,Beer Store,Bank,Sandwich Place,Discount Store,Pizza Place
1,M4C,Crescent Town,East York,43.695344,-79.318389,0,Park,Coffee Shop,Gastropub,Café,Pizza Place,Thai Restaurant,Ice Cream Shop,Ethiopian Restaurant,Pharmacy,Bar
2,M1P,Dorset Park,Scarborough,43.75741,-79.273304,1,Coffee Shop,Fast Food Restaurant,Sandwich Place,Pizza Place,Restaurant,Breakfast Spot,Bank,Gas Station,Pharmacy,Pet Store
3,M3C,Flemingdon Park-Victoria Village,North York,43.7259,-79.340923,0,Coffee Shop,Restaurant,Japanese Restaurant,Park,Gym,Middle Eastern Restaurant,Pizza Place,Sandwich Place,Pharmacy,Bank
4,M4X,St.Jamestown,Downtown Toronto,43.667967,-79.367675,0,Park,Coffee Shop,Café,Bakery,Diner,Japanese Restaurant,Restaurant,Thai Restaurant,Italian Restaurant,Gastropub
5,M3N,Jane-Finch,North York,43.761631,-79.520999,1,Coffee Shop,Pizza Place,Fast Food Restaurant,Gas Station,Hotel,Grocery Store,Pharmacy,Smoothie Shop,Tea Room,Kitchen Supply Store
6,M1E,Kingston-Galloway,Scarborough,43.763573,-79.188711,1,Pizza Place,Breakfast Spot,Fast Food Restaurant,Bank,Coffee Shop,Park,Greek Restaurant,Fried Chicken Joint,Sandwich Place,Juice Bar
7,M6A,Lawrence Heights,North York,43.718518,-79.464763,1,Clothing Store,Coffee Shop,Fast Food Restaurant,Furniture / Home Store,Vietnamese Restaurant,Grocery Store,Pizza Place,Jewelry Store,Pharmacy,Pet Store
8,M1B,Malvern,Scarborough,43.806686,-79.194353,2,Zoo Exhibit,Fast Food Restaurant,Pizza Place,Zoo,Gas Station,Coffee Shop,Restaurant,Greek Restaurant,Caribbean Restaurant,Skating Rink
9,M1J,Scarborough Village,Scarborough,43.744734,-79.239476,1,Coffee Shop,Fast Food Restaurant,Pizza Place,Sandwich Place,Grocery Store,Pharmacy,Big Box Store,Beer Store,Ice Cream Shop,Bank


# Visualizing Cluster Results

In [43]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(pin_clustered['Latitude'], pin_clustered['Longitude'], pin_clustered['Neighborhood'], pin_clustered['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examining the Clusters

### Cluster 1

In [44]:
pin_clustered.loc[pin_clustered['Cluster Labels'] == 0, pin_clustered.columns[[1] + list(range(5, pin_clustered.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Crescent Town,0,Park,Coffee Shop,Gastropub,Café,Pizza Place,Thai Restaurant,Ice Cream Shop,Ethiopian Restaurant,Pharmacy,Bar
3,Flemingdon Park-Victoria Village,0,Coffee Shop,Restaurant,Japanese Restaurant,Park,Gym,Middle Eastern Restaurant,Pizza Place,Sandwich Place,Pharmacy,Bank
4,St.Jamestown,0,Park,Coffee Shop,Café,Bakery,Diner,Japanese Restaurant,Restaurant,Thai Restaurant,Italian Restaurant,Gastropub
11,Westminster-Brownson,0,Café,Coffee Shop,Bakery,Park,Italian Restaurant,Bar,Restaurant,Gastropub,Eastern European Restaurant,Breakfast Spot


### Cluster 2

In [45]:
pin_clustered.loc[pin_clustered['Cluster Labels'] == 1, pin_clustered.columns[[1] + list(range(5, pin_clustered.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Eglinton East-Kennedy Park,1,Coffee Shop,Fast Food Restaurant,Grocery Store,Chinese Restaurant,Pharmacy,Beer Store,Bank,Sandwich Place,Discount Store,Pizza Place
2,Dorset Park,1,Coffee Shop,Fast Food Restaurant,Sandwich Place,Pizza Place,Restaurant,Breakfast Spot,Bank,Gas Station,Pharmacy,Pet Store
5,Jane-Finch,1,Coffee Shop,Pizza Place,Fast Food Restaurant,Gas Station,Hotel,Grocery Store,Pharmacy,Smoothie Shop,Tea Room,Kitchen Supply Store
6,Kingston-Galloway,1,Pizza Place,Breakfast Spot,Fast Food Restaurant,Bank,Coffee Shop,Park,Greek Restaurant,Fried Chicken Joint,Sandwich Place,Juice Bar
7,Lawrence Heights,1,Clothing Store,Coffee Shop,Fast Food Restaurant,Furniture / Home Store,Vietnamese Restaurant,Grocery Store,Pizza Place,Jewelry Store,Pharmacy,Pet Store
9,Scarborough Village,1,Coffee Shop,Fast Food Restaurant,Pizza Place,Sandwich Place,Grocery Store,Pharmacy,Big Box Store,Beer Store,Ice Cream Shop,Bank
10,Steeles-L'Amoreaux,1,Chinese Restaurant,Coffee Shop,Park,Bakery,Sandwich Place,Pizza Place,Pharmacy,Bank,Japanese Restaurant,Athletics & Sports
12,Weston-Mt.Dennis,1,Coffee Shop,Grocery Store,Sandwich Place,Pharmacy,Pizza Place,Bank,Gas Station,Fried Chicken Joint,Vietnamese Restaurant,Train Station


### Cluster 3

In [46]:
pin_clustered.loc[pin_clustered['Cluster Labels'] == 2, pin_clustered.columns[[1] + list(range(5, pin_clustered.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Malvern,2,Zoo Exhibit,Fast Food Restaurant,Pizza Place,Zoo,Gas Station,Coffee Shop,Restaurant,Greek Restaurant,Caribbean Restaurant,Skating Rink
