In [2]:
# importing libraries
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
from bs4 import BeautifulSoup
import requests # library to handle requests
import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
import geopy.geocoders # convert an address into latitude and longitude values

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries are imported.')

Collecting package metadata: done
Solving environment: done

# All requested packages already installed.

Collecting package metadata: done
Solving environment: done

# All requested packages already installed.

Libraries are imported.


In [7]:
# Loading the dataset which is about postal codes in Toronto
# This dataset was created in week 3. 
df_toronto = pd.read_csv('toronto_base.csv')
df_toronto.head()

Unnamed: 0.1,Unnamed: 0,Borough,Neighbourhood,Postcode,Latitude,Longitude
0,0,Scarborough,"Rouge, Malvern",M1B,43.806686,-79.194353
1,1,Scarborough,"Highland Creek, Rouge Hill, Port Union",M1C,43.784535,-79.160497
2,2,Scarborough,"Guildwood, Morningside, West Hill",M1E,43.763573,-79.188711
3,3,Scarborough,Woburn,M1G,43.770992,-79.216917
4,4,Scarborough,Cedarbrae,M1H,43.773136,-79.239476


## Creating a Map of Toronto

In [8]:
# for the city Toronto, latitude and longtitude are manually extracted via google search
toronto_latitude = 43.6932; toronto_longitude = -79.3832
map_toronto = folium.Map(location = [toronto_latitude, toronto_longitude], zoom_start = 10.7)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    

map_toronto

## Limiting analysis to a specific borough - North York

In [10]:
# selecting only neighborhoods regarding to "North York" borough.
northyork_data = df_toronto[df_toronto['Borough'] == 'North York']
northyork_data = northyork_data.reset_index(drop=True).drop(columns = 'Unnamed: 0')
northyork_data.head()

Unnamed: 0,Borough,Neighbourhood,Postcode,Latitude,Longitude
0,North York,Hillcrest Village,M2H,43.803762,-79.363452
1,North York,"Fairview, Henry Farm, Oriole",M2J,43.778517,-79.346556
2,North York,Bayview Village,M2K,43.786947,-79.385975
3,North York,"Silver Hills, York Mills",M2L,43.75749,-79.374714
4,North York,"Newtonbrook, Willowdale",M2M,43.789053,-79.408493


## Map the North York neighborhoods

In [11]:
address_scar = 'North York, Toronto'
latitude_nyork = 43.7615
longitude_nyork = -79.4111

map_northyork = folium.Map(location=[latitude_nyork, longitude_nyork], zoom_start=11.5)

# add markers to map
for lat, lng, label in zip(northyork_data['Latitude'], northyork_data['Longitude'], northyork_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius = 10,
        popup = label,
        color ='blue',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.7).add_to(map_northyork)  
    
map_northyork

In [None]:
# @hiddel_cell
CLIENT_ID = 'KSEAWSCOWD2G1GQ1R2T4LHDF4UQ1T0550J24QAYJ1BYLFBQI' # your Foursquare ID
CLIENT_SECRET = 'V4GDOHFZZAKPFVAY4XQO5MOE2HCSJR43BRBOZ2QE5EJNOV2Y' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

In [13]:
def foursquare_crawler (postal_code_list, neighborhood_list, lat_list, lng_list, LIMIT = 500, radius = 1000):
    result_ds = []
    counter = 0
    for postal_code, neighborhood, lat, lng in zip(postal_code_list, neighborhood_list, lat_list, lng_list):
         
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, CLIENT_SECRET, VERSION, 
            lat, lng, radius, LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        tmp_dict = {}
        tmp_dict['Postal Code'] = postal_code; tmp_dict['Neighborhood(s)'] = neighborhood; 
        tmp_dict['Latitude'] = lat; tmp_dict['Longitude'] = lng;
        tmp_dict['Crawling_result'] = results;
        result_ds.append(tmp_dict)
        counter += 1
        print('{}.'.format(counter))
        print('Data is Obtained, for the Postal Code {} (and Neighborhoods {}) SUCCESSFULLY.'.format(postal_code, neighborhood))
    return result_ds;

In [14]:
print('Crawling different neighborhoods inside "North York"')
northyork_foursquare_dataset = foursquare_crawler(list(northyork_data['Postcode']),
                                                   list(northyork_data['Neighbourhood']),
                                                   list(northyork_data['Latitude']),
                                                   list(northyork_data['Longitude']),)

Crawling different neighborhoods inside "North York"
1.
Data is Obtained, for the Postal Code M2H (and Neighborhoods Hillcrest Village) SUCCESSFULLY.
2.
Data is Obtained, for the Postal Code M2J (and Neighborhoods Fairview, Henry Farm, Oriole) SUCCESSFULLY.
3.
Data is Obtained, for the Postal Code M2K (and Neighborhoods Bayview Village) SUCCESSFULLY.
4.
Data is Obtained, for the Postal Code M2L (and Neighborhoods Silver Hills, York Mills) SUCCESSFULLY.
5.
Data is Obtained, for the Postal Code M2M (and Neighborhoods Newtonbrook, Willowdale) SUCCESSFULLY.
6.
Data is Obtained, for the Postal Code M2N (and Neighborhoods Willowdale South) SUCCESSFULLY.
7.
Data is Obtained, for the Postal Code M2P (and Neighborhoods York Mills West) SUCCESSFULLY.
8.
Data is Obtained, for the Postal Code M2R (and Neighborhoods Willowdale West) SUCCESSFULLY.
9.
Data is Obtained, for the Postal Code M3A (and Neighborhoods Parkwoods) SUCCESSFULLY.
10.
Data is Obtained, for the Postal Code M3B (and Neighborhoods 

## Save data obtained

In [15]:
import pickle
with open("northyork_foursquare_dataset.txt", "wb") as fp:   #Pickling
    pickle.dump(northyork_foursquare_dataset, fp)

Received Data from Internet is Saved to Computer.


## Recover data saved == unpickle

In [16]:
with open("northyork_foursquare_dataset.txt", "rb") as fp:   # Unpickling
    northyork_foursquare_dataset = pickle.load(fp)

## Clean data

In [18]:
# This function is created to connect to the saved list which is the received database. It will extract each venue 
# for every neighborhood inside the database

def get_venue_dataset(foursquare_dataset):
    result_df = pd.DataFrame(columns = ['Postal Code', 'Neighborhood', 
                                           'Neighborhood Latitude', 'Neighborhood Longitude',
                                          'Venue', 'Venue Summary', 'Venue Category', 'Distance'])
    # print(result_df)
    
    for neigh_dict in foursquare_dataset:
        postal_code = neigh_dict['Postal Code']; neigh = neigh_dict['Neighborhood(s)']
        lat = neigh_dict['Latitude']; lng = neigh_dict['Longitude']
        print('Number of Venuse in Coordination "{}" Posal Code and "{}" Negihborhood(s) is:'.format(postal_code, neigh))
        print(len(neigh_dict['Crawling_result']))
        
        for venue_dict in neigh_dict['Crawling_result']:
            summary = venue_dict['reasons']['items'][0]['summary']
            name = venue_dict['venue']['name']
            dist = venue_dict['venue']['location']['distance']
            cat =  venue_dict['venue']['categories'][0]['name']
            
            
            # print({'Postal Code': postal_code, 'Neighborhood': neigh, 
            #                   'Neighborhood Latitude': lat, 'Neighborhood Longitude':lng,
            #                   'Venue': name, 'Venue Summary': summary, 
            #                   'Venue Category': cat, 'Distance': dist})
            
            result_df = result_df.append({'Postal Code': postal_code, 'Neighborhood': neigh, 
                              'Neighborhood Latitude': lat, 'Neighborhood Longitude':lng,
                              'Venue': name, 'Venue Summary': summary, 
                              'Venue Category': cat, 'Distance': dist}, ignore_index = True)
            # print(result_df)
    
    return(result_df)

In [19]:
northyork_venues = get_venue_dataset(northyork_foursquare_dataset)


Number of Venuse in Coordination "M2H" Posal Code and "Hillcrest Village" Negihborhood(s) is:
21
Number of Venuse in Coordination "M2J" Posal Code and "Fairview, Henry Farm, Oriole" Negihborhood(s) is:
44
Number of Venuse in Coordination "M2K" Posal Code and "Bayview Village" Negihborhood(s) is:
13
Number of Venuse in Coordination "M2L" Posal Code and "Silver Hills, York Mills" Negihborhood(s) is:
4
Number of Venuse in Coordination "M2M" Posal Code and "Newtonbrook, Willowdale" Negihborhood(s) is:
31
Number of Venuse in Coordination "M2N" Posal Code and "Willowdale South" Negihborhood(s) is:
100
Number of Venuse in Coordination "M2P" Posal Code and "York Mills West" Negihborhood(s) is:
15
Number of Venuse in Coordination "M2R" Posal Code and "Willowdale West" Negihborhood(s) is:
11
Number of Venuse in Coordination "M3A" Posal Code and "Parkwoods" Negihborhood(s) is:
28
Number of Venuse in Coordination "M3B" Posal Code and "Don Mills North" Negihborhood(s) is:
29
Number of Venuse in Coo

In [25]:
northyork_neigh = list(northyork_venues['Neighborhood'].unique())
print('Number of Neighborhoods inside North York:', len(northyork_neigh))

Number of Neighborhoods inside North York: 24


In [30]:
print('List of different venue categories:')
list(northyork_venues['Venue Category'].unique())

List of different venue categories:


['Bakery',
 'Korean Restaurant',
 'Grocery Store',
 'Park',
 'Coffee Shop',
 'Bank',
 'Pizza Place',
 'Sandwich Place',
 'Pharmacy',
 'Housing Development',
 'Chinese Restaurant',
 'Ice Cream Shop',
 'Shopping Mall',
 'Recreation Center',
 'Pool',
 'Residential Building (Apartment / Condo)',
 'Diner',
 'Convenience Store',
 'Toy / Game Store',
 'Movie Theater',
 'Electronics Store',
 'Burger Joint',
 'Tea Room',
 'American Restaurant',
 'Department Store',
 'Candy Store',
 'Salon / Barbershop',
 'Smoothie Shop',
 'Fast Food Restaurant',
 'Clothing Store',
 'Juice Bar',
 'Caribbean Restaurant',
 'Japanese Restaurant',
 'Theater',
 'Food Court',
 'Restaurant',
 'Supermarket',
 'Sporting Goods Shop',
 'Cosmetics Shop',
 'Video Game Store',
 'Beer Store',
 'Fried Chicken Joint',
 'Café',
 'Skating Rink',
 'Skate Park',
 'Trail',
 'Asian Restaurant',
 'Hookah Bar',
 'Middle Eastern Restaurant',
 'Dessert Shop',
 'Hot Dog Joint',
 'Indian Restaurant',
 'Ramen Restaurant',
 'Bus Line',
 'Bus 

In [31]:
# one hot encoding
northyork_onehot = pd.get_dummies(data = northyork_venues, drop_first  = False, 
                              prefix = "", prefix_sep = "", columns = ['Venue Category'])
northyork_onehot.head()

Unnamed: 0,Postal Code,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Summary,Distance,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Beer Store,Bike Shop,Boutique,Bowling Alley,Breakfast Spot,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Community Center,Convenience Store,Cosmetics Shop,Creperie,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store,Empanada Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fireworks Store,Fish & Chips Shop,Fish Market,Food & Drink Shop,Food Court,Frame Store,Fraternity House,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,General Entertainment,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hardware Store,Health Food Store,History Museum,Hockey Arena,Home Service,Hookah Bar,Hot Dog Joint,Hotel,Housing Development,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Juice Bar,Karaoke Bar,Kitchen Supply Store,Korean Restaurant,Latin American Restaurant,Laundry Service,Liquor Store,Lounge,Massage Studio,Mediterranean Restaurant,Men's Store,Metro Station,Middle Eastern Restaurant,Miscellaneous Shop,Movie Theater,Moving Target,Optical Shop,Paper / Office Supplies Store,Park,Pet Store,Pharmacy,Photography Lab,Pizza Place,Playground,Plaza,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Recreation Center,Rental Car Location,Residential Building (Apartment / Condo),Restaurant,Road,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shoe Store,Shop & Service,Shopping Mall,Skate Park,Skating Rink,Ski Area,Ski Chalet,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Spa,Sporting Goods Shop,Sports Bar,Sports Club,Steakhouse,Storage Facility,Supermarket,Supplement Shop,Sushi Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Toy / Game Store,Trail,Train Station,Turkish Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wings Joint,Women's Store,Yoga Studio
0,M2H,Hillcrest Village,43.803762,-79.363452,Tastee,This spot is popular,692,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,M2H,Hillcrest Village,43.803762,-79.363452,고려삼계탕 Korean Ginseng Chicken Soup & Bibimbap,This spot is popular,754,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,M2H,Hillcrest Village,43.803762,-79.363452,Galati,This spot is popular,815,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,M2H,Hillcrest Village,43.803762,-79.363452,Cummer Park,This spot is popular,776,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,M2H,Hillcrest Village,43.803762,-79.363452,Tim Hortons,This spot is popular,731,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


## Manually selecting features

In [37]:
important_list_of_features = [
 'Neighborhood',
 'Neighborhood Latitude',
 'Neighborhood Longitude',
 'Korean Restaurant',
 'Chinese Restaurant',
 'Diner',
 'Burger Joint',
 'American Restaurant',
 'Caribbean Restaurant',
 'Japanese Restaurant',
 'Food Court',
 'Restaurant',
 'Fried Chicken Joint',
 'Asian Restaurant',
 'Middle Eastern Restaurant',
 'Hot Dog Joint',
 'Indian Restaurant',
 'Ramen Restaurant',
 'Steakhouse',
 'Seafood Restaurant',
 'Indonesian Restaurant',
 'Sushi Restaurant',
 'Burrito Place',
 'Sports Bar',
 'Vietnamese Restaurant',
 'Bar',
 'Karaoke Bar',
 'Italian Restaurant',
 'Pub',
 'French Restaurant',
 'Eastern European Restaurant',
 'Thai Restaurant',
 'Greek Restaurant',
 'Dim Sum Restaurant',
 'Mediterranean Restaurant',
 'Falafel Restaurant',
 'Turkish Restaurant',
 'Latin American Restaurant',
 'Portuguese Restaurant',
 'Comfort Food Restaurant',
 'Wings Joint',
 'Cocktail Bar',
 'Empanada Restaurant']   

In [38]:
northyork_onehot = northyork_onehot[important_list_of_features].drop(
    columns = ['Neighborhood Latitude', 'Neighborhood Longitude']).groupby(
    'Neighborhood').sum()


northyork_onehot.head()

KeyError: "['Neighborhood Latitude', 'Neighborhood', 'Neighborhood Longitude'] not in index"

## Clustering

In [39]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# run k-means clustering
kmeans = KMeans(n_clusters = 5, random_state = 0).fit(northyork_onehot)

In [40]:
means_df = pd.DataFrame(kmeans.cluster_centers_)
means_df.columns = northyork_onehot.columns
means_df.index = ['L1','L2','L3','L4','L5']
means_df['Total Sum'] = means_df.sum(axis = 1)
means_df.sort_values(axis = 0, by = ['Total Sum'], ascending=False)

Unnamed: 0,Korean Restaurant,Chinese Restaurant,Diner,Burger Joint,American Restaurant,Caribbean Restaurant,Japanese Restaurant,Food Court,Restaurant,Beer Store,Fried Chicken Joint,Asian Restaurant,Middle Eastern Restaurant,Hot Dog Joint,Indian Restaurant,Ramen Restaurant,Steakhouse,Seafood Restaurant,Indonesian Restaurant,Sushi Restaurant,Burrito Place,Sports Bar,Vietnamese Restaurant,Bar,Karaoke Bar,Italian Restaurant,Pub,French Restaurant,Eastern European Restaurant,Thai Restaurant,Greek Restaurant,Dim Sum Restaurant,Mediterranean Restaurant,Falafel Restaurant,Turkish Restaurant,Latin American Restaurant,Portuguese Restaurant,Comfort Food Restaurant,Wings Joint,Cocktail Bar,Empanada Restaurant,Total Sum
L4,5.0,2.0,0.0,0.0,1.0,0.0,6.0,0.0,3.0,1.0,2.0,0.0,1.0,0.0,0.0,6.0,1.0,1.0,1.0,4.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,42.0
L1,0.0,0.5,0.25,1.0,0.75,1.0,2.0,0.25,1.75,0.75,0.25,0.75,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.75,0.0,0.25,0.0,0.75,0.0,0.25,0.0,0.0,0.0,0.25,0.25,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.75
L3,4.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,2.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0
L5,0.0,0.0,0.0,0.0,0.5,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,0.5,0.5,0.0,0.5,0.0,0.0,0.5,0.0,0.5,0.5,0.0,0.0,10.5
L2,0.125,0.3125,0.1875,0.0625,0.0625,0.0625,0.125,6.938894e-18,0.375,0.0625,0.25,2.775558e-17,0.0625,6.938894e-18,1.387779e-17,0.0,6.938894e-18,0.0625,6.938894e-18,0.125,6.938894e-18,1.387779e-17,0.4375,2.775558e-17,6.938894e-18,0.1875,0.0,0.0625,0.0625,1.387779e-17,0.0625,0.0625,0.125,0.0625,0.125,0.0625,0.0625,6.938894e-18,6.938894e-18,0.0625,0.0625,3.3125


## Results

#### Best location is L4
#### 2nd best location is L1
#### 3rd best location is L3

## Associating the locations to each neighborhood

In [46]:
neigh_summary = pd.DataFrame([northyork_data.index, 1 + kmeans.labels_]).T
neigh_summary.columns = ['Neighborhood', 'Location']
neigh_summary


Unnamed: 0,Neighborhood,Location
0,0,2
1,1,2
2,2,5
3,3,2
4,4,1
5,5,2
6,6,2
7,7,2
8,8,2
9,9,2


In [49]:
name_of_neigh = list(neigh_summary[neigh_summary['Location'] == 5]['Neighborhood'])[0]
northyork_venues[northyork_venues['Neighborhood'] == name_of_neigh].iloc[0,1:5].to_dict()


IndexError: single positional indexer is out-of-bounds