## Combined New York, Toronto and Pittsburgh Notebook

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/DSX-Python35

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-2.2.2               |           py35_1         462 KB  conda-forge
    certifi-2018.8.24          |        py35_1001         139 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    ca-certificates-2019.3.9   |       hecc5488_0         146 KB  conda-forge
    openssl-1.0.2r             |       h14c3975_0         3.1 MB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         4.0 MB

The following NEW packages will

## Get New York Data

In [2]:
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
print('Data downloaded!')

Data downloaded!


In [3]:
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)

In [4]:
neighborhoods_data = newyork_data['features']

In [5]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

In [6]:
for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

## Reduce New York to Manhattan Only

In [7]:
manhattan_data = neighborhoods[neighborhoods['Borough'] == 'Manhattan'].reset_index(drop=True)
manhattan_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688


In [8]:
manhattan_data=manhattan_data.drop('Borough', 1)
manhattan_data.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Marble Hill,40.876551,-73.91066
1,Chinatown,40.715618,-73.994279
2,Washington Heights,40.851903,-73.9369
3,Inwood,40.867684,-73.92121
4,Hamilton Heights,40.823604,-73.949688


## Draw Manhattan Map

In [9]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

# create map of Manhattan using latitude and longitude values
map_manhattan = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(manhattan_data['Latitude'], manhattan_data['Longitude'], manhattan_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_manhattan)  
    
map_manhattan

## Get Toronto Data

In [10]:
from bs4 import BeautifulSoup

In [11]:
source=requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

In [12]:
soup=BeautifulSoup(source, 'lxml')

## Decomposition of Toronto Wikipedia Web Structure

In [13]:
Ttable = soup.find('table')
Tbody = Ttable.find('tbody')
Header = Tbody.find_all('th')
print (Header)

[<th>Postcode</th>, <th>Borough</th>, <th>Neighbourhood
</th>]


In [14]:
Tdat=Tbody.find_all('tr')
i = len(Tdat)
print (i)

289


## Main program logic of parsing Toronto Wikipedia Data

#### We parse each row to see if we can build an array that we ultimately importinto a pandas dataframe
#### The first logic is to get the features. 
#### The second logic is to see how next row(s) have to be combined for a single postal code
#### Manually accessing indices to see next row(s)
#### Lastly, we handle the rows for postal codes that have one neighborhood

In [15]:
# number of rows in final table
drowc = 0
# counting index of rows
n = 0
# array to be used to place data before converting to panda
datarr = []

for row in Tdat:
    
    header=row.find_all('th')
    hc=len(header)
    if hc == 3:
        feature = [header[0].text,header[1].text,header[2].text ]
        j= len(feature[2])
        temp=feature[2]
        feature[2]=temp[0:(j-1)]
        if n < i:
            n = n+1
    #look at multiple rows in one postal code        
    if n < i-1 and Tdat[n].find('td') == Tdat[n+1].find('td'):
        thisrow = Tdat[n].find_all('td')
        thisinstance =[thisrow[0].text, thisrow[1].text, thisrow[2].text] 
        j= len(thisinstance[2])
        temp=thisinstance[2]
        thisinstance[2]=temp[0:(j-1)]
        runningrow=thisinstance[2]
        while n < i-1 and Tdat[n].find('td') == Tdat[n+1].find('td'):
            nextrow =Tdat[n+1].find_all('td')
            nextinstance =[nextrow[0].text, nextrow[1].text, nextrow[2].text] 
            j= len(nextinstance[2])
            temp=nextinstance[2]
            nextinstance[2]=temp[0:(j-1)]
            runningrow = runningrow + ', '+ nextinstance[2]
            instance2 = [thisinstance[0], thisinstance[1], runningrow]
            if n < i:
                n = n+1
            
        datarr.append(instance2)
        drowc=drowc+1
        if n < i:
            n = n+1
            
    #look at single row postal codes
    else:
      if n < i:
        datarow = Tdat[n].find_all('td')
        dcolumnc=len(datarow)
        
        #ignore postal codes where borough not assigned
        if dcolumnc == 3 and datarow[1].text != 'Not assigned':
            instance = [datarow[0].text,datarow[1].text, datarow[2].text]
            j=len(instance[2])
            temp=instance[2]
            instance[2]=temp[0:(j-1)]
            
            # name neighborhood after borough if unassaigned
            if instance[2] == 'Not assigned':
                instance[2] = instance[1]
            datarr.append(instance)
            drowc = drowc+1
            if n < i:
                n=n+1
            
        else:
            if n < i:
                n=n+1

### Rows to be inserted into pandas

In [16]:
print (drowc)

103


In [17]:
df=pd.DataFrame(datarr)

In [18]:
feature[0] = 'Postal Code'
feature[2] = 'Neighborhood'
df.columns = feature

In [19]:
df.head(drowc)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


In [20]:
df.shape

(103, 3)

## Getting Toronto Longitude and Latitude files

In [21]:
!wget -O longlat.csv http://cocl.us/Geospatial_data

--2019-05-24 15:19:25--  http://cocl.us/Geospatial_data
Resolving cocl.us (cocl.us)... 169.48.113.201
Connecting to cocl.us (cocl.us)|169.48.113.201|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://cocl.us/Geospatial_data [following]
--2019-05-24 15:19:25--  https://cocl.us/Geospatial_data
Connecting to cocl.us (cocl.us)|169.48.113.201|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2019-05-24 15:19:28--  https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Resolving ibm.box.com (ibm.box.com)... 107.152.27.197
Connecting to ibm.box.com (ibm.box.com)|107.152.27.197|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2019-05-24 15:19:29--  https://ibm.box.com/public/static/9afzr83pps4pwf2smjjc

In [22]:
df2 = pd.read_csv('longlat.csv')
df2.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## Merge Toronto pandas on Postal Code Column

In [23]:
toronto_data = df.merge(df2, on='Postal Code')
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494


In [24]:
toronto_data.shape

(103, 5)

## Limit further Analysis to  'Toronto' named Boroughs

In [25]:
dfDT = toronto_data[toronto_data['Borough'] == 'Downtown Toronto' ].reset_index(drop=True)
dfDT.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
1,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
4,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383


In [26]:
dfET = toronto_data[toronto_data['Borough'] == 'East Toronto' ].reset_index(drop=True)
dfET.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558


In [27]:
dfWT = toronto_data[toronto_data['Borough'] == 'West Toronto' ].reset_index(drop=True)
dfWT.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M6H,West Toronto,"Dovercourt Village, Dufferin",43.669005,-79.442259
1,M6J,West Toronto,"Little Portugal, Trinity",43.647927,-79.41975
2,M6K,West Toronto,"Brockton, Exhibition Place, Parkdale Village",43.636847,-79.428191
3,M6P,West Toronto,"High Park, The Junction South",43.661608,-79.464763
4,M6R,West Toronto,"Parkdale, Roncesvalles",43.64896,-79.456325


In [28]:
dfCT = toronto_data[toronto_data['Borough'] == 'Central Toronto' ].reset_index(drop=True)
dfCT.head(20)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
1,M5N,Central Toronto,Roselawn,43.711695,-79.416936
2,M4P,Central Toronto,Davisville North,43.712751,-79.390197
3,M5P,Central Toronto,"Forest Hill North, Forest Hill West",43.696948,-79.411307
4,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
5,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678
6,M4S,Central Toronto,Davisville,43.704324,-79.38879
7,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
8,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049


In [29]:
frames=[dfDT, dfET, dfWT, dfCT]
df7= pd.concat(frames)
new_toronto_data = df7.reset_index(drop=True)

In [30]:
new_toronto_data = new_toronto_data.drop('Postal Code', 1)
new_toronto_data = new_toronto_data.drop('Borough', 1)
new_toronto_data.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,"Harbourfront, Regent Park",43.65426,-79.360636
1,"Ryerson, Garden District",43.657162,-79.378937
2,St. James Town,43.651494,-79.375418
3,Berczy Park,43.644771,-79.373306
4,Central Bay Street,43.657952,-79.387383


## Draw Toronto Map

In [31]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [32]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, neighborhood in zip(new_toronto_data['Latitude'], new_toronto_data['Longitude'], new_toronto_data['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## Get Pittsburgh Data

In [33]:
#Pittsburgh LatLong
!wget -O Pittlonglat.csv https://s3.amazonaws.com/filestorejm/PittLongLatcsv.csv

--2019-05-24 15:19:32--  https://s3.amazonaws.com/filestorejm/PittLongLatcsv.csv
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.96.221
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.96.221|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1182 (1.2K) [text/csv]
Saving to: ‘Pittlonglat.csv’


2019-05-24 15:19:32 (77.5 MB/s) - ‘Pittlonglat.csv’ saved [1182/1182]



In [34]:
pitt_data = pd.read_csv('Pittlonglat.csv')
pitt_data.head(40)

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Allegheny Center,40.453,-80.005
1,Allegheny West,40.452083,-80.015775
2,Beechview,40.413687,-80.022433
3,Bloomfield,40.461,-79.948
4,Carrick,40.397,-79.987
5,Central Business District (Downtown),40.441111,-80.0
6,Central Lawrenceville,40.472,-79.959
7,Central Northside,40.456,-80.01
8,Central Oakland,40.442569,-79.945973
9,Duquesne Heights,40.436,-80.02


## Draw Pittsburgh Map

In [35]:
address = 'Pittsburgh, PA'

geolocator = Nominatim(user_agent="pitt_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Pittsburgh are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Pittsburgh are 40.4416941, -79.9900861.


In [36]:
# create map of Toronto using latitude and longitude values
map_pitt = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, neighborhood in zip(pitt_data['Latitude'], pitt_data['Longitude'], pitt_data['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_pitt)  
    
map_pitt

## Explore venues in Manhattan, Toronto and Pittsburgh Combined

In [37]:
frames_combined=[manhattan_data, new_toronto_data, pitt_data]
combined_data = pd.concat(frames_combined, keys=['new york', 'toronto', 'pittsburgh'])
combined_data =combined_data.reset_index()
combined_data.head()

Unnamed: 0,level_0,level_1,Neighborhood,Latitude,Longitude
0,new york,0,Marble Hill,40.876551,-73.91066
1,new york,1,Chinatown,40.715618,-73.994279
2,new york,2,Washington Heights,40.851903,-73.9369
3,new york,3,Inwood,40.867684,-73.92121
4,new york,4,Hamilton Heights,40.823604,-73.949688


In [38]:
combined_data.shape

(114, 5)

##  Foursquare Credentials

In [39]:
# The code was removed by Watson Studio for sharing.

In [40]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)# limit of number of venues returned by Foursquare API
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [41]:
combined_venues = getNearbyVenues(names=combined_data['Neighborhood'],
                                   latitudes=combined_data['Latitude'],
                                   longitudes=combined_data['Longitude']
                                  )

Marble Hill
Chinatown
Washington Heights
Inwood
Hamilton Heights
Manhattanville
Central Harlem
East Harlem
Upper East Side
Yorkville
Lenox Hill
Roosevelt Island
Upper West Side
Lincoln Square
Clinton
Midtown
Murray Hill
Chelsea
Greenwich Village
East Village
Lower East Side
Tribeca
Little Italy
Soho
West Village
Manhattan Valley
Morningside Heights
Gramercy
Battery Park City
Financial District
Carnegie Hill
Noho
Civic Center
Midtown South
Sutton Place
Turtle Bay
Tudor City
Stuyvesant Town
Flatiron
Hudson Yards
Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Christie
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Rosedale
Stn A PO Boxes 25 The Esplanade
Cabbagetow

## look at all the venues - check shape

In [42]:
print(combined_venues.shape)
combined_venues.head()

(9277, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Marble Hill,40.876551,-73.91066,Arturo's,40.874412,-73.910271,Pizza Place
1,Marble Hill,40.876551,-73.91066,Bikram Yoga,40.876844,-73.906204,Yoga Studio
2,Marble Hill,40.876551,-73.91066,Tibbett Diner,40.880404,-73.908937,Diner
3,Marble Hill,40.876551,-73.91066,Sam's Pizza,40.879435,-73.905859,Pizza Place
4,Marble Hill,40.876551,-73.91066,Loeser's Delicatessen,40.879242,-73.905471,Sandwich Place


## Venues for each neighborhood

In [43]:
combined_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Allegheny Center,99,99,99,99,99,99
Allegheny West,100,100,100,100,100,100
Battery Park City,100,100,100,100,100,100
Beechview,24,24,24,24,24,24
Berczy Park,100,100,100,100,100,100
Bloomfield,100,100,100,100,100,100
"Brockton, Exhibition Place, Parkdale Village",100,100,100,100,100,100
Business Reply Mail Processing Centre 969 Eastern,47,47,47,47,47,47
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",16,16,16,16,16,16


In [44]:
print('There are {} uniques categories.'.format(len(combined_venues['Venue Category'].unique())))

There are 408 uniques categories.


## Analyze each neighborhhod

In [45]:
# one hot encoding
combined_onehot = pd.get_dummies(combined_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
combined_onehot['Neighborhood'] = combined_venues['Neighborhood']
       
# move neighborhood column to the first column
#fixed_columns = [combined_onehot.columns[-1]] + list(combined_onehot.columns[:-1])
fixed_columns = [combined_onehot.columns[-158]] + list(combined_onehot.columns[:-158]) + list(combined_onehot.columns[-157:])
combined_onehot = combined_onehot[fixed_columns]    
   #fixed_columns
combined_onehot.head()

Unnamed: 0,Music Venue,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auditorium,Australian Restaurant,Austrian Restaurant,Auto Dealership,Auto Garage,Auto Workshop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Basketball Stadium,Beach,Beach Bar,Beer Bar,Beer Garden,Beer Store,Belgian Restaurant,Big Box Store,Bike Shop,Bike Trail,Bistro,Boat or Ferry,Bookstore,Border Crossing,Botanical Garden,Boutique,Bowling Alley,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bridge,Bubble Tea Shop,Buffet,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Cambodian Restaurant,Camera Store,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Casino,Castle,Caucasian Restaurant,Cemetery,Check Cashing Service,Cheese Shop,Chinese Restaurant,Chiropractor,Chocolate Shop,Church,Churrascaria,Circus,Climbing Gym,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Academic Building,College Arts Building,College Baseball Diamond,College Basketball Court,College Cafeteria,College Gym,College Quad,College Theater,Comedy Club,Comfort Food Restaurant,Comic Shop,Community Center,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Curling Ice,Cycle Studio,Czech Restaurant,Dance Studio,Daycare,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space,Exhibit,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Fondue Restaurant,Food,Food & Drink Shop,Food Court,Food Truck,Football Stadium,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,German Restaurant,Gift Shop,Gluten-free Restaurant,Golf Course,Golf Driving Range,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Heliport,High School,Historic Site,History Museum,Hobby Shop,Home Service,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hungarian Restaurant,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indie Movie Theater,Indie Theater,Indonesian Restaurant,Intersection,Irish Pub,Israeli Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Kebab Restaurant,Kids Store,Kitchen Supply Store,Korean Restaurant,Kosher Restaurant,Lake,Latin American Restaurant,Laundry Service,Lebanese Restaurant,Library,Light Rail Station,Lighthouse,Lingerie Store,Liquor Store,Lounge,Mac & Cheese Joint,Malay Restaurant,Market,Martial Arts Dojo,Massage Studio,Mediterranean Restaurant,Memorial Site,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Mongolian Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Moving Target,Multiplex,Museum,Music School,Music Store,Nail Salon,Neighborhood,New American Restaurant,Newsstand,Nightclub,Non-Profit,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Other Nightlife,Other Repair Shop,Outdoor Sculpture,Outdoor Supply Store,Outdoors & Recreation,Outlet Store,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Pedestrian Plaza,Performing Arts Venue,Perfume Shop,Persian Restaurant,Peruvian Restaurant,Pet Café,Pet Service,Pet Store,Pharmacy,Photography Studio,Piano Bar,Pide Place,Pie Shop,Pier,Pilates Studio,Pizza Place,Planetarium,Playground,Plaza,Poke Place,Pool,Pool Hall,Portuguese Restaurant,Poutine Place,Pub,Public Art,Racetrack,Radio Station,Ramen Restaurant,Record Shop,Rental Car Location,Residential Building (Apartment / Condo),Resort,Rest Area,Restaurant,River,Rock Climbing Spot,Rock Club,Roof Deck,Russian Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,School,Science Museum,Sculpture Garden,Seafood Restaurant,Shanghai Restaurant,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soba Restaurant,Soccer Stadium,Social Club,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Speakeasy,Spiritual Center,Sporting Goods Shop,Sports Bar,Sports Club,Stadium,State / Provincial Park,Stationery Store,Steakhouse,Storage Facility,Street Art,Strip Club,Supermarket,Supplement Shop,Sushi Restaurant,Swiss Restaurant,Synagogue,Szechuan Restaurant,TV Station,Taco Place,Tailor Shop,Taiwanese Restaurant,Tapas Restaurant,Tattoo Parlor,Tea Room,Tech Startup,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Tibetan Restaurant,Tourist Information Center,Toy / Game Store,Track,Trail,Train Station,Tram Station,Travel & Transport,Tree,Tunnel,Turkish Restaurant,Udon Restaurant,Ukrainian Restaurant,University,Used Bookstore,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Volleyball Court,Waste Facility,Watch Shop,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [46]:
combined_onehot.shape

(9277, 408)

## Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [47]:
combined_grouped = combined_onehot.groupby('Neighborhood').mean().reset_index()
combined_grouped.head()

Unnamed: 0,Neighborhood,Music Venue,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auditorium,Australian Restaurant,Austrian Restaurant,Auto Dealership,Auto Garage,Auto Workshop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Basketball Stadium,Beach,Beach Bar,Beer Bar,Beer Garden,Beer Store,Belgian Restaurant,Big Box Store,Bike Shop,Bike Trail,Bistro,Boat or Ferry,Bookstore,Border Crossing,Botanical Garden,Boutique,Bowling Alley,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bridge,Bubble Tea Shop,Buffet,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Cambodian Restaurant,Camera Store,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Casino,Castle,Caucasian Restaurant,Cemetery,Check Cashing Service,Cheese Shop,Chinese Restaurant,Chiropractor,Chocolate Shop,Church,Churrascaria,Circus,Climbing Gym,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Academic Building,College Arts Building,College Baseball Diamond,College Basketball Court,College Cafeteria,College Gym,College Quad,College Theater,Comedy Club,Comfort Food Restaurant,Comic Shop,Community Center,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Curling Ice,Cycle Studio,Czech Restaurant,Dance Studio,Daycare,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space,Exhibit,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Fondue Restaurant,Food,Food & Drink Shop,Food Court,Food Truck,Football Stadium,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,German Restaurant,Gift Shop,Gluten-free Restaurant,Golf Course,Golf Driving Range,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Heliport,High School,Historic Site,History Museum,Hobby Shop,Home Service,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hungarian Restaurant,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indie Movie Theater,Indie Theater,Indonesian Restaurant,Intersection,Irish Pub,Israeli Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Kebab Restaurant,Kids Store,Kitchen Supply Store,Korean Restaurant,Kosher Restaurant,Lake,Latin American Restaurant,Laundry Service,Lebanese Restaurant,Library,Light Rail Station,Lighthouse,Lingerie Store,Liquor Store,Lounge,Mac & Cheese Joint,Malay Restaurant,Market,Martial Arts Dojo,Massage Studio,Mediterranean Restaurant,Memorial Site,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Mongolian Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Moving Target,Multiplex,Museum,Music School,Music Store,Nail Salon,New American Restaurant,Newsstand,Nightclub,Non-Profit,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Other Nightlife,Other Repair Shop,Outdoor Sculpture,Outdoor Supply Store,Outdoors & Recreation,Outlet Store,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Pedestrian Plaza,Performing Arts Venue,Perfume Shop,Persian Restaurant,Peruvian Restaurant,Pet Café,Pet Service,Pet Store,Pharmacy,Photography Studio,Piano Bar,Pide Place,Pie Shop,Pier,Pilates Studio,Pizza Place,Planetarium,Playground,Plaza,Poke Place,Pool,Pool Hall,Portuguese Restaurant,Poutine Place,Pub,Public Art,Racetrack,Radio Station,Ramen Restaurant,Record Shop,Rental Car Location,Residential Building (Apartment / Condo),Resort,Rest Area,Restaurant,River,Rock Climbing Spot,Rock Club,Roof Deck,Russian Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,School,Science Museum,Sculpture Garden,Seafood Restaurant,Shanghai Restaurant,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soba Restaurant,Soccer Stadium,Social Club,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Speakeasy,Spiritual Center,Sporting Goods Shop,Sports Bar,Sports Club,Stadium,State / Provincial Park,Stationery Store,Steakhouse,Storage Facility,Street Art,Strip Club,Supermarket,Supplement Shop,Sushi Restaurant,Swiss Restaurant,Synagogue,Szechuan Restaurant,TV Station,Taco Place,Tailor Shop,Taiwanese Restaurant,Tapas Restaurant,Tattoo Parlor,Tea Room,Tech Startup,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Tibetan Restaurant,Tourist Information Center,Toy / Game Store,Track,Trail,Train Station,Tram Station,Travel & Transport,Tree,Tunnel,Turkish Restaurant,Udon Restaurant,Ukrainian Restaurant,University,Used Bookstore,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Volleyball Court,Waste Facility,Watch Shop,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0
1,Allegheny Center,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.040404,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.040404,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.010101,0.020202,0.020202,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.010101,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.050505,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.010101,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.040404,0.0,0.0,0.0,0.010101,0.020202,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.010101,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.010101,0.0,0.020202,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.010101,0.010101,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.040404,0.010101,0.010101,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.040404,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020202,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.0,0.0,0.040404,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.020202,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.010101,0.010101,0.0,0.010101,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.010101,0.0
2,Allegheny West,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.04,0.0,0.01,0.0,0.04,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0
3,Battery Park City,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.03,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.01,0.0,0.0,0.0
4,Beechview,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.041667,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [48]:
combined_grouped.shape

(114, 408)

In [49]:
num_top_venues = 5

for hood in combined_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = combined_grouped[combined_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
                 venue  freq
0                 Café  0.06
1          Coffee Shop  0.05
2                Hotel  0.05
3              Theater  0.04
4  American Restaurant  0.04


----Allegheny Center----
                 venue  freq
0          Coffee Shop  0.05
1                 Park  0.04
2                Hotel  0.04
3  American Restaurant  0.04
4     Sculpture Garden  0.04


----Allegheny West----
                 venue  freq
0                 Park  0.07
1  American Restaurant  0.06
2       Sandwich Place  0.04
3       Science Museum  0.04
4               Museum  0.04


----Battery Park City----
                  venue  freq
0                  Park  0.08
1           Coffee Shop  0.07
2                   Gym  0.03
3  Gym / Fitness Center  0.03
4            Steakhouse  0.03


----Beechview----
                venue  freq
0  Light Rail Station  0.12
1               Hotel  0.08
2         Pizza Place  0.08
3   Indian Restaurant  0.04
4         Pet Service  0.

## Make dataframe

In [50]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

## display top 10 venues from dataframe

In [51]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = combined_grouped['Neighborhood']

for ind in np.arange(combined_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(combined_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Café,Hotel,Coffee Shop,American Restaurant,Theater,Pizza Place,Sushi Restaurant,Steakhouse,Gym,Cosmetics Shop
1,Allegheny Center,Coffee Shop,Art Museum,Hotel,Deli / Bodega,Park,American Restaurant,Sculpture Garden,Burger Joint,Sports Bar,Bar
2,Allegheny West,Park,American Restaurant,Museum,Sandwich Place,Science Museum,Steakhouse,Coffee Shop,Art Museum,Lounge,Restaurant
3,Battery Park City,Park,Coffee Shop,Wine Shop,Steakhouse,Memorial Site,Gym,Gym / Fitness Center,Plaza,Burger Joint,Dog Run
4,Beechview,Light Rail Station,Hotel,Pizza Place,Supermarket,Playground,Food & Drink Shop,Moving Target,Fast Food Restaurant,Mexican Restaurant,Bar


### Cluster Neighborhoods

In [52]:
# set number of clusters
kclusters = 15

combined_grouped_clustering = combined_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(combined_grouped_clustering)

# check cluster labels generated for each row in the dataframe for distribution
kmeans.labels_[0:150] 

array([ 0, 12, 11, 14, 12,  0,  3,  0, 14,  9,  0,  8,  3,  0, 12,  1,  3,
       12,  8, 11,  8,  1,  1,  0,  8, 12,  0,  9,  9,  9,  0,  9,  5, 12,
        1, 11,  8,  8,  0,  8,  9,  3,  8, 13,  8,  1,  1,  0,  9,  4,  3,
        7, 11, 12,  1, 14,  8,  8,  8,  3,  1,  3, 11,  1,  1,  1, 12,  8,
        9,  1,  3,  8,  8,  8, 12,  1,  3, 14,  3, 14,  1, 14,  9,  9,  0,
        8,  8, 12, 11,  3,  3,  8,  0,  2,  0, 11,  3,  1,  8,  0,  3,  9,
        9,  8,  6,  8,  8, 11,  3,  8,  1, 10,  8,  8], dtype=int32)

## dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [53]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

combined_merged = combined_data

# merge combined_grouped with combined_data to add latitude/longitude for each neighborhood
combined_merged = combined_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

combined_merged.head()

Unnamed: 0,level_0,level_1,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,new york,0,Marble Hill,40.876551,-73.91066,1,Park,Donut Shop,Pizza Place,Bakery,Supermarket,Mexican Restaurant,Spanish Restaurant,Café,Sandwich Place,Pharmacy
1,new york,1,Chinatown,40.715618,-73.994279,8,Cocktail Bar,Ice Cream Shop,Café,Chinese Restaurant,Wine Bar,Hotel,Sandwich Place,Coffee Shop,French Restaurant,Optical Shop
2,new york,2,Washington Heights,40.851903,-73.9369,1,Pizza Place,Latin American Restaurant,Bakery,Café,Mexican Restaurant,Deli / Bodega,Bar,Mobile Phone Shop,Grocery Store,Tapas Restaurant
3,new york,3,Inwood,40.867684,-73.92121,1,Latin American Restaurant,Café,Mexican Restaurant,Pizza Place,Wine Bar,Spanish Restaurant,Deli / Bodega,Bakery,Lounge,Bar
4,new york,4,Hamilton Heights,40.823604,-73.949688,1,Bar,Coffee Shop,Café,Mexican Restaurant,Caribbean Restaurant,Yoga Studio,Sushi Restaurant,Chinese Restaurant,Park,Spanish Restaurant


In [54]:
combined_merged.set_index("level_0", inplace=True)
new_york_merged=combined_merged.loc['new york']

In [55]:
toronto_merged=combined_merged.loc['toronto']

In [56]:
pittsburgh_merged=combined_merged.loc['pittsburgh']
pittsburgh_merged.head(5)

Unnamed: 0_level_0,level_1,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
pittsburgh,0,Allegheny Center,40.453,-80.005,12,Coffee Shop,Art Museum,Hotel,Deli / Bodega,Park,American Restaurant,Sculpture Garden,Burger Joint,Sports Bar,Bar
pittsburgh,1,Allegheny West,40.452083,-80.015775,11,Park,American Restaurant,Museum,Sandwich Place,Science Museum,Steakhouse,Coffee Shop,Art Museum,Lounge,Restaurant
pittsburgh,2,Beechview,40.413687,-80.022433,12,Light Rail Station,Hotel,Pizza Place,Supermarket,Playground,Food & Drink Shop,Moving Target,Fast Food Restaurant,Mexican Restaurant,Bar
pittsburgh,3,Bloomfield,40.461,-79.948,3,Pizza Place,Coffee Shop,Bar,Grocery Store,Hotel,Bank,Chinese Restaurant,Sandwich Place,Italian Restaurant,Rental Car Location
pittsburgh,4,Carrick,40.397,-79.987,3,Bank,Pharmacy,Chinese Restaurant,Bar,Pizza Place,Video Store,Auto Garage,Sandwich Place,Park,Gym


In [57]:
new_york_merged.head(5)

Unnamed: 0_level_0,level_1,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
new york,0,Marble Hill,40.876551,-73.91066,1,Park,Donut Shop,Pizza Place,Bakery,Supermarket,Mexican Restaurant,Spanish Restaurant,Café,Sandwich Place,Pharmacy
new york,1,Chinatown,40.715618,-73.994279,8,Cocktail Bar,Ice Cream Shop,Café,Chinese Restaurant,Wine Bar,Hotel,Sandwich Place,Coffee Shop,French Restaurant,Optical Shop
new york,2,Washington Heights,40.851903,-73.9369,1,Pizza Place,Latin American Restaurant,Bakery,Café,Mexican Restaurant,Deli / Bodega,Bar,Mobile Phone Shop,Grocery Store,Tapas Restaurant
new york,3,Inwood,40.867684,-73.92121,1,Latin American Restaurant,Café,Mexican Restaurant,Pizza Place,Wine Bar,Spanish Restaurant,Deli / Bodega,Bakery,Lounge,Bar
new york,4,Hamilton Heights,40.823604,-73.949688,1,Bar,Coffee Shop,Café,Mexican Restaurant,Caribbean Restaurant,Yoga Studio,Sushi Restaurant,Chinese Restaurant,Park,Spanish Restaurant


In [58]:
toronto_merged.head(5)

Unnamed: 0_level_0,level_1,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
toronto,0,"Harbourfront, Regent Park",43.65426,-79.360636,9,Coffee Shop,Café,Theater,Italian Restaurant,Restaurant,Breakfast Spot,Diner,Bakery,Park,Gym / Fitness Center
toronto,1,"Ryerson, Garden District",43.657162,-79.378937,0,Coffee Shop,Cosmetics Shop,Middle Eastern Restaurant,Café,Restaurant,Tea Room,Gastropub,Clothing Store,Ramen Restaurant,Spa
toronto,2,St. James Town,43.651494,-79.375418,0,Café,Coffee Shop,Restaurant,Hotel,Bakery,Seafood Restaurant,Gym,Italian Restaurant,Gastropub,American Restaurant
toronto,3,Berczy Park,43.644771,-79.373306,0,Coffee Shop,Café,Hotel,Beer Bar,Restaurant,Japanese Restaurant,Cocktail Bar,Bakery,Park,Art Gallery
toronto,4,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Café,Ramen Restaurant,Japanese Restaurant,Italian Restaurant,Park,Arts & Crafts Store,Bubble Tea Shop,Clothing Store,Bar


In [59]:
pittsburgh_merged.head(5)

Unnamed: 0_level_0,level_1,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
pittsburgh,0,Allegheny Center,40.453,-80.005,12,Coffee Shop,Art Museum,Hotel,Deli / Bodega,Park,American Restaurant,Sculpture Garden,Burger Joint,Sports Bar,Bar
pittsburgh,1,Allegheny West,40.452083,-80.015775,11,Park,American Restaurant,Museum,Sandwich Place,Science Museum,Steakhouse,Coffee Shop,Art Museum,Lounge,Restaurant
pittsburgh,2,Beechview,40.413687,-80.022433,12,Light Rail Station,Hotel,Pizza Place,Supermarket,Playground,Food & Drink Shop,Moving Target,Fast Food Restaurant,Mexican Restaurant,Bar
pittsburgh,3,Bloomfield,40.461,-79.948,3,Pizza Place,Coffee Shop,Bar,Grocery Store,Hotel,Bank,Chinese Restaurant,Sandwich Place,Italian Restaurant,Rental Car Location
pittsburgh,4,Carrick,40.397,-79.987,3,Bank,Pharmacy,Chinese Restaurant,Bar,Pizza Place,Video Store,Auto Garage,Sandwich Place,Park,Gym


## need to separate maps and visualize clusters

In [60]:
# create manhattan map

address = 'New York, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(new_york_merged['Latitude'], new_york_merged['Longitude'], new_york_merged['Neighborhood'], new_york_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-3)],
        fill=True,
        fill_color=rainbow[int(cluster-3)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters



In [61]:
# create Toronto map

address = 'Toronto, ON'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-3)],
        fill=True,
        fill_color=rainbow[int(cluster-3)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [62]:
# create Pittsburgh map

address = 'Pittsburgh, PA'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(pittsburgh_merged['Latitude'], pittsburgh_merged['Longitude'], pittsburgh_merged['Neighborhood'], pittsburgh_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-3)],
        fill=True,
        fill_color=rainbow[int(cluster-3)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Clusters

In [63]:
pd_0 = combined_merged.loc[combined_merged['Cluster Labels'] == 0, combined_merged.columns[[1] + list(range(5, combined_merged.shape[1]))]]
export_pd0 = pd_0.to_csv (r'km0.csv', index = None, header=True)
pd_0.head()

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
toronto,"Ryerson, Garden District",Coffee Shop,Cosmetics Shop,Middle Eastern Restaurant,Café,Restaurant,Tea Room,Gastropub,Clothing Store,Ramen Restaurant,Spa
toronto,St. James Town,Café,Coffee Shop,Restaurant,Hotel,Bakery,Seafood Restaurant,Gym,Italian Restaurant,Gastropub,American Restaurant
toronto,Berczy Park,Coffee Shop,Café,Hotel,Beer Bar,Restaurant,Japanese Restaurant,Cocktail Bar,Bakery,Park,Art Gallery
toronto,Central Bay Street,Coffee Shop,Café,Ramen Restaurant,Japanese Restaurant,Italian Restaurant,Park,Arts & Crafts Store,Bubble Tea Shop,Clothing Store,Bar
toronto,"Adelaide, King, Richmond",Café,Hotel,Coffee Shop,American Restaurant,Theater,Pizza Place,Sushi Restaurant,Steakhouse,Gym,Cosmetics Shop


In [64]:
pd_1 = combined_merged.loc[combined_merged['Cluster Labels'] == 1, combined_merged.columns[[1] + list(range(5, combined_merged.shape[1]))]]
export_pd1 = pd_1.to_csv (r'km1.csv', index = None, header=True)
pd_1.head()

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
new york,Marble Hill,Park,Donut Shop,Pizza Place,Bakery,Supermarket,Mexican Restaurant,Spanish Restaurant,Café,Sandwich Place,Pharmacy
new york,Washington Heights,Pizza Place,Latin American Restaurant,Bakery,Café,Mexican Restaurant,Deli / Bodega,Bar,Mobile Phone Shop,Grocery Store,Tapas Restaurant
new york,Inwood,Latin American Restaurant,Café,Mexican Restaurant,Pizza Place,Wine Bar,Spanish Restaurant,Deli / Bodega,Bakery,Lounge,Bar
new york,Hamilton Heights,Bar,Coffee Shop,Café,Mexican Restaurant,Caribbean Restaurant,Yoga Studio,Sushi Restaurant,Chinese Restaurant,Park,Spanish Restaurant
new york,Manhattanville,Mexican Restaurant,Park,Café,Italian Restaurant,American Restaurant,Seafood Restaurant,Theater,Tennis Court,Chinese Restaurant,Southern / Soul Food Restaurant


In [65]:
pd_2 = combined_merged.loc[combined_merged['Cluster Labels'] == 2, combined_merged.columns[[1] + list(range(5, combined_merged.shape[1]))]]
export_pd2 = pd_2.to_csv (r'km2.csv', index = None, header=True)
pd_2.head()

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
pittsburgh,Stanton Heights,Video Store,Brewery,Baseball Field,Plaza,Playground,Farm,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant


In [66]:
pd_3 = combined_merged.loc[combined_merged['Cluster Labels'] == 3, combined_merged.columns[[1] + list(range(5, combined_merged.shape[1]))]]
export_pd3 = pd_3.to_csv (r'km3.csv', index = None, header=True)
pd_3.head()

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
toronto,The Beaches,Pizza Place,Pub,Coffee Shop,Japanese Restaurant,Breakfast Spot,Beach,Bar,Nail Salon,Sandwich Place,Gastropub
toronto,Studio District,Coffee Shop,Bar,American Restaurant,Bakery,Café,Italian Restaurant,Vietnamese Restaurant,Brewery,Diner,Sandwich Place
toronto,"Little Portugal, Trinity",Bar,Café,Bakery,Restaurant,Coffee Shop,Cocktail Bar,Pizza Place,Asian Restaurant,Italian Restaurant,Vegetarian / Vegan Restaurant
toronto,"High Park, The Junction South",Bar,Café,Coffee Shop,Convenience Store,Italian Restaurant,Thai Restaurant,Sushi Restaurant,Metro Station,Bakery,Gastropub
toronto,"Parkdale, Roncesvalles",Coffee Shop,Bar,Café,Sushi Restaurant,Breakfast Spot,Pizza Place,Thai Restaurant,Bakery,Sandwich Place,Eastern European Restaurant


In [67]:
pd_4 = combined_merged.loc[combined_merged['Cluster Labels'] == 4, combined_merged.columns[[1] + list(range(5, combined_merged.shape[1]))]]
export_pd4 = pd_4.to_csv (r'km4.csv', index = None, header=True)
pd_4.head()

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
pittsburgh,Hazelwood,Clothing Store,Pizza Place,Eastern European Restaurant,Bar,Pharmacy,Bakery,Food,Convenience Store,Flea Market,Fish Market


In [68]:
pd_5 = combined_merged.loc[combined_merged['Cluster Labels'] == 5, combined_merged.columns[[1] + list(range(5, combined_merged.shape[1]))]]
export_pd5 = pd_5.to_csv (r'km5.csv', index = None, header=True)
pd_5.head()

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
pittsburgh,Duquesne Heights,Scenic Lookout,American Restaurant,Seafood Restaurant,New American Restaurant,Fountain,Soccer Stadium,Outdoor Sculpture,Italian Restaurant,Fish Market,Dumpling Restaurant


In [69]:
pd_6 = combined_merged.loc[combined_merged['Cluster Labels'] == 6, combined_merged.columns[[1] + list(range(5, combined_merged.shape[1]))]]
export_pd6 = pd_6.to_csv (r'km6.csv', index = None, header=True)
pd_6.head()

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
pittsburgh,Troy Hill,New American Restaurant,Café,Bar,Check Cashing Service,Baseball Field,Harbor / Marina,Park,Snack Place,Gas Station,Trail


In [70]:
pd_7 = combined_merged.loc[combined_merged['Cluster Labels'] == 7, combined_merged.columns[[1] + list(range(5, combined_merged.shape[1]))]]
export_pd7 = pd_7.to_csv (r'km7.csv', index = None, header=True)
pd_7.head()

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
pittsburgh,Highland Park,Zoo Exhibit,Park,Gift Shop,American Restaurant,Playground,Fast Food Restaurant,Tennis Court,Grocery Store,Bar,Bakery


In [71]:
pd_8 = combined_merged.loc[combined_merged['Cluster Labels'] == 8, combined_merged.columns[[1] + list(range(5, combined_merged.shape[1]))]]
export_pd8 = pd_8.to_csv (r'km8.csv', index = None, header=True)
pd_8.head()

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
new york,Chinatown,Cocktail Bar,Ice Cream Shop,Café,Chinese Restaurant,Wine Bar,Hotel,Sandwich Place,Coffee Shop,French Restaurant,Optical Shop
new york,Yorkville,Italian Restaurant,Gym,Coffee Shop,Pizza Place,Ice Cream Shop,Bar,Japanese Restaurant,Bagel Shop,Wine Shop,Gym / Fitness Center
new york,Lenox Hill,Sushi Restaurant,Italian Restaurant,Gym / Fitness Center,Sporting Goods Shop,Gym,Coffee Shop,French Restaurant,Pizza Place,Bakery,Dessert Shop
new york,Upper West Side,Italian Restaurant,Wine Bar,Bakery,Park,Indian Restaurant,Coffee Shop,Vegetarian / Vegan Restaurant,Ice Cream Shop,American Restaurant,Burger Joint
new york,Lincoln Square,Gym / Fitness Center,Coffee Shop,Gym,French Restaurant,Plaza,Italian Restaurant,Jazz Club,Sushi Restaurant,Indie Movie Theater,Theater


In [72]:
pd_9 = combined_merged.loc[combined_merged['Cluster Labels'] == 9, combined_merged.columns[[1] + list(range(5, combined_merged.shape[1]))]]
export_pd9 = pd_9.to_csv (r'km9.csv', index = None, header=True)
pd_9.head()

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
toronto,"Harbourfront, Regent Park",Coffee Shop,Café,Theater,Italian Restaurant,Restaurant,Breakfast Spot,Diner,Bakery,Park,Gym / Fitness Center
toronto,"CN Tower, Bathurst Quay, Island airport, Harbo...",Coffee Shop,Café,Harbor / Marina,Dance Studio,Airport Lounge,Garden,Sculpture Garden,Park,Airport,Dog Run
toronto,"The Danforth West, Riverdale",Greek Restaurant,Coffee Shop,Café,Pub,Fast Food Restaurant,Ice Cream Shop,Pizza Place,Italian Restaurant,Furniture / Home Store,Diner
toronto,"The Beaches West, India Bazaar",Indian Restaurant,Coffee Shop,Café,Beach,Park,Harbor / Marina,Restaurant,Brewery,Burrito Place,Fast Food Restaurant
toronto,"Dovercourt Village, Dufferin",Café,Park,Coffee Shop,Bar,Brewery,Gourmet Shop,Portuguese Restaurant,Pharmacy,Supermarket,Sushi Restaurant


In [73]:
pd_10 = combined_merged.loc[combined_merged['Cluster Labels'] == 10, combined_merged.columns[[1] + list(range(5, combined_merged.shape[1]))]]
export_pd10 = pd_10.to_csv (r'km10.csv', index = None, header=True)
pd_10.head()

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
pittsburgh,West End,Baseball Field,Bar,Theater,French Restaurant,Diner,Pizza Place,Lingerie Store,Speakeasy,Gym / Fitness Center,Smoke Shop


In [74]:
pd_11 = combined_merged.loc[combined_merged['Cluster Labels'] == 11, combined_merged.columns[[1] + list(range(5, combined_merged.shape[1]))]]
export_pd11 = pd_11.to_csv (r'km11.csv', index = None, header=True)
pd_11.head()

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
new york,Upper East Side,Exhibit,Italian Restaurant,Gym / Fitness Center,Coffee Shop,Bakery,Yoga Studio,Hotel,American Restaurant,Mexican Restaurant,Spanish Restaurant
new york,Chelsea,Art Gallery,Seafood Restaurant,Coffee Shop,Ice Cream Shop,Italian Restaurant,American Restaurant,Bakery,Hotel,Tapas Restaurant,Nightclub
pittsburgh,Allegheny West,Park,American Restaurant,Museum,Sandwich Place,Science Museum,Steakhouse,Coffee Shop,Art Museum,Lounge,Restaurant
pittsburgh,East Liberty,Coffee Shop,American Restaurant,Furniture / Home Store,Mexican Restaurant,Salon / Barbershop,Pizza Place,Grocery Store,Liquor Store,Thai Restaurant,Bar
pittsburgh,Hill District,Grocery Store,Bar,Bakery,Coffee Shop,Sandwich Place,Art Gallery,Seafood Restaurant,Diner,Gourmet Shop,Market


In [75]:
pd_12 = combined_merged.loc[combined_merged['Cluster Labels'] == 12, combined_merged.columns[[1] + list(range(5, combined_merged.shape[1]))]]
export_pd12 = pd_12.to_csv (r'km12.csv', index = None, header=True)
pd_12.head()

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
new york,Clinton,Theater,Hotel,Wine Shop,Bakery,Burger Joint,Gym / Fitness Center,American Restaurant,Coffee Shop,Furniture / Home Store,Indie Theater
new york,Midtown,Theater,Hotel,Coffee Shop,Cuban Restaurant,Sandwich Place,Plaza,Gym,Chinese Restaurant,Mediterranean Restaurant,Italian Restaurant
new york,Hudson Yards,Dance Studio,Hotel,Theater,Italian Restaurant,Gym / Fitness Center,Coffee Shop,Gym,Wine Shop,Indie Theater,American Restaurant
pittsburgh,Allegheny Center,Coffee Shop,Art Museum,Hotel,Deli / Bodega,Park,American Restaurant,Sculpture Garden,Burger Joint,Sports Bar,Bar
pittsburgh,Beechview,Light Rail Station,Hotel,Pizza Place,Supermarket,Playground,Food & Drink Shop,Moving Target,Fast Food Restaurant,Mexican Restaurant,Bar


In [76]:
pd_13 = combined_merged.loc[combined_merged['Cluster Labels'] == 13, combined_merged.columns[[1] + list(range(5, combined_merged.shape[1]))]]
export_pd13 = pd_13.to_csv (r'km13.csv', index = None, header=True)
pd_13.head()

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
pittsburgh,Greenfield,Bar,Pizza Place,Baseball Field,Basketball Court,Dive Bar,Theater,Gift Shop,Tennis Court,Sandwich Place,Brewery


In [77]:
pd_14 = combined_merged.loc[combined_merged['Cluster Labels'] == 14, combined_merged.columns[[1] + list(range(5, combined_merged.shape[1]))]]
export_pd14 = pd_14.to_csv (r'km14.csv', index = None, header=True)
pd_14.head()

Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
new york,Battery Park City,Park,Coffee Shop,Wine Shop,Steakhouse,Memorial Site,Gym,Gym / Fitness Center,Plaza,Burger Joint,Dog Run
toronto,Rosedale,Coffee Shop,Park,Bank,Grocery Store,Trail,Metro Station,BBQ Joint,Smoothie Shop,Candy Store,Sandwich Place
toronto,Business Reply Mail Processing Centre 969 Eastern,Park,Coffee Shop,Brewery,Burrito Place,Pizza Place,Pet Store,Italian Restaurant,Sushi Restaurant,Ice Cream Shop,Liquor Store
toronto,Lawrence Park,Trail,College Gym,Café,Coffee Shop,Park,Pharmacy,Bookstore,College Quad,Gym / Fitness Center,Elementary School
pittsburgh,Point Breeze,Intersection,Bakery,Furniture / Home Store,Park,Coffee Shop,New American Restaurant,Café,Art Museum,Thai Restaurant,Hotel
