# Final Project Notebook: Comparing Neighborhoods in Windsor and Detroit

This project compares the cities Windsor and Detroit. This comparison is intresting to many as a lot of people choose to live in Windsor and comute to Detroit every day for work. This project tries to discover the differences between these cities and understand why people choses to live or work in one city verse the other.  

## Data:
The data used for this project will be scrapped using Beautiful Soup library from two wikipedia pages that hold the information about all the neighborhoods in Detroit and Windsor.

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

from pandas import DataFrame

print('Libraries imported.')

Libraries imported.


### Web Scraping using Beautiful Soup library for the City Windsor

In [2]:
# import Beautiful Soup
from bs4 import BeautifulSoup
import requests
#get wikipedia page source html for windsor
source= requests.get('https://en.wikipedia.org/wiki/Neighbourhoods_of_Windsor,_Ontario').text
soup= BeautifulSoup(source, 'lxml')

In [3]:
#get the first borough
print(soup.h3.text)

Heart of Windsor[edit]


In [4]:
#get all the neighborhoods in windsor
span=soup.find_all('span', class_='mw-headline')
spanh2=soup.find_all('h2')
spanh2=spanh2[1:-4]

In [5]:
#create empty df with column neighborhood
df_windsor=DataFrame(columns=['Neighborhood'])
df_windsor

Unnamed: 0,Neighborhood


In [6]:
#get all boroughs and save in a df
df_borough=DataFrame(columns=['Borough'])
for sh2 in spanh2:
    sh2=sh2.text.replace('[edit]', '')
    df_borough=df_borough.append({'Borough':sh2}, ignore_index=True)
df_borough

Unnamed: 0,Borough
0,Downtown
1,East End
2,West Side
3,South Side


In [7]:
# get all the neighborhoods and save in a df
for s in span:
    s=s.text
    df_windsor=df_windsor.append({'Neighborhood':s}, ignore_index=True)
df_windsor=df_windsor[:-3]
df_windsor.head()

Unnamed: 0,Neighborhood
0,Downtown
1,Heart of Windsor
2,Little Italy
3,Uptown Ottawa Street
4,South Walkerville


In [8]:
#add the borough for each neighborhood
df_windsor.loc[1:7, 'Borough']='Downtown'
df_windsor.loc[9:18, 'Borough']='East End'
df_windsor.loc[20:27, 'Borough']='West Side'
df_windsor.loc[29:, 'Borough']='South Side'
#df_windsor

In [9]:
#remove rows with nan
df_windsor.dropna(inplace=True)
#reset index after drop operation 
df_windsor.reset_index(drop=True, inplace=True)
df_windsor.head()

Unnamed: 0,Neighborhood,Borough
0,Heart of Windsor,Downtown
1,Little Italy,Downtown
2,Uptown Ottawa Street,Downtown
3,South Walkerville,Downtown
4,Walkerville,Downtown


### Web Scraping using Beautiful Soup library for the City Detroit

In [10]:
#create empty df with cloumns Neighborhood and borough
df_detroit=DataFrame(columns=['Neighborhood', 'Borough'])
df_detroit

Unnamed: 0,Neighborhood,Borough


In [11]:
#get wikipedia page source html
source_detroit= requests.get('https://en.wikipedia.org/wiki/List_of_neighborhoods_in_Detroit').text
soup_detroit= BeautifulSoup(source_detroit, 'lxml')
#get the div tag where info is stored
div=soup_detroit.find('div', class_='navbox')
tr=div.find_all('tr')
tr=tr[1:] # remove first row of table that consists of the Title: Neighborhoods in Detroit
for t2 in tr:
    l2=t2.find_all('li')
    th=t2.find_all('th')
    for t in th:
        #print(t.text)
        #print()
        for l3 in l2: 
            #print(l3.text)
            df_detroit=df_detroit.append({'Borough':t.text, 'Neighborhood':l3.text}, ignore_index=True)
        #print()

In [12]:
df_detroit.head()

Unnamed: 0,Neighborhood,Borough
0,Bricktown,Downtown
1,Broadway Ave.,Downtown
2,Campus Martius,Downtown
3,Capital Park,Downtown
4,Downtown,Downtown


In [13]:
#remove dupplicate entries
df_detroit=df_detroit[df_detroit.Borough!='Upper']
df_detroit=df_detroit[df_detroit.Borough!='Central']
df_detroit=df_detroit[df_detroit.Borough!='Lower']
df_detroit=df_detroit[df_detroit.Borough!='Mixed-use']
df_detroit=df_detroit[df_detroit.Borough!='Residential']
df_detroit.reset_index(drop=True, inplace=True)
df_detroit.head()

Unnamed: 0,Neighborhood,Borough
0,Bricktown,Downtown
1,Broadway Ave.,Downtown
2,Campus Martius,Downtown
3,Capital Park,Downtown
4,Downtown,Downtown


In [14]:
#add city and country to each dfs
df_windsor['City']='Windsor'
df_detroit['City']='Detroit'
df_windsor['Country']='CA'
df_detroit['Country']='US'
df_detroit.head()

Unnamed: 0,Neighborhood,Borough,City,Country
0,Bricktown,Downtown,Detroit,US
1,Broadway Ave.,Downtown,Detroit,US
2,Campus Martius,Downtown,Detroit,US
3,Capital Park,Downtown,Detroit,US
4,Downtown,Downtown,Detroit,US


In [15]:
df_windsor.shape

(30, 4)

### Get the Latitude and Longtitude for neighborhoods in windsor 

In [16]:
#Get the coordinates for each neighborhood
geolocator = Nominatim(user_agent="downtown_explorer")
lat=[] #intialize empty list lat to store all latitude values
lng=[] #intialize empty list lng to store all longitude values
for neighborhood in df_windsor['Neighborhood']:
    try:
        location = geolocator.geocode(neighborhood+', Windsor, Ontario, CA') #get the coordinates
        lat.append(location.latitude) #get latitude and append to the list lat
        lng.append(location.longitude) #get longtitude and append to the list lng
    except:
        #remove any neighborhoods from the windsor df for which the cordinates can't be found
        df_windsor=df_windsor[df_windsor.Neighborhood!=neighborhood] 

#print(lat)
#print(lng)
    
#print('The geograpical coordinate of Downtown Toronto are {}, {}.'.format(latitude, longitude))

In [17]:
df_windsor["Latitude"]=lat #assign list lat to the windsor df with new column Latitude 
df_windsor["Longitude"]=lng #assign list lng to the windsor df with new column Longtitude
df_windsor.head()

Unnamed: 0,Neighborhood,Borough,City,Country,Latitude,Longitude
1,Little Italy,Downtown,Windsor,CA,42.313629,-83.020592
3,South Walkerville,Downtown,Windsor,CA,42.295651,-82.996032
4,Walkerville,Downtown,Windsor,CA,42.307091,-82.994213
5,Central Windsor,Downtown,Windsor,CA,42.317099,-83.035343
8,East Windsor,East End,Windsor,CA,42.312449,-82.978921


### Plot the neighborhoods in Windsor on a map

In [18]:
#Get the coordinates for Windsor
address = 'Windsor, CA'

geolocator = Nominatim(user_agent="explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Windsor are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Windsor are 42.317099, -83.0353434.


In [19]:
# create map of Downtown Toronto using latitude and longitude values
map_downtown = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df_windsor['Latitude'], df_windsor['Longitude'], df_windsor['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_downtown)  
    
map_downtown

### Get the Latitude and Longtitude for neighborhoods in Detroit

In [20]:
#Get the coordinates for each neighborhood
lat=[] 
lng=[]
for neighborhood in df_detroit['Neighborhood']:
    try:
        location = geolocator.geocode(neighborhood+', Detroit, Michigan, US')
        lat.append(location.latitude)
        lng.append(location.longitude)
    except:
        df_detroit=df_detroit[df_detroit.Neighborhood!=neighborhood]

In [21]:
#print(len(lat))
#print(len(lng))
#print(df_detroit.shape)
df_detroit["Latitude"]=lat
df_detroit["Longitude"]=lng
df_detroit.head()

Unnamed: 0,Neighborhood,Borough,City,Country,Latitude,Longitude
0,Bricktown,Downtown,Detroit,US,42.333455,-83.041169
1,Broadway Ave.,Downtown,Detroit,US,42.335453,-83.048193
2,Campus Martius,Downtown,Detroit,US,42.332544,-83.047451
3,Capital Park,Downtown,Detroit,US,42.34304,-83.446643
4,Downtown,Downtown,Detroit,US,42.325961,-83.05676


### Plot the neighborhoods in Detroit on a map

In [22]:
address = 'Detroit, Michigan, US'

geolocator = Nominatim(user_agent="downtown_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Detroit are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Detroit are 42.3315509, -83.0466403.


In [23]:
map_downtown = folium.Map(location=[latitude, longitude], zoom_start=13)

# add markers to map
for lat, lng, label in zip(df_detroit['Latitude'], df_detroit['Longitude'], df_detroit['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_downtown)  
    
map_downtown

### Combine the data for both cities into one dataframe and plot on a map 

In [24]:
df_cities=df_windsor.append(df_detroit, ignore_index=True)
df_cities.head()

Unnamed: 0,Neighborhood,Borough,City,Country,Latitude,Longitude
0,Little Italy,Downtown,Windsor,CA,42.313629,-83.020592
1,South Walkerville,Downtown,Windsor,CA,42.295651,-82.996032
2,Walkerville,Downtown,Windsor,CA,42.307091,-82.994213
3,Central Windsor,Downtown,Windsor,CA,42.317099,-83.035343
4,East Windsor,East End,Windsor,CA,42.312449,-82.978921


In [25]:
map_downtown = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, label in zip(df_cities['Latitude'], df_cities['Longitude'], df_cities['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_downtown)  
    
map_downtown

### Use Foursquare API to get the venues in each neighborhood

In [26]:
# @hidden_cell'
CLIENT_ID = 'AX5DG4HSWTT24GDZKERVGHQ3VAQGZY4B3DQ13WGNDVCGSG0Q' # your Foursquare ID
CLIENT_SECRET = '2LME0HZD0NZTYQ54YKTZLI2AAKWQHYUBZLHTOZIEYUVUCIEZ' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: AX5DG4HSWTT24GDZKERVGHQ3VAQGZY4B3DQ13WGNDVCGSG0Q
CLIENT_SECRET:2LME0HZD0NZTYQ54YKTZLI2AAKWQHYUBZLHTOZIEYUVUCIEZ


In [27]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [28]:
LIMIT = 100 # limit of number of venues returned by Foursquare AP
radius = 500 # define radius
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude,
longitude,
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=AX5DG4HSWTT24GDZKERVGHQ3VAQGZY4B3DQ13WGNDVCGSG0Q&client_secret=2LME0HZD0NZTYQ54YKTZLI2AAKWQHYUBZLHTOZIEYUVUCIEZ&v=20180605&ll=42.3315509,-83.0466403&radius=500&limit=100'

In [29]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e9ef1f8f7706a001bcf9fe8'},
 'response': {'groups': [{'items': [{'reasons': {'count': 0,
       'items': [{'reasonName': 'globalInteractionReason',
         'summary': 'This spot is popular',
         'type': 'general'}]},
      'referralId': 'e-0-456fd59ff964a520333e1fe3-0',
      'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/park_',
          'suffix': '.png'},
         'id': '4bf58dd8d48988d163941735',
         'name': 'Park',
         'pluralName': 'Parks',
         'primary': True,
         'shortName': 'Park'}],
       'id': '456fd59ff964a520333e1fe3',
       'location': {'address': 'Woodward Ave',
        'cc': 'US',
        'city': 'Detroit',
        'country': 'United States',
        'crossStreet': 'at Michigan Ave',
        'distance': 4,
        'formattedAddress': ['Woodward Ave (at Michigan Ave)',
         'Detroit, MI 48226',
         'United States'],
        'labeledLatLngs': [{'

In [30]:
LIMIT = 100 # limit of number of venues returned by Foursquare AP
radius = 500 # define radius
#function for getting the venues for all the neighborhoods
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [31]:
#calling the function to get all the venues near each neighborhood
venues = getNearbyVenues(names=df_cities['Neighborhood'],
                                   latitudes=df_cities['Latitude'],
                                   longitudes=df_cities['Longitude']
                                  )

Little Italy
South Walkerville
Walkerville
Central Windsor
East Windsor
Ford City
Forest Glade
Fontainebleau
Pillette Road Village
Riverside
Bridgeview
Brighton Beach
Morton Industrial Park
Ojibway
Sandwich
South Cameron Woodlot
West Windsor
Devonshire Heights
Remington Park
Roseland
South Windsor
Bricktown
Broadway Ave.
Campus Martius
Capital Park
Downtown
Financial District
Greektown
Griswold Street
West Jefferson Avenue
Park Avenue
Randolph Street Commercial
Washington Blvd.
Woodward Avenue
Art Center
Brush Park
Cass Corridor
Cass-Davenport
Cass Park
Medical Center
East Ferry Avenue
Jeffries
Midtown
Wayne State University
Woodbridge
Woodward East
Woodward Avenue
Atkinson Ave.
Boston-Edison
Henry Ford Hospital
New Center
Piquette Ave.
Woodward Avenue
Chaldean Town
Detroit Golf Club
Martin Park
Palmer Park
Palmer Woods
Sherwood Forest
University District
University of Detroit Mercy
Chandler Park
Conant Gardens
Fox Creek
Greenbriar
Grixdale
Hamtramck
Krainz Woods
Milwaukee Junction
Moh

In [32]:
print(venues.shape)
venues.head()

(2970, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Little Italy,42.313629,-83.020592,Motor Burger,42.31361,-83.021081,Burger Joint
1,Little Italy,42.313629,-83.020592,Spago,42.312412,-83.024145,Italian Restaurant
2,Little Italy,42.313629,-83.020592,Italia Bakery,42.312105,-83.025012,Bakery
3,Little Italy,42.313629,-83.020592,Enzo's Trattoria,42.314253,-83.018467,Italian Restaurant
4,Little Italy,42.313629,-83.020592,Mezzo Restaurant & Lounge,42.312916,-83.022706,Italian Restaurant


### Exploratory Data Analysis

In [33]:
venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Art Center,26,26,26,26,26,26
Atkinson Ave.,7,7,7,7,7,7
Atkinson Avenue,7,7,7,7,7,7
Aviation,4,4,4,4,4,4
Bagley,5,5,5,5,5,5
Barton-McFarland,5,5,5,5,5,5
Belmont,23,23,23,23,23,23
Boston-Edison,4,4,4,4,4,4
Boynton,2,2,2,2,2,2
Bricktown,51,51,51,51,51,51


In [34]:
print('There are {} uniques categories.'.format(len(venues['Venue Category'].unique())))

There are 235 uniques categories.


### Prepare Dataframe for K-means Clustering

In [35]:
#create a df suitable for clustering 
# one hot encoding
df_onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
df_onehot['Neighborhood'] = venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [df_onehot.columns[-1]] + list(df_onehot.columns[:-1])
df_onehot = df_onehot[fixed_columns]

df_onehot.set_index('Neighborhood', inplace=True)
df_onehot.reset_index(inplace=True)

df_onehot.head()

Unnamed: 0,Neighborhood,Yoga Studio,ATM,Accessories Store,American Restaurant,Antique Shop,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Auto Garage,Auto Workshop,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Bed & Breakfast,Beer Bar,Beer Garden,Beer Store,Bike Shop,Boat or Ferry,Bookstore,Border Crossing,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bridal Shop,Bridge,Bubble Tea Shop,Building,Burger Joint,Bus Station,Business Service,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Candy Store,Casino,Cheese Shop,Child Care Service,Chinese Restaurant,Circus,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,Comfort Food Restaurant,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Country Dance Club,Credit Union,Cuban Restaurant,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Distillery,Dive Bar,Doctor's Office,Dog Run,Donut Shop,Dry Cleaner,Duty-free Shop,Electronics Store,Ethiopian Restaurant,Event Space,Factory,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Flea Market,Flower Shop,Food,Food Court,Food Service,Food Truck,Football Stadium,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden,Garden Center,Gas Station,Gay Bar,German Restaurant,Gift Shop,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Hardware Store,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hockey Arena,Home Service,Hospital,Hostel,Hot Dog Joint,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Insurance Office,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Lake,Latin American Restaurant,Laundromat,Leather Goods Store,Light Rail Station,Lighthouse,Liquor Store,Locksmith,Lounge,Market,Martial Arts Dojo,Medical Supply Store,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Motel,Movie Theater,Music Store,Music Venue,National Park,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Optical Shop,Other Nightlife,Outdoor Sculpture,Paper / Office Supplies Store,Park,Performing Arts Venue,Perfume Shop,Pet Service,Pet Store,Pharmacy,Piano Bar,Pizza Place,Playground,Plaza,Pool,Poutine Place,Pub,Public Art,Ramen Restaurant,Record Shop,Recreation Center,Rental Car Location,Rental Service,Residential Building (Apartment / Condo),Rest Area,Restaurant,Rock Club,Romanian Restaurant,Salad Place,Sandwich Place,Scenic Lookout,Science Museum,Seafood Restaurant,Shipping Store,Shoe Store,Skating Rink,Smoke Shop,Snack Place,Soccer Field,Soccer Stadium,Soup Place,Southern / Soul Food Restaurant,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Stadium,Steakhouse,Storage Facility,Supermarket,Supplement Shop,Sushi Restaurant,Taco Place,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Theme Park,Theme Park Ride / Attraction,Thrift / Vintage Store,Trail,Udon Restaurant,Used Bookstore,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store
0,Little Italy,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Little Italy,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Little Italy,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Little Italy,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Little Italy,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [36]:
#check mean for each venue category in each neighborhood
df_grouped = df_onehot.groupby('Neighborhood').mean().reset_index()
df_grouped

Unnamed: 0,Neighborhood,Yoga Studio,ATM,Accessories Store,American Restaurant,Antique Shop,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Auto Garage,Auto Workshop,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Bed & Breakfast,Beer Bar,Beer Garden,Beer Store,Bike Shop,Boat or Ferry,Bookstore,Border Crossing,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bridal Shop,Bridge,Bubble Tea Shop,Building,Burger Joint,Bus Station,Business Service,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Candy Store,Casino,Cheese Shop,Child Care Service,Chinese Restaurant,Circus,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,Comfort Food Restaurant,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Country Dance Club,Credit Union,Cuban Restaurant,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Distillery,Dive Bar,Doctor's Office,Dog Run,Donut Shop,Dry Cleaner,Duty-free Shop,Electronics Store,Ethiopian Restaurant,Event Space,Factory,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Flea Market,Flower Shop,Food,Food Court,Food Service,Food Truck,Football Stadium,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden,Garden Center,Gas Station,Gay Bar,German Restaurant,Gift Shop,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Hardware Store,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hockey Arena,Home Service,Hospital,Hostel,Hot Dog Joint,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Insurance Office,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Lake,Latin American Restaurant,Laundromat,Leather Goods Store,Light Rail Station,Lighthouse,Liquor Store,Locksmith,Lounge,Market,Martial Arts Dojo,Medical Supply Store,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Motel,Movie Theater,Music Store,Music Venue,National Park,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Optical Shop,Other Nightlife,Outdoor Sculpture,Paper / Office Supplies Store,Park,Performing Arts Venue,Perfume Shop,Pet Service,Pet Store,Pharmacy,Piano Bar,Pizza Place,Playground,Plaza,Pool,Poutine Place,Pub,Public Art,Ramen Restaurant,Record Shop,Recreation Center,Rental Car Location,Rental Service,Residential Building (Apartment / Condo),Rest Area,Restaurant,Rock Club,Romanian Restaurant,Salad Place,Sandwich Place,Scenic Lookout,Science Museum,Seafood Restaurant,Shipping Store,Shoe Store,Skating Rink,Smoke Shop,Snack Place,Soccer Field,Soccer Stadium,Soup Place,Southern / Soul Food Restaurant,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Stadium,Steakhouse,Storage Facility,Supermarket,Supplement Shop,Sushi Restaurant,Taco Place,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Theme Park,Theme Park Ride / Attraction,Thrift / Vintage Store,Trail,Udon Restaurant,Used Bookstore,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store
0,Art Center,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.115385,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.038462,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Atkinson Ave.,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Atkinson Avenue,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Aviation,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Bagley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Barton-McFarland,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Belmont,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.086957,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.086957,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.086957,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.086957,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Boston-Edison,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Boynton,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Bricktown,0.0,0.0,0.0,0.058824,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.019608,0.039216,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.039216,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.019608,0.019608,0.039216,0.0,0.0,0.019608,0.039216,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.039216,0.0,0.019608,0.0,0.0,0.0,0.039216,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.039216,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.019608,0.0,0.019608,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.039216,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [58]:
## Display top5 venue catogory in each neighborhood
num_top_venues = 5

for hood in df_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = df_grouped[df_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Art Center----
          venue  freq
0          Café  0.12
1  Intersection  0.08
2      Boutique  0.08
3    Art Museum  0.08
4   Pizza Place  0.08


----Atkinson Ave.----
               venue  freq
0       Intersection  0.29
1     Hardware Store  0.14
2  Mobile Phone Shop  0.14
3      Garden Center  0.14
4     Rental Service  0.14


----Atkinson Avenue----
               venue  freq
0       Intersection  0.29
1     Hardware Store  0.14
2  Mobile Phone Shop  0.14
3      Garden Center  0.14
4     Rental Service  0.14


----Aviation----
               venue  freq
0  Convenience Store  0.25
1        IT Services  0.25
2        Pizza Place  0.25
3             Lounge  0.25
4  Outdoor Sculpture  0.00


----Bagley----
            venue  freq
0       BBQ Joint   0.2
1    Intersection   0.2
2            Food   0.2
3  Clothing Store   0.2
4   Grocery Store   0.2


----Barton-McFarland----
                 venue  freq
0                  Pub   0.2
1                 Food   0.2
2  American Restaur

In [56]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [57]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = df_grouped['Neighborhood']

for ind in np.arange(df_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(df_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Art Center,Café,History Museum,Art Museum,Pizza Place,Intersection,Boutique,Pharmacy,Garden,Art Gallery,Science Museum
1,Atkinson Ave.,Intersection,Mobile Phone Shop,Hardware Store,Furniture / Home Store,Rental Service,Garden Center,Electronics Store,Ethiopian Restaurant,Duty-free Shop,Distillery
2,Atkinson Avenue,Intersection,Mobile Phone Shop,Hardware Store,Furniture / Home Store,Rental Service,Garden Center,Electronics Store,Ethiopian Restaurant,Duty-free Shop,Distillery
3,Aviation,Convenience Store,Pizza Place,IT Services,Lounge,Women's Store,Dive Bar,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant
4,Bagley,Food,Clothing Store,Grocery Store,Intersection,BBQ Joint,Event Space,Factory,Ethiopian Restaurant,Dive Bar,Duty-free Shop


### K-means Clustering

In [40]:
# set number of clusters
kclusters = 5

df_grouped_clustering = df_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(df_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 1, 1, 1, 1, 1, 2, 1], dtype=int32)

In [41]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

downtown_merged = df_cities

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
downtown_merged = downtown_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

downtown_merged.head() # check the last columns!

Unnamed: 0,Neighborhood,Borough,City,Country,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Little Italy,Downtown,Windsor,CA,42.313629,-83.020592,1.0,Italian Restaurant,Burger Joint,Bakery,Women's Store,Doctor's Office,Farmers Market,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant
1,South Walkerville,Downtown,Windsor,CA,42.295651,-82.996032,1.0,Coffee Shop,Pizza Place,Gas Station,Women's Store,Distillery,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant,Electronics Store
2,Walkerville,Downtown,Windsor,CA,42.307091,-82.994213,1.0,Construction & Landscaping,Rental Car Location,Music Store,Thrift / Vintage Store,Women's Store,Distillery,Factory,Event Space,Ethiopian Restaurant,Electronics Store
3,Central Windsor,Downtown,Windsor,CA,42.317099,-83.035343,1.0,Bar,Lounge,Café,Coffee Shop,Restaurant,Pizza Place,Middle Eastern Restaurant,Concert Hall,Bank,Pub
4,East Windsor,East End,Windsor,CA,42.312449,-82.978921,2.0,Home Service,Basketball Court,Park,Dive Bar,Farmers Market,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant,Electronics Store


In [42]:
import math
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=13)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(downtown_merged['Latitude'], downtown_merged['Longitude'], downtown_merged['Neighborhood'], downtown_merged['Cluster Labels']):
    if math.isnan(cluster)!=True:
        label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
        folium.CircleMarker(
            [lat, lon],
            radius=5,
            popup=label,
            color=rainbow[int(cluster-1)],
            fill=True,
            fill_color=rainbow[int(cluster-1)],
            fill_opacity=0.7).add_to(map_clusters)
    
       
map_clusters

### Identifying Sutaible labels for each cluster

In [43]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 0, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,West Side,-83.073334,0.0,Bar,Café,Coffee Shop,Performing Arts Venue,Distillery,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant,Electronics Store
51,New Center,-83.064966,0.0,Bar,Art Gallery,History Museum,Middle Eastern Restaurant,Women's Store,Doctor's Office,Farmers Market,Falafel Restaurant,Factory,Event Space
94,West,-83.150472,0.0,Bar,Women's Store,Flower Shop,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant,Electronics Store
96,West,-83.213247,0.0,Art Gallery,Women's Store,Dive Bar,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant,Electronics Store
97,West,-83.213247,0.0,Art Gallery,Women's Store,Dive Bar,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant,Electronics Store
114,West,-83.229016,0.0,Bar,Theater,Pizza Place,Discount Store,Women's Store,Farmers Market,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant
118,Southwest /Near West,-83.11659,0.0,Bar,Home Service,Women's Store,Dive Bar,Farmers Market,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant,Electronics Store
149,Historic Districts,-83.064966,0.0,Bar,Art Gallery,History Museum,Middle Eastern Restaurant,Women's Store,Doctor's Office,Farmers Market,Falafel Restaurant,Factory,Event Space


In [44]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 1, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown,-83.020592,1.0,Italian Restaurant,Burger Joint,Bakery,Women's Store,Doctor's Office,Farmers Market,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant
1,Downtown,-82.996032,1.0,Coffee Shop,Pizza Place,Gas Station,Women's Store,Distillery,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant,Electronics Store
2,Downtown,-82.994213,1.0,Construction & Landscaping,Rental Car Location,Music Store,Thrift / Vintage Store,Women's Store,Distillery,Factory,Event Space,Ethiopian Restaurant,Electronics Store
3,Downtown,-83.035343,1.0,Bar,Lounge,Café,Coffee Shop,Restaurant,Pizza Place,Middle Eastern Restaurant,Concert Hall,Bank,Pub
5,East End,-82.948673,1.0,Coffee Shop,Medical Supply Store,Greek Restaurant,Dance Studio,Sandwich Place,Breakfast Spot,Fast Food Restaurant,Beer Store,Thrift / Vintage Store,Pizza Place
6,East End,-82.911933,1.0,Optical Shop,Women's Store,Farmers Market,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant,Electronics Store,Duty-free Shop,Dry Cleaner
7,East End,-82.972931,1.0,Fast Food Restaurant,Department Store,Bank,Bar,Theme Park Ride / Attraction,Diner,Sandwich Place,American Restaurant,Martial Arts Dojo,Event Space
8,East End,-82.955028,1.0,Construction & Landscaping,Dance Studio,Business Service,Fast Food Restaurant,Home Service,Cosmetics Shop,Country Dance Club,Farmers Market,Falafel Restaurant,Factory
9,East End,-82.943854,1.0,Pizza Place,Pharmacy,Bank,Coffee Shop,Gas Station,Discount Store,Bar,Grocery Store,Department Store,Dry Cleaner
10,West Side,-83.050122,1.0,Diner,Greek Restaurant,Italian Restaurant,Ice Cream Shop,Duty-free Shop,Doctor's Office,Dog Run,Donut Shop,Dry Cleaner,Electronics Store


In [45]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 2, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,East End,-82.978921,2.0,Home Service,Basketball Court,Park,Dive Bar,Farmers Market,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant,Electronics Store
15,West Side,-83.042389,2.0,Park,Trail,Women's Store,Distillery,Farmers Market,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant,Electronics Store
56,North,-83.117251,2.0,Pool,Park,Discount Store,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant,Electronics Store,Duty-free Shop,Dry Cleaner
63,East,-82.930019,2.0,Park,Pool,Scenic Lookout,Movie Theater,Harbor / Marina,Lighthouse,Event Space,Ethiopian Restaurant,Electronics Store,Duty-free Shop
64,East,-83.447946,2.0,Soccer Field,Park,Trail,Women's Store,Discount Store,Factory,Event Space,Ethiopian Restaurant,Electronics Store,Duty-free Shop
79,East,-83.025464,2.0,Park,Women's Store,Distillery,Farmers Market,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant,Electronics Store,Duty-free Shop
116,Southwest /Near West,-83.154776,2.0,Park,Seafood Restaurant,Women's Store,Distillery,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant,Electronics Store,Duty-free Shop


In [46]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 3, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,Downtown,-83.129093,3.0,Bridge,Women's Store,Dive Bar,Farmers Market,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant,Electronics Store,Duty-free Shop


In [47]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 4, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,South Side,-82.986301,4.0,Furniture / Home Store,Women's Store,Dive Bar,Farmers Market,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant,Electronics Store,Duty-free Shop
18,South Side,-83.004876,4.0,Furniture / Home Store,Baseball Field,Women's Store,Flea Market,Farmers Market,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant,Electronics Store
69,East,-82.977877,4.0,Furniture / Home Store,Women's Store,Dive Bar,Farmers Market,Falafel Restaurant,Factory,Event Space,Ethiopian Restaurant,Electronics Store,Duty-free Shop
