In [1]:
#importing libraries
import numpy as np 
import pandas as pd 

In [2]:
# import library to open URLs
import urllib.request

In [3]:
url = "https://en.wikipedia.org/wiki/List_of_neighborhoods_in_Chicago"

In [4]:
# open url using urllib.request and put HTML into variable page
page = urllib.request.urlopen(url)

#### Using Beautiful soup to scrape the wikipedia page

In [5]:
# import BeautifulSoup library so we can parse HTML
from bs4 import BeautifulSoup

In [6]:
# parse the HTML from our URL into the BeautifulSoup parse tree format
soup = BeautifulSoup(page, "lxml")

In [7]:
# look at the HTML on our web page
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of neighborhoods in Chicago - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"1a70048c-a192-423c-a4f0-e1c719fa030f","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_neighborhoods_in_Chicago","wgTitle":"List of neighborhoods in Chicago","wgCurRevisionId":960797306,"wgRevisionId":960797306,"wgArticleId":1942395,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Commons category link is on Wikidata","Webarchive template wayback links","Neighborhoods in Ch

In [8]:
# find the table and the class and assign it to a variable right_table
right_table=soup.find('table', class_='wikitable sortable')
right_table

<table class="wikitable sortable">
<tbody><tr>
<th>Neighborhood</th>
<th>Community area
</th></tr>
<tr>
<td>Albany Park
</td>
<td><a href="/wiki/Albany_Park,_Chicago" title="Albany Park, Chicago">Albany Park</a>
</td></tr>
<tr>
<td><a class="mw-redirect" href="/wiki/Altgeld_Gardens,_Chicago" title="Altgeld Gardens, Chicago">Altgeld Gardens</a>
</td>
<td>Riverdale
</td></tr>
<tr>
<td><a class="mw-redirect" href="/wiki/Andersonville,_Chicago" title="Andersonville, Chicago">Andersonville</a>
</td>
<td>Edgewater
</td></tr>
<tr>
<td>Archer Heights
</td>
<td><a href="/wiki/Archer_Heights,_Chicago" title="Archer Heights, Chicago">Archer Heights</a>
</td></tr>
<tr>
<td>Armour Square
</td>
<td><a href="/wiki/Armour_Square,_Chicago" title="Armour Square, Chicago">Armour Square</a>
</td></tr>
<tr>
<td>Ashburn
</td>
<td><a href="/wiki/Ashburn,_Chicago" title="Ashburn, Chicago">Ashburn</a>
</td></tr>
<tr>
<td>Ashburn Estates
</td>
<td>Ashburn
</td></tr>
<tr>
<td>Auburn Gresham
</td>
<td><a href="/w

In [9]:
# start looping through each row to get data for the table
A=[]
B=[]

for row in right_table.findAll('tr'):
    cells=row.findAll('td')
    if len(cells)== 2:
        A.append(cells[0].find(text=True))
        B.append(cells[1].find(text=True))

#### convert the table extracted from the webpage into a pandas dataframe

In [10]:
df=pd.DataFrame(A,columns=['Villages'])
df['Community area']=B
df.dtypes

Villages          object
Community area    object
dtype: object

In [11]:
df.head()

Unnamed: 0,Villages,Community area
0,Albany Park\n,Albany Park
1,Altgeld Gardens,Riverdale\n
2,Andersonville,Edgewater\n
3,Archer Heights\n,Archer Heights
4,Armour Square\n,Armour Square


In [12]:
#dropping the extra characters in the postalcode column values
df['Villages'] = df['Villages'].astype(str).replace('\n','', regex=True)

df['Community area'] = df['Community area'].astype(str).replace('\n','', regex=True)

In [13]:
# for grouping the different neighborhoods in the same community area and separate them with a comma I will use the following code
chicago_grouped = df.groupby(['Community area'], as_index=False).agg(lambda x:','.join(x))
chicago_grouped.head(20)

Unnamed: 0,Community area,Villages
0,Albany Park,"Albany Park,Mayfair,North Mayfair,Ravenswood M..."
1,Archer Heights,Archer Heights
2,Armour Square,"Armour Square,Chinatown,Wentworth Gardens"
3,Ashburn,"Ashburn,Ashburn Estates,Beverly View,Crestline..."
4,Auburn Gresham,"Auburn Gresham,Gresham"
5,Austin,"Galewood,The Island,North Austin,South Austin,..."
6,Avalon Park,"Avalon Park,Marynook,Stony Island Park"
7,Avondale,"Avondale,Jackowo,Polish Village,Wacławowo"
8,Belmont Cragin,"Belmont Central,Brickyard,Cragin,Hanson Park"
9,Beverly,"Beverly,East Beverly,West Beverly"


In [14]:
# checking string value for cells that are Not assigned
chicago_grouped.at[10, 'Villages']

'Bridgeport'

In [15]:
chicago_grouped.shape

(78, 2)

#### Get geographical coordinates using geopy and combining it with community area dataframe 

In [16]:
!conda install -c conda-forge geopy --yes

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



In [17]:
chicago_grouped.head()

Unnamed: 0,Community area,Villages
0,Albany Park,"Albany Park,Mayfair,North Mayfair,Ravenswood M..."
1,Archer Heights,Archer Heights
2,Armour Square,"Armour Square,Chinatown,Wentworth Gardens"
3,Ashburn,"Ashburn,Ashburn Estates,Beverly View,Crestline..."
4,Auburn Gresham,"Auburn Gresham,Gresham"


In [18]:
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values 
geolocator = Nominatim(user_agent="Chicago_food") 
chicago_grouped['Community area Coord']= chicago_grouped['Community area'].apply(geolocator.geocode).apply(lambda x: (x.latitude, x.longitude)) 
chicago_grouped[['Latitude', 'Longitude']] = chicago_grouped['Community area Coord'].apply(pd.Series) 

chicago_grouped.drop(['Community area Coord'], axis=1, inplace=True) 
chicago_grouped

Unnamed: 0,Community area,Villages,Latitude,Longitude
0,Albany Park,"Albany Park,Mayfair,North Mayfair,Ravenswood M...",41.971937,-87.716174
1,Archer Heights,Archer Heights,41.811422,-87.726165
2,Armour Square,"Armour Square,Chinatown,Wentworth Gardens",41.840033,-87.633107
3,Ashburn,"Ashburn,Ashburn Estates,Beverly View,Crestline...",39.043719,-77.487490
4,Auburn Gresham,"Auburn Gresham,Gresham",41.743387,-87.656042
...,...,...,...,...
73,West Lawn,"Ford City,West Lawn",40.329815,-75.994381
74,West Pullman,West Pullman,41.675046,-87.637823
75,West Ridge,"Nortown,Peterson Park,Rosehill,West Ridge,West...",42.003548,-87.696243
76,West Town,"East Village,Noble Square,Polish Downtown,Pula...",41.901421,-87.686166


In [19]:
#get geographical coordinates for Chicago
address = 'Chicago, IL'

geolocator = Nominatim(user_agent="Chicago_food")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Chicago are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Chicago are 41.8755616, -87.6244212.


### Clustering different neighborhoods of Chicago

In [20]:
# importing and downloading all dependencies
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Libraries imported.


### Plot a map to visualize the Community areas of Chicago

In [21]:
# create map of Chicago using latitude and longitude 
map_chicago = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to the map
for lat, lng, label in zip(chicago_grouped['Latitude'], chicago_grouped['Longitude'], chicago_grouped['Community area']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_chicago) 
    
map_chicago
# if map does not render in github please copy the following link to view the map - thanks!
# https://github.com/5vasavi/Coursera_Capstone/blob/master/Neighborhoods%20of%20Chicago_map.jpg

### Use Foursquare API to get the nearest venues

In [22]:
CLIENT_ID = 'TET0RDKKWA10DBJDOVOMW3FZPJOZYVP5V1JQGBCYNTODQVLF' # your Foursquare ID
CLIENT_SECRET = 'HOJTP2R5S3RJL3KERZZBI1AF45LXJTU2QKQ0DUTS2LIBFQ4U' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
ACCESS_TOKEN = '2T0NMOJT1LJVKGDLYJJ5GPLZW0BTVPXPFBLIJHSV2UZOKXZW'

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: TET0RDKKWA10DBJDOVOMW3FZPJOZYVP5V1JQGBCYNTODQVLF
CLIENT_SECRET:HOJTP2R5S3RJL3KERZZBI1AF45LXJTU2QKQ0DUTS2LIBFQ4U


In [23]:
# get listing of nearby venues 
def getNearbyVenues(names, latitudes, longitudes):
    radius=500 #popular venues within this radius
    LIMIT=100 #Get top 100 venues
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&oauth_token={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET,
            ACCESS_TOKEN,
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)

        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [24]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [25]:
# printing specific community areas latitude and longitude
community_area_latitude = chicago_grouped.loc[0, 'Latitude'] # Community area latitude value
community_area_longitude = chicago_grouped.loc[0, 'Longitude'] # Community area longitude value

community_area_name = chicago_grouped.loc[0, 'Community area'] # Community area name

print('Latitude and longitude values of {} are {}, {}.'.format(community_area_name, 
                                                               community_area_latitude, 
                                                               community_area_longitude))


Latitude and longitude values of Albany Park are 41.9719367, -87.7161739.


In [26]:
# run the function on each neighborhood and create a new dataframe called chicago_venues.
chicago_venues = getNearbyVenues(names=df['Community area'],
                                   latitudes=chicago_grouped['Latitude'],
                                   longitudes=chicago_grouped['Longitude']
                                  )


Albany Park
Riverdale
Edgewater
Archer Heights
Armour Square
Ashburn
Ashburn
Auburn Gresham
Avalon Park
Avondale
Irving Park
New City
Belmont Cragin
Hermosa
Dunning
Dunning
Beverly
Ashburn
Morgan Park
Norwood Park
Lake View
Lincoln Square
Washington Heights
Belmont Cragin
Bridgeport
Brighton Park
Douglas
Logan Square
Lincoln Square
Uptown
Burnside
Near North Side
Calumet Heights
New City
Near South Side
Chatham
Chicago Lawn
Armour Square
Clearing
Uptown
Clearing
Clearing
Pullman
Belmont Cragin
Ashburn
Douglas
Near South Side
North Lawndale
Dunning
Beverly
Chatham
East Garfield Park
Hyde Park
Lower West Side
East Side
West Town
Riverdale
Forest Glen
Edgewater
Edgewater
Edgewater
Edison Park
Englewood
Roseland
East Garfield Park
West Lawn
Forest Glen
Fuller Park
Near West Side
Gage Park
Austin
Douglas
Garfield Ridge
Jefferson Park
Near North Side
Riverdale
Near North Side
Lake View


In [27]:
# check size of resulting dataframe
chicago_venues.shape

(3072, 7)

In [28]:
# Create a Data-Frame concentrating only on Restaurants 
chicago_restaurants = chicago_venues[chicago_venues['Venue Category'].str.contains('Restaurant')].reset_index(drop=True)
chicago_restaurants.index = np.arange(1, len(chicago_restaurants )+1)

print (chicago_restaurants['Venue Category'].value_counts())


Mexican Restaurant                 136
Fast Food Restaurant                54
Chinese Restaurant                  42
American Restaurant                 41
Italian Restaurant                  29
Restaurant                          22
Seafood Restaurant                  16
Latin American Restaurant           14
Sushi Restaurant                    13
Asian Restaurant                    11
Japanese Restaurant                 11
Thai Restaurant                     10
New American Restaurant              8
Mediterranean Restaurant             8
French Restaurant                    8
Vietnamese Restaurant                7
Vegetarian / Vegan Restaurant        5
Korean Restaurant                    5
Indian Restaurant                    3
Eastern European Restaurant          3
Tapas Restaurant                     3
Greek Restaurant                     3
Cajun / Creole Restaurant            3
Cuban Restaurant                     3
Caribbean Restaurant                 3
Afghan Restaurant        

In [29]:
print('There are {} unique categories of restaurants.'.format(len(chicago_restaurants['Venue Category'].unique())))

There are 46 unique categories of restaurants.


In [30]:
# check size of resulting dataframe
print ("Shape of the Data-Frame with Venue Category only Restaurant: ", chicago_restaurants.shape)
chicago_restaurants.head(10)

Shape of the Data-Frame with Venue Category only Restaurant:  (488, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
1,Albany Park,41.971937,-87.716174,Peking Mandarin Resturant,41.968292,-87.715783,Chinese Restaurant
2,Albany Park,41.971937,-87.716174,Banpojung,41.975707,-87.715609,Korean Restaurant
3,Albany Park,41.971937,-87.716174,Seo Hae Kwan,41.968593,-87.717333,Korean Restaurant
4,Albany Park,41.971937,-87.716174,Chifa Grill Chicken Restaurant,41.968593,-87.717333,Latin American Restaurant
5,Albany Park,41.971937,-87.716174,La Fogata,41.968135,-87.714796,Mexican Restaurant
6,Albany Park,41.971937,-87.716174,Pan Po Jung,41.975979,-87.71563,Korean Restaurant
7,Albany Park,41.971937,-87.716174,Gorilla Sushi,41.96824,-87.713559,Sushi Restaurant
8,Albany Park,41.971937,-87.716174,El Siglo XX,41.96847,-87.71957,Latin American Restaurant
9,Albany Park,41.971937,-87.716174,Magic Grill,41.968613,-87.71244,Mexican Restaurant
10,Albany Park,41.971937,-87.716174,El Santo Taqueria,41.975815,-87.713226,Mexican Restaurant


In [31]:
# create a dataframe of top 10 categories of restaurants
chicago_restaurant_top10 = chicago_restaurants['Venue Category'].value_counts()[0:10].to_frame(name='frequency')
chicago_restaurant_top10=chicago_restaurant_top10.reset_index()
chicago_restaurant_top10.rename(index=str, columns={"index": "Restaurant_Category", "frequency": "Frequency"}, inplace=True)
chicago_restaurant_top10


Unnamed: 0,Restaurant_Category,Frequency
0,Mexican Restaurant,136
1,Fast Food Restaurant,54
2,Chinese Restaurant,42
3,American Restaurant,41
4,Italian Restaurant,29
5,Restaurant,22
6,Seafood Restaurant,16
7,Latin American Restaurant,14
8,Sushi Restaurant,13
9,Asian Restaurant,11


Analysing the categories of restaurants in Chicago Area; It can be inferred from the data that Mexican Restaurants are the most popular type of restaurants followed by fast food, Chinese, American, Italian, restaurant, Seafood, Sushi, latin American and Asian

In [32]:
#Analysing neighborhoods to see how many restaurants were returned for each neighborhood
chicago_neighborhood_restaurant = chicago_restaurants.groupby(['Neighborhood'])['Venue Category'].apply(lambda x: x[x.str.contains('Restaurant')].count())
chicago_neighborhood_restaurant

Neighborhood
Albany Park           10
Archer Heights         3
Armour Square          1
Ashburn               18
Auburn Gresham         2
Austin                10
Avalon Park           11
Avondale               2
Belmont Cragin        13
Beverly                6
Bridgeport             1
Brighton Park          3
Burnside               5
Chatham               17
Clearing              37
Douglas               10
Dunning               26
East Garfield Park    26
Edgewater             33
Englewood              9
Forest Glen            6
Fuller Park            7
Garfield Ridge         9
Hyde Park              1
Jefferson Park         1
Lake View             10
Lincoln Square        25
Logan Square           3
Lower West Side       21
Morgan Park            1
Near North Side       19
Near South Side        3
Near West Side        20
New City               7
North Lawndale         8
Norwood Park           2
Pullman               22
Riverdale             48
Uptown                17
Washington H

In [33]:
# create a dataframe of restaurants in neighborhoods
chicago_hood_restaurant_df  = chicago_neighborhood_restaurant.to_frame().reset_index()
chicago_hood_restaurant_df.columns = ['Neighborhood', 'Number of Restaurants']
chicago_hood_restaurant_df.index = np.arange(1, len(chicago_hood_restaurant_df)+1)
list_rest_no =chicago_hood_restaurant_df['Number of Restaurants'].to_list()
list_community_area =chicago_hood_restaurant_df['Neighborhood'].to_list()
chicago_hood_restaurant_df.head()

Unnamed: 0,Neighborhood,Number of Restaurants
1,Albany Park,10
2,Archer Heights,3
3,Armour Square,1
4,Ashburn,18
5,Auburn Gresham,2


In [34]:
# check size of resulting dataframe
chicago_hood_restaurant_df.shape

(41, 2)

In [35]:
# one hot encoding
chicago_onehot = pd.get_dummies(chicago_restaurants[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
chicago_onehot['Neighborhood'] = chicago_restaurants['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [chicago_onehot.columns[-1]] + list(chicago_onehot.columns[:-1])
chicago_onehot = chicago_onehot[fixed_columns]

chicago_onehot.head()


Unnamed: 0,Neighborhood,Afghan Restaurant,African Restaurant,American Restaurant,Asian Restaurant,Brazilian Restaurant,Cajun / Creole Restaurant,Cambodian Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Cuban Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Eastern European Restaurant,Empanada Restaurant,Fast Food Restaurant,Filipino Restaurant,Fondue Restaurant,French Restaurant,German Restaurant,Greek Restaurant,Hotpot Restaurant,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,New American Restaurant,Peruvian Restaurant,Ramen Restaurant,Restaurant,Seafood Restaurant,South American Restaurant,Southern / Soul Food Restaurant,Spanish Restaurant,Sushi Restaurant,Tapas Restaurant,Tex-Mex Restaurant,Thai Restaurant,Turkish Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
1,Albany Park,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Albany Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Albany Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Albany Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,Albany Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [36]:
# check size of resulting dataframe
chicago_onehot.shape

(488, 47)

In [38]:
# Grouping by neighborhoods and the frequency of occurence of each category of restaurant
chicago_onehot_grouped = chicago_onehot.groupby('Neighborhood').mean().reset_index()
chicago_onehot_grouped.head(10)

Unnamed: 0,Neighborhood,Afghan Restaurant,African Restaurant,American Restaurant,Asian Restaurant,Brazilian Restaurant,Cajun / Creole Restaurant,Cambodian Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Cuban Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Eastern European Restaurant,Empanada Restaurant,Fast Food Restaurant,Filipino Restaurant,Fondue Restaurant,French Restaurant,German Restaurant,Greek Restaurant,Hotpot Restaurant,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,New American Restaurant,Peruvian Restaurant,Ramen Restaurant,Restaurant,Seafood Restaurant,South American Restaurant,Southern / Soul Food Restaurant,Spanish Restaurant,Sushi Restaurant,Tapas Restaurant,Tex-Mex Restaurant,Thai Restaurant,Turkish Restaurant,Ukrainian Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Albany Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.2,0.0,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Archer Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Armour Square,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Ashburn,0.0,0.0,0.111111,0.0,0.0,0.111111,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.055556,0.055556,0.111111,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.055556,0.0,0.0,0.0,0.0
4,Auburn Gresham,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Austin,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Avalon Park,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.545455,0.0,0.0,0.0,0.0,0.181818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Avondale,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Belmont Cragin,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.307692,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.384615,0.0,0.0,0.0,0.0,0.076923,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Beverly,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.333333,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [39]:
# check size of resulting dataframe
chicago_onehot_grouped.shape

(41, 47)

In [40]:
#Print the neighbourhoods with their respective top 10 restaurants
num_top_restaurants = 10

for hood in chicago_onehot_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = chicago_onehot_grouped[chicago_onehot_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_restaurants))
    print('\n')


----Albany Park----
                       venue  freq
0          Korean Restaurant   0.3
1         Mexican Restaurant   0.3
2  Latin American Restaurant   0.2
3           Sushi Restaurant   0.1
4         Chinese Restaurant   0.1
5         Seafood Restaurant   0.0
6   Mediterranean Restaurant   0.0
7  Middle Eastern Restaurant   0.0
8    New American Restaurant   0.0
9        Peruvian Restaurant   0.0


----Archer Heights----
                       venue  freq
0         Italian Restaurant  0.33
1           Greek Restaurant  0.33
2         Chinese Restaurant  0.33
3         Seafood Restaurant  0.00
4  Latin American Restaurant  0.00
5   Mediterranean Restaurant  0.00
6         Mexican Restaurant  0.00
7  Middle Eastern Restaurant  0.00
8    New American Restaurant  0.00
9        Peruvian Restaurant  0.00


----Armour Square----
                       venue  freq
0       Fast Food Restaurant   1.0
1          Afghan Restaurant   0.0
2         Seafood Restaurant   0.0
3  Latin American Res

                       venue  freq
0        American Restaurant  0.25
1         Chinese Restaurant  0.25
2       Fast Food Restaurant  0.25
3         Seafood Restaurant  0.25
4          Afghan Restaurant  0.00
5  South American Restaurant  0.00
6   Mediterranean Restaurant  0.00
7         Mexican Restaurant  0.00
8  Middle Eastern Restaurant  0.00
9    New American Restaurant  0.00




In [41]:
# Create Dataframe for the most common restaurants

# create a function to return most common restaurants
def return_most_common_restaurants(row, num_top_restaurants):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_restaurants]


num_top_restaurants = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top restaurants
columns = ['Neighborhood']
for ind in np.arange(num_top_restaurants):
    try:
        columns.append('{}{} Most Common Restaurant'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Restaurant'.format(ind+1))

# create a new dataframe
neighborhoods_restaurant_sorted = pd.DataFrame(columns=columns)
neighborhoods_restaurant_sorted['Neighborhood'] = chicago_onehot_grouped['Neighborhood']

for ind in np.arange(chicago_onehot_grouped.shape[0]):
    neighborhoods_restaurant_sorted.iloc[ind, 1:] = return_most_common_restaurants(chicago_onehot_grouped.iloc[ind, :], num_top_restaurants)

neighborhoods_restaurant_sorted.head(10)


Unnamed: 0,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
0,Albany Park,Korean Restaurant,Mexican Restaurant,Latin American Restaurant,Sushi Restaurant,Chinese Restaurant,Vietnamese Restaurant,Dumpling Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant
1,Archer Heights,Italian Restaurant,Greek Restaurant,Chinese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant
2,Armour Square,Fast Food Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,Greek Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant,Eastern European Restaurant
3,Ashburn,Fast Food Restaurant,American Restaurant,Italian Restaurant,Cajun / Creole Restaurant,Restaurant,Mexican Restaurant,Chinese Restaurant,Thai Restaurant,Sushi Restaurant,Latin American Restaurant
4,Auburn Gresham,Restaurant,Mexican Restaurant,Hotpot Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant,Eastern European Restaurant
5,Austin,Mexican Restaurant,Fast Food Restaurant,American Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant
6,Avalon Park,Mexican Restaurant,Restaurant,American Restaurant,Latin American Restaurant,Chinese Restaurant,Dim Sum Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant
7,Avondale,Mediterranean Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant,Eastern European Restaurant
8,Belmont Cragin,Mexican Restaurant,Chinese Restaurant,Restaurant,Japanese Restaurant,Cantonese Restaurant,Seafood Restaurant,Dim Sum Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant
9,Beverly,Restaurant,Mexican Restaurant,Mediterranean Restaurant,Chinese Restaurant,Dim Sum Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant


In [42]:
# check size of resulting dataframe
neighborhoods_restaurant_sorted.shape

(41, 11)

#### Run K-Means to cluster neighborhoods into 5 clusters based on restaurant categories data

In [43]:
# set number of clusters to 5
kclusters = 5

chicago_onehot_grouped_clustering = chicago_onehot_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(chicago_onehot_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]


array([1, 1, 0, 1, 3, 3, 3, 4, 1, 1])

In [44]:
# Merge chicago_grouped dataset with neighborhoods_restaurants_sorted dataset

chicago_grouped.head()

Unnamed: 0,Community area,Villages,Latitude,Longitude
0,Albany Park,"Albany Park,Mayfair,North Mayfair,Ravenswood M...",41.971937,-87.716174
1,Archer Heights,Archer Heights,41.811422,-87.726165
2,Armour Square,"Armour Square,Chinatown,Wentworth Gardens",41.840033,-87.633107
3,Ashburn,"Ashburn,Ashburn Estates,Beverly View,Crestline...",39.043719,-77.48749
4,Auburn Gresham,"Auburn Gresham,Gresham",41.743387,-87.656042


In [45]:
chicago_grouped.shape

(78, 4)

In [46]:
neighborhoods_restaurant_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
0,Albany Park,Korean Restaurant,Mexican Restaurant,Latin American Restaurant,Sushi Restaurant,Chinese Restaurant,Vietnamese Restaurant,Dumpling Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant
1,Archer Heights,Italian Restaurant,Greek Restaurant,Chinese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant
2,Armour Square,Fast Food Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,Greek Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant,Eastern European Restaurant
3,Ashburn,Fast Food Restaurant,American Restaurant,Italian Restaurant,Cajun / Creole Restaurant,Restaurant,Mexican Restaurant,Chinese Restaurant,Thai Restaurant,Sushi Restaurant,Latin American Restaurant
4,Auburn Gresham,Restaurant,Mexican Restaurant,Hotpot Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant,Eastern European Restaurant


In [47]:
neighborhoods_restaurant_sorted.shape

(41, 11)

In [48]:
#add clustering labels
neighborhoods_restaurant_sorted_w_clusters = neighborhoods_restaurant_sorted

neighborhoods_restaurant_sorted_w_clusters.head(10)

Unnamed: 0,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
0,Albany Park,Korean Restaurant,Mexican Restaurant,Latin American Restaurant,Sushi Restaurant,Chinese Restaurant,Vietnamese Restaurant,Dumpling Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant
1,Archer Heights,Italian Restaurant,Greek Restaurant,Chinese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant
2,Armour Square,Fast Food Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,Greek Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant,Eastern European Restaurant
3,Ashburn,Fast Food Restaurant,American Restaurant,Italian Restaurant,Cajun / Creole Restaurant,Restaurant,Mexican Restaurant,Chinese Restaurant,Thai Restaurant,Sushi Restaurant,Latin American Restaurant
4,Auburn Gresham,Restaurant,Mexican Restaurant,Hotpot Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant,Eastern European Restaurant
5,Austin,Mexican Restaurant,Fast Food Restaurant,American Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant
6,Avalon Park,Mexican Restaurant,Restaurant,American Restaurant,Latin American Restaurant,Chinese Restaurant,Dim Sum Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant
7,Avondale,Mediterranean Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant,Eastern European Restaurant
8,Belmont Cragin,Mexican Restaurant,Chinese Restaurant,Restaurant,Japanese Restaurant,Cantonese Restaurant,Seafood Restaurant,Dim Sum Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant
9,Beverly,Restaurant,Mexican Restaurant,Mediterranean Restaurant,Chinese Restaurant,Dim Sum Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant


In [49]:
#add clustering labels
neighborhoods_restaurant_sorted_w_clusters.insert(0, 'Cluster Labels', kmeans.labels_)

neighborhoods_restaurant_sorted_w_clusters.head(10)

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
0,1,Albany Park,Korean Restaurant,Mexican Restaurant,Latin American Restaurant,Sushi Restaurant,Chinese Restaurant,Vietnamese Restaurant,Dumpling Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant
1,1,Archer Heights,Italian Restaurant,Greek Restaurant,Chinese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant
2,0,Armour Square,Fast Food Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,Greek Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant,Eastern European Restaurant
3,1,Ashburn,Fast Food Restaurant,American Restaurant,Italian Restaurant,Cajun / Creole Restaurant,Restaurant,Mexican Restaurant,Chinese Restaurant,Thai Restaurant,Sushi Restaurant,Latin American Restaurant
4,3,Auburn Gresham,Restaurant,Mexican Restaurant,Hotpot Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant,Eastern European Restaurant
5,3,Austin,Mexican Restaurant,Fast Food Restaurant,American Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant
6,3,Avalon Park,Mexican Restaurant,Restaurant,American Restaurant,Latin American Restaurant,Chinese Restaurant,Dim Sum Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant
7,4,Avondale,Mediterranean Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant,Eastern European Restaurant
8,1,Belmont Cragin,Mexican Restaurant,Chinese Restaurant,Restaurant,Japanese Restaurant,Cantonese Restaurant,Seafood Restaurant,Dim Sum Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant
9,1,Beverly,Restaurant,Mexican Restaurant,Mediterranean Restaurant,Chinese Restaurant,Dim Sum Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant


In [50]:
neighborhoods_restaurant_sorted_w_clusters.shape

(41, 12)

In [51]:
# save chicago_grouped as chicago_merged
chicago_merged = chicago_grouped
chicago_merged.head()

Unnamed: 0,Community area,Villages,Latitude,Longitude
0,Albany Park,"Albany Park,Mayfair,North Mayfair,Ravenswood M...",41.971937,-87.716174
1,Archer Heights,Archer Heights,41.811422,-87.726165
2,Armour Square,"Armour Square,Chinatown,Wentworth Gardens",41.840033,-87.633107
3,Ashburn,"Ashburn,Ashburn Estates,Beverly View,Crestline...",39.043719,-77.48749
4,Auburn Gresham,"Auburn Gresham,Gresham",41.743387,-87.656042


In [52]:
chicago_merged.rename(columns={'Community area':'Neighborhood'}, inplace=True)

chicago_merged = chicago_merged.join(neighborhoods_restaurant_sorted.set_index('Neighborhood'), on='Neighborhood')

chicago_merged.head()


Unnamed: 0,Neighborhood,Villages,Latitude,Longitude,Cluster Labels,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
0,Albany Park,"Albany Park,Mayfair,North Mayfair,Ravenswood M...",41.971937,-87.716174,1.0,Korean Restaurant,Mexican Restaurant,Latin American Restaurant,Sushi Restaurant,Chinese Restaurant,Vietnamese Restaurant,Dumpling Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant
1,Archer Heights,Archer Heights,41.811422,-87.726165,1.0,Italian Restaurant,Greek Restaurant,Chinese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant
2,Armour Square,"Armour Square,Chinatown,Wentworth Gardens",41.840033,-87.633107,0.0,Fast Food Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,Greek Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant,Eastern European Restaurant
3,Ashburn,"Ashburn,Ashburn Estates,Beverly View,Crestline...",39.043719,-77.48749,1.0,Fast Food Restaurant,American Restaurant,Italian Restaurant,Cajun / Creole Restaurant,Restaurant,Mexican Restaurant,Chinese Restaurant,Thai Restaurant,Sushi Restaurant,Latin American Restaurant
4,Auburn Gresham,"Auburn Gresham,Gresham",41.743387,-87.656042,3.0,Restaurant,Mexican Restaurant,Hotpot Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant,Eastern European Restaurant


In [52]:
chicago_merged.head(50)

Unnamed: 0,Neighborhood,Villages,Latitude,Longitude,Cluster Labels,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
0,Albany Park,"Albany Park,Mayfair,North Mayfair,Ravenswood M...",41.971937,-87.716174,1.0,Korean Restaurant,Mexican Restaurant,Latin American Restaurant,Sushi Restaurant,Chinese Restaurant,Vietnamese Restaurant,Eastern European Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant
1,Archer Heights,Archer Heights,41.811422,-87.726165,0.0,Italian Restaurant,Chinese Restaurant,Greek Restaurant,Eastern European Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant
2,Armour Square,"Armour Square,Chinatown,Wentworth Gardens",41.840033,-87.633107,2.0,Fast Food Restaurant,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Falafel Restaurant,Empanada Restaurant,Eastern European Restaurant
3,Ashburn,"Ashburn,Ashburn Estates,Beverly View,Crestline...",39.043719,-77.48749,1.0,Fast Food Restaurant,Restaurant,Cajun / Creole Restaurant,Sushi Restaurant,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Cuban Restaurant,Filipino Restaurant,Falafel Restaurant
4,Auburn Gresham,"Auburn Gresham,Gresham",41.743387,-87.656042,4.0,Restaurant,Mexican Restaurant,Vietnamese Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Empanada Restaurant,Eastern European Restaurant
5,Austin,"Galewood,The Island,North Austin,South Austin,...",30.271129,-97.7437,4.0,Mexican Restaurant,Fast Food Restaurant,American Restaurant,Vietnamese Restaurant,Eastern European Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Falafel Restaurant,Empanada Restaurant
6,Avalon Park,"Avalon Park,Marynook,Stony Island Park",41.745035,-87.588658,4.0,Mexican Restaurant,Restaurant,American Restaurant,Latin American Restaurant,Chinese Restaurant,Vietnamese Restaurant,Eastern European Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant
7,Avondale,"Avondale,Jackowo,Polish Village,Wacławowo",33.435499,-112.349557,1.0,Mediterranean Restaurant,Vietnamese Restaurant,Greek Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Empanada Restaurant,Eastern European Restaurant
8,Belmont Cragin,"Belmont Central,Brickyard,Cragin,Hanson Park",41.931698,-87.76867,1.0,Mexican Restaurant,Chinese Restaurant,Restaurant,Japanese Restaurant,Cantonese Restaurant,Seafood Restaurant,Eastern European Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant
9,Beverly,"Beverly,East Beverly,West Beverly",42.558428,-70.880049,1.0,Restaurant,Mexican Restaurant,Mediterranean Restaurant,Chinese Restaurant,Vietnamese Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Empanada Restaurant


In [53]:
# dropping rows with empty cells
chicago_merged.dropna(
    axis=0,
    how='any',
    thresh=None,
    subset=None,
    inplace=True
)
chicago_merged.head(20)

Unnamed: 0,Neighborhood,Villages,Latitude,Longitude,Cluster Labels,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
0,Albany Park,"Albany Park,Mayfair,North Mayfair,Ravenswood M...",41.971937,-87.716174,1.0,Korean Restaurant,Mexican Restaurant,Latin American Restaurant,Sushi Restaurant,Chinese Restaurant,Vietnamese Restaurant,Dumpling Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant
1,Archer Heights,Archer Heights,41.811422,-87.726165,1.0,Italian Restaurant,Greek Restaurant,Chinese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant
2,Armour Square,"Armour Square,Chinatown,Wentworth Gardens",41.840033,-87.633107,0.0,Fast Food Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,Greek Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant,Eastern European Restaurant
3,Ashburn,"Ashburn,Ashburn Estates,Beverly View,Crestline...",39.043719,-77.48749,1.0,Fast Food Restaurant,American Restaurant,Italian Restaurant,Cajun / Creole Restaurant,Restaurant,Mexican Restaurant,Chinese Restaurant,Thai Restaurant,Sushi Restaurant,Latin American Restaurant
4,Auburn Gresham,"Auburn Gresham,Gresham",41.743387,-87.656042,3.0,Restaurant,Mexican Restaurant,Hotpot Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant,Eastern European Restaurant
5,Austin,"Galewood,The Island,North Austin,South Austin,...",30.271129,-97.7437,3.0,Mexican Restaurant,Fast Food Restaurant,American Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant
6,Avalon Park,"Avalon Park,Marynook,Stony Island Park",41.745035,-87.588658,3.0,Mexican Restaurant,Restaurant,American Restaurant,Latin American Restaurant,Chinese Restaurant,Dim Sum Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant
7,Avondale,"Avondale,Jackowo,Polish Village,Wacławowo",33.435499,-112.349557,4.0,Mediterranean Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant,Eastern European Restaurant
8,Belmont Cragin,"Belmont Central,Brickyard,Cragin,Hanson Park",41.931698,-87.76867,1.0,Mexican Restaurant,Chinese Restaurant,Restaurant,Japanese Restaurant,Cantonese Restaurant,Seafood Restaurant,Dim Sum Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant
9,Beverly,"Beverly,East Beverly,West Beverly",42.558428,-70.880049,1.0,Restaurant,Mexican Restaurant,Mediterranean Restaurant,Chinese Restaurant,Dim Sum Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant


In [54]:
chicago_merged.shape

(41, 15)

In [55]:
# create a map with folium
map_clusters = folium.Map(location=[latitude,longitude], zoom_start=11)

# set color scheme for the five clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(chicago_merged['Latitude'], 
                                  chicago_merged['Longitude'], 
                                  chicago_merged['Neighborhood'], 
                                  chicago_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=list_rest_no[list_community_area.index(poi)]*0.5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# if teh map does not get rendered in github please copy the link below to view the image of the map - thanks!
# https://github.com/5vasavi/Coursera_Capstone/blob/master/Clusters%20of%20restaurants%20in%20Chicago.jpg

#### Examining each cluster

#### Cluster 1

In [57]:
chicago_merged.loc[chicago_merged['Cluster Labels'] == 0, chicago_merged.columns[[0] + list(range(5, chicago_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
2,Armour Square,Fast Food Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,Greek Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant,Eastern European Restaurant
27,Garfield Ridge,Fast Food Restaurant,Middle Eastern Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant,Eastern European Restaurant
33,Hyde Park,Fast Food Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,Greek Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant,Eastern European Restaurant
37,Lake View,Fast Food Restaurant,American Restaurant,Seafood Restaurant,Tex-Mex Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant
54,Norwood Park,American Restaurant,Fast Food Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,Greek Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant


Cluster 1 seem to have predominantly FastFood, Vietnamese, Dimsum restaurants. If a visitor were craving Indian or Mexican food these neighborhoods would definitely not be a good choice for them.

#### Cluster 2

In [58]:
chicago_merged.loc[chicago_merged['Cluster Labels'] == 1, chicago_merged.columns[[0] + list(range(5, chicago_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
0,Albany Park,Korean Restaurant,Mexican Restaurant,Latin American Restaurant,Sushi Restaurant,Chinese Restaurant,Vietnamese Restaurant,Dumpling Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant
1,Archer Heights,Italian Restaurant,Greek Restaurant,Chinese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant
3,Ashburn,Fast Food Restaurant,American Restaurant,Italian Restaurant,Cajun / Creole Restaurant,Restaurant,Mexican Restaurant,Chinese Restaurant,Thai Restaurant,Sushi Restaurant,Latin American Restaurant
8,Belmont Cragin,Mexican Restaurant,Chinese Restaurant,Restaurant,Japanese Restaurant,Cantonese Restaurant,Seafood Restaurant,Dim Sum Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant
9,Beverly,Restaurant,Mexican Restaurant,Mediterranean Restaurant,Chinese Restaurant,Dim Sum Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant
10,Bridgeport,Italian Restaurant,Dim Sum Restaurant,Greek Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant,Eastern European Restaurant
11,Brighton Park,Restaurant,Fast Food Restaurant,Chinese Restaurant,Vietnamese Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant,Eastern European Restaurant
14,Chatham,Chinese Restaurant,Japanese Restaurant,American Restaurant,Eastern European Restaurant,Restaurant,Filipino Restaurant,Sushi Restaurant,Caribbean Restaurant,Mexican Restaurant,Dumpling Restaurant
16,Clearing,Italian Restaurant,American Restaurant,French Restaurant,New American Restaurant,Fast Food Restaurant,Latin American Restaurant,Asian Restaurant,Seafood Restaurant,Mexican Restaurant,Vietnamese Restaurant
18,Dunning,Fast Food Restaurant,Mexican Restaurant,American Restaurant,Chinese Restaurant,Restaurant,Seafood Restaurant,Caribbean Restaurant,New American Restaurant,Cajun / Creole Restaurant,Sushi Restaurant


Cluster 2 seems to have predominantly Mexican, Tapas, Latin American, and Italian Restaurants. visitors would be better off going to cluster 1 if they are craving asian food.

#### Cluster 3

In [59]:
chicago_merged.loc[chicago_merged['Cluster Labels'] == 2, chicago_merged.columns[[0] + list(range(5, chicago_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
35,Jefferson Park,American Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,Greek Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant
45,Morgan Park,American Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,Greek Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant


Cluster 3 seems to have predominantly American, Vietnamese and Dim Sum Restaurants. These neighborhoods also seem to have more niche restaurants such as Greek, French, Fondue and Filipino which suggest the neighborhoods could be more relaxed and quasi-suburban.

#### Cluster 4

In [60]:
chicago_merged.loc[chicago_merged['Cluster Labels'] == 3, chicago_merged.columns[[0] + list(range(5, chicago_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
4,Auburn Gresham,Restaurant,Mexican Restaurant,Hotpot Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant,Eastern European Restaurant
5,Austin,Mexican Restaurant,Fast Food Restaurant,American Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant
6,Avalon Park,Mexican Restaurant,Restaurant,American Restaurant,Latin American Restaurant,Chinese Restaurant,Dim Sum Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant
12,Burnside,Mexican Restaurant,American Restaurant,Chinese Restaurant,Vietnamese Restaurant,Dumpling Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant
17,Douglas,Mexican Restaurant,Fast Food Restaurant,American Restaurant,Asian Restaurant,Southern / Soul Food Restaurant,Vietnamese Restaurant,Dumpling Restaurant,German Restaurant,French Restaurant,Fondue Restaurant
19,East Garfield Park,Mexican Restaurant,Restaurant,German Restaurant,Turkish Restaurant,Thai Restaurant,Sushi Restaurant,Korean Restaurant,Seafood Restaurant,Latin American Restaurant,Chinese Restaurant
23,Englewood,Mexican Restaurant,Italian Restaurant,Fast Food Restaurant,Seafood Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant
41,Logan Square,Mexican Restaurant,Fast Food Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant,Eastern European Restaurant
50,New City,Mexican Restaurant,American Restaurant,Fast Food Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant
58,Pullman,Mexican Restaurant,Latin American Restaurant,Fast Food Restaurant,Seafood Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Empanada Restaurant


Cluster 4 seems to predominantly have Mexican and American restaurants.

In [61]:
chicago_merged.loc[chicago_merged['Cluster Labels'] == 4, chicago_merged.columns[[0] + list(range(5, chicago_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Restaurant,2nd Most Common Restaurant,3rd Most Common Restaurant,4th Most Common Restaurant,5th Most Common Restaurant,6th Most Common Restaurant,7th Most Common Restaurant,8th Most Common Restaurant,9th Most Common Restaurant,10th Most Common Restaurant
7,Avondale,Mediterranean Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,German Restaurant,French Restaurant,Fondue Restaurant,Filipino Restaurant,Fast Food Restaurant,Empanada Restaurant,Eastern European Restaurant


Cluster 5 has predominantly Mediterranean and Vietnamese restaurants.