# Exploring Bangalore

In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


# Web scarping to explore the Eastern bangalore

In [3]:
#importing lib for web scarping
!pip install bs4
!pip install html5lib

import bs4
import html5lib



In [4]:
#Requests the wikipedia page html
url = 'https://en.wikipedia.org/wiki/List_of_neighbourhoods_in_Bangalore'
page = requests.get(url)
#print(page.content)

In [5]:
#Creating soup object.
from bs4 import BeautifulSoup
soup = bs4.BeautifulSoup(page.content,'html5lib')
soup

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en"><head>
<meta charset="utf-8"/>
<title>List of neighbourhoods in Bangalore - Wikipedia</title>
<script>document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"7f63a277-f5e6-49ae-8d12-cf28754f6e1c","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_neighbourhoods_in_Bangalore","wgTitle":"List of neighbourhoods in Bangalore","wgCurRevisionId":1012065169,"wgRevisionId":1012065169,"wgArticleId":53117184,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Use dmy dates from August 2017","Use Indian English from August 2017","All Wikipedia articles written

In [6]:
#finding the eastern neighborhoods.

all_tables = soup.findAll('table')

East_bangalore_table = all_tables[1].findAll('td')

In [7]:
#creating dataframe of neighborhoods in eastern bangalore
dummy_list = []
for i,row in enumerate(East_bangalore_table):
    if i%3 == 0:
        dummy_list.append(row.get_text())

east_neigh = []
for item in dummy_list:
    t = item.strip('\n')
    east_neigh.append(t)

print(east_neigh)

['Bellandur', 'CV Raman Nagar', 'Hoodi', 'Krishnarajapuram', 'Mahadevapura', 'Marathahalli', 'Varthur', 'Whitefield']


In [8]:
#creating dataframe
neighborhoods = pd.DataFrame(east_neigh, columns = ['Eastern Neighborhoods'])
neighborhoods

Unnamed: 0,Eastern Neighborhoods
0,Bellandur
1,CV Raman Nagar
2,Hoodi
3,Krishnarajapuram
4,Mahadevapura
5,Marathahalli
6,Varthur
7,Whitefield


In [9]:
latitudes = []
longitudes = []
for item in neighborhoods.values:
    address = str(item + ',' + 'Bangalore')
    geolocator = Nominatim(user_agent="ON_explorer")
    location = geolocator.geocode(address)
    latitude = location.latitude
    longitude = location.longitude
    latitudes.append(latitude)
    longitudes.append(longitude)
    #print(latitude,longitude)
print(latitudes)
print(longitudes)

[12.93577245, 12.9856478, 12.9919033, 13.0005089, 12.9934976, 12.9552572, 12.9406152, 12.9696365]
[77.66676103753434, 77.6649767, 77.7162015, 77.6753945, 77.6924156, 77.6984163, 77.7469937, 77.7497448]


In [10]:
latlon = pd.DataFrame(zip(latitudes,longitudes),columns = ['Latitude','Longitude'])
latlon

Unnamed: 0,Latitude,Longitude
0,12.935772,77.666761
1,12.985648,77.664977
2,12.991903,77.716201
3,13.000509,77.675394
4,12.993498,77.692416
5,12.955257,77.698416
6,12.940615,77.746994
7,12.969637,77.749745


In [11]:
df = pd.concat([neighborhoods,latlon],axis = 1)
df

Unnamed: 0,Eastern Neighborhoods,Latitude,Longitude
0,Bellandur,12.935772,77.666761
1,CV Raman Nagar,12.985648,77.664977
2,Hoodi,12.991903,77.716201
3,Krishnarajapuram,13.000509,77.675394
4,Mahadevapura,12.993498,77.692416
5,Marathahalli,12.955257,77.698416
6,Varthur,12.940615,77.746994
7,Whitefield,12.969637,77.749745


In [12]:
address = 'Bangalore'

geolocator = Nominatim(user_agent="ON_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 12.9791198, 77.5912997.


In [13]:
# create map of Bangalore using latitude and longitude values
map_bangalore = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df['Latitude'], df['Longitude'], df['Eastern Neighborhoods']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_bangalore)  
    
map_bangalore

# Using Foursquare API

In [14]:
CLIENT_ID = 'PGH5Y2UP5ESE5HV20YA1NWVEKTYT3JNVKDSZ3UTA2JJQKDJJ' # your Foursquare ID
CLIENT_SECRET = 'C0H1O1SO5W0KOUOLI2FQYVX10QT4Q1VAKVY5VYJ4VE3IMP21' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: PGH5Y2UP5ESE5HV20YA1NWVEKTYT3JNVKDSZ3UTA2JJQKDJJ
CLIENT_SECRET:C0H1O1SO5W0KOUOLI2FQYVX10QT4Q1VAKVY5VYJ4VE3IMP21


In [15]:
df.loc[0,'Eastern Neighborhoods']

'Bellandur'

In [16]:
neighborhood_latitude = df.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = df.loc[0, 'Eastern Neighborhoods'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Bellandur are 12.93577245, 77.66676103753434.


In [17]:
# type your answer here
LIMIT = 100

radius = 2500

url = url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=PGH5Y2UP5ESE5HV20YA1NWVEKTYT3JNVKDSZ3UTA2JJQKDJJ&client_secret=C0H1O1SO5W0KOUOLI2FQYVX10QT4Q1VAKVY5VYJ4VE3IMP21&v=20180605&ll=12.93577245,77.66676103753434&radius=2500&limit=100'

In [18]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '60680b4fe9981a03cf1b2088'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Current map view',
  'headerFullLocation': 'Current map view',
  'headerLocationGranularity': 'unknown',
  'totalResults': 73,
  'suggestedBounds': {'ne': {'lat': 12.958272472500022,
    'lng': 77.68980383891827},
   'sw': {'lat': 12.913272427499978, 'lng': 77.64371823615042}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bd44107b221c9b6ec34dbd0',
       'name': 'Madhuloka',
       'location': {'address': 'Bellandur, HSR Layout',
        'crossStreet': 'ORR Service Road',
        'lat': 12.922247495009085,
        'lng': 77.66842785685435,
        'labeledLatLngs': [

In [19]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [20]:
import json
from pandas.io.json import json_normalize

In [21]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Madhuloka,Liquor Store,12.922247,77.668428
1,Happy Endings,Dessert Shop,12.924386,77.67297
2,Cafe Coffee Day,Café,12.924097,77.670549
3,Daily Bread,Bakery,12.926866,77.670605
4,Kaikondanahalli lake,Lake,12.915391,77.6733


In [22]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

73 venues were returned by Foursquare.


In [23]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [24]:
eastern_bangalore_venues = getNearbyVenues(names=df['Eastern Neighborhoods'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

Bellandur
CV Raman Nagar
Hoodi
Krishnarajapuram
Mahadevapura
Marathahalli
Varthur
Whitefield


In [25]:
eastern_bangalore_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,CV Raman Nagar,12.985648,77.664977,Pizza Hut,12.985679,77.668133,Pizza Place
1,CV Raman Nagar,12.985648,77.664977,New Tandoor,12.985288,77.667644,Indian Restaurant
2,CV Raman Nagar,12.985648,77.664977,Cafe Coffee Day,12.985949,77.668792,Café
3,CV Raman Nagar,12.985648,77.664977,DRDO Complex Fountain Square,12.985164,77.663264,Park
4,CV Raman Nagar,12.985648,77.664977,Mast Kalandar,12.985934,77.668788,Indian Restaurant


In [26]:
eastern_bangalore_venues[['Venue Category']].value_counts()

Venue Category               
Indian Restaurant                11
Clothing Store                    5
Bakery                            3
Café                              3
Pizza Place                       3
Movie Theater                     3
Coffee Shop                       2
Chinese Restaurant                2
Kerala Restaurant                 2
Fast Food Restaurant              2
Department Store                  2
Eastern European Restaurant       1
French Restaurant                 1
Fish Market                       1
Yoga Studio                       1
Convenience Store                 1
Hotel                             1
Bus Station                       1
Bar                               1
Asian Restaurant                  1
Gym / Fitness Center              1
IT Services                       1
Hotel Bar                         1
Women's Store                     1
Italian Restaurant                1
Lounge                            1
Market                            

In [27]:
eastern_bangalore_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CV Raman Nagar,10,10,10,10,10,10
Hoodi,3,3,3,3,3,3
Krishnarajapuram,6,6,6,6,6,6
Mahadevapura,16,16,16,16,16,16
Marathahalli,17,17,17,17,17,17
Varthur,1,1,1,1,1,1
Whitefield,12,12,12,12,12,12


In [28]:
print('There are {} uniques categories.'.format(len(eastern_bangalore_venues['Venue Category'].unique())))

There are 38 uniques categories.


# Preparing data to use in the algorithm

In [29]:
# one hot encoding
eastern_bangalore_onehot = pd.get_dummies(eastern_bangalore_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
eastern_bangalore_onehot['Neighborhood'] = eastern_bangalore_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [eastern_bangalore_onehot.columns[-1]] + list(eastern_bangalore_onehot.columns[:-1])
eastern_bangalore_onehot = eastern_bangalore_onehot[fixed_columns]

eastern_bangalore_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Asian Restaurant,Bakery,Bar,Bus Station,Café,Chinese Restaurant,Clothing Store,Coffee Shop,Convenience Store,Department Store,Eastern European Restaurant,Fast Food Restaurant,Fish Market,French Restaurant,Gym / Fitness Center,Hotel,Hotel Bar,IT Services,Indian Restaurant,Italian Restaurant,Kerala Restaurant,Lounge,Market,Movie Theater,Multiplex,Noodle House,Park,Pizza Place,Restaurant,Shop & Service,Shopping Mall,Snack Place,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Women's Store,Yoga Studio
0,CV Raman Nagar,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
1,CV Raman Nagar,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,CV Raman Nagar,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,CV Raman Nagar,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4,CV Raman Nagar,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [30]:
eastern_bangalore_grouped = eastern_bangalore_onehot.groupby('Neighborhood').mean().reset_index()
eastern_bangalore_grouped

Unnamed: 0,Neighborhood,American Restaurant,Asian Restaurant,Bakery,Bar,Bus Station,Café,Chinese Restaurant,Clothing Store,Coffee Shop,Convenience Store,Department Store,Eastern European Restaurant,Fast Food Restaurant,Fish Market,French Restaurant,Gym / Fitness Center,Hotel,Hotel Bar,IT Services,Indian Restaurant,Italian Restaurant,Kerala Restaurant,Lounge,Market,Movie Theater,Multiplex,Noodle House,Park,Pizza Place,Restaurant,Shop & Service,Shopping Mall,Snack Place,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Women's Store,Yoga Studio
0,CV Raman Nagar,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.3,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Hoodi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333
2,Krishnarajapuram,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.166667,0.0,0.0,0.0,0.0,0.166667,0.166667,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0
3,Mahadevapura,0.0625,0.0,0.0,0.0625,0.0,0.0,0.0,0.0625,0.125,0.0625,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.125,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0625,0.0
4,Marathahalli,0.0,0.058824,0.058824,0.0,0.058824,0.0,0.058824,0.235294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.235294,0.0,0.058824,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.058824,0.0,0.058824,0.0,0.0,0.0
5,Varthur,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Whitefield,0.0,0.0,0.166667,0.0,0.0,0.166667,0.083333,0.0,0.0,0.0,0.083333,0.083333,0.0,0.0,0.0,0.083333,0.083333,0.083333,0.0,0.0,0.0,0.083333,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [31]:
num_top_venues = 5

for hood in eastern_bangalore_grouped['Neighborhood']:
    print("----"+str(hood)+"----")
    temp = eastern_bangalore_grouped[eastern_bangalore_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----CV Raman Nagar----
               venue  freq
0  Indian Restaurant   0.3
1        Pizza Place   0.3
2               Café   0.1
3               Park   0.1
4     Shop & Service   0.1


----Hoodi----
               venue  freq
0  Indian Restaurant  0.67
1        Yoga Studio  0.33
2             Bakery  0.00
3                Bar  0.00
4             Lounge  0.00


----Krishnarajapuram----
               venue  freq
0  Indian Restaurant  0.17
1        IT Services  0.17
2      Movie Theater  0.17
3      Train Station  0.17
4        Fish Market  0.17


----Mahadevapura----
                venue  freq
0       Movie Theater  0.12
1         Coffee Shop  0.12
2  Italian Restaurant  0.06
3           Multiplex  0.06
4        Noodle House  0.06


----Marathahalli----
               venue  freq
0  Indian Restaurant  0.24
1     Clothing Store  0.24
2   Asian Restaurant  0.06
3         Restaurant  0.06
4             Lounge  0.06


----Varthur----
               venue  freq
0  Indian Restaurant   1.0


In [32]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [33]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = eastern_bangalore_grouped['Neighborhood']

for ind in np.arange(eastern_bangalore_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(eastern_bangalore_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,CV Raman Nagar,Indian Restaurant,Pizza Place,Park,Café,Shop & Service,Department Store,Coffee Shop,French Restaurant,Fish Market,Fast Food Restaurant
1,Hoodi,Indian Restaurant,Yoga Studio,Asian Restaurant,Department Store,Hotel,Gym / Fitness Center,French Restaurant,Fish Market,Fast Food Restaurant,Eastern European Restaurant
2,Krishnarajapuram,IT Services,Indian Restaurant,Train Station,Fish Market,Fast Food Restaurant,Movie Theater,Convenience Store,French Restaurant,Eastern European Restaurant,Department Store
3,Mahadevapura,Coffee Shop,Movie Theater,American Restaurant,Clothing Store,Fast Food Restaurant,Women's Store,Italian Restaurant,Convenience Store,Multiplex,French Restaurant
4,Marathahalli,Clothing Store,Indian Restaurant,Lounge,Asian Restaurant,Vegetarian / Vegan Restaurant,Bakery,Snack Place,Bus Station,Restaurant,Chinese Restaurant


In [34]:
from sklearn.cluster import KMeans

In [35]:
# set number of clusters
kclusters = 6

eastern_bangalore_grouped_clustering = eastern_bangalore_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(eastern_bangalore_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([5, 3, 2, 2, 0, 1, 4], dtype=int32)

In [36]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

eastern_bangalore_merged = df

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
eastern_bangalore_merged = eastern_bangalore_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Eastern Neighborhoods')

eastern_bangalore_merged.head(10) # check the last columns!

Unnamed: 0,Eastern Neighborhoods,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bellandur,12.935772,77.666761,,,,,,,,,,,
1,CV Raman Nagar,12.985648,77.664977,5.0,Indian Restaurant,Pizza Place,Park,Café,Shop & Service,Department Store,Coffee Shop,French Restaurant,Fish Market,Fast Food Restaurant
2,Hoodi,12.991903,77.716201,3.0,Indian Restaurant,Yoga Studio,Asian Restaurant,Department Store,Hotel,Gym / Fitness Center,French Restaurant,Fish Market,Fast Food Restaurant,Eastern European Restaurant
3,Krishnarajapuram,13.000509,77.675394,2.0,IT Services,Indian Restaurant,Train Station,Fish Market,Fast Food Restaurant,Movie Theater,Convenience Store,French Restaurant,Eastern European Restaurant,Department Store
4,Mahadevapura,12.993498,77.692416,2.0,Coffee Shop,Movie Theater,American Restaurant,Clothing Store,Fast Food Restaurant,Women's Store,Italian Restaurant,Convenience Store,Multiplex,French Restaurant
5,Marathahalli,12.955257,77.698416,0.0,Clothing Store,Indian Restaurant,Lounge,Asian Restaurant,Vegetarian / Vegan Restaurant,Bakery,Snack Place,Bus Station,Restaurant,Chinese Restaurant
6,Varthur,12.940615,77.746994,1.0,Indian Restaurant,Convenience Store,Hotel,Gym / Fitness Center,French Restaurant,Fish Market,Fast Food Restaurant,Eastern European Restaurant,Department Store,Yoga Studio
7,Whitefield,12.969637,77.749745,4.0,Bakery,Café,Hotel,Gym / Fitness Center,Kerala Restaurant,Chinese Restaurant,Market,Hotel Bar,Department Store,Eastern European Restaurant


In [37]:
#del neighborhoods_venues_sorted['Cluster Labels']
eastern_bangalore_merged.drop(0,inplace = True)

In [38]:
eastern_bangalore_merged

Unnamed: 0,Eastern Neighborhoods,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,CV Raman Nagar,12.985648,77.664977,5.0,Indian Restaurant,Pizza Place,Park,Café,Shop & Service,Department Store,Coffee Shop,French Restaurant,Fish Market,Fast Food Restaurant
2,Hoodi,12.991903,77.716201,3.0,Indian Restaurant,Yoga Studio,Asian Restaurant,Department Store,Hotel,Gym / Fitness Center,French Restaurant,Fish Market,Fast Food Restaurant,Eastern European Restaurant
3,Krishnarajapuram,13.000509,77.675394,2.0,IT Services,Indian Restaurant,Train Station,Fish Market,Fast Food Restaurant,Movie Theater,Convenience Store,French Restaurant,Eastern European Restaurant,Department Store
4,Mahadevapura,12.993498,77.692416,2.0,Coffee Shop,Movie Theater,American Restaurant,Clothing Store,Fast Food Restaurant,Women's Store,Italian Restaurant,Convenience Store,Multiplex,French Restaurant
5,Marathahalli,12.955257,77.698416,0.0,Clothing Store,Indian Restaurant,Lounge,Asian Restaurant,Vegetarian / Vegan Restaurant,Bakery,Snack Place,Bus Station,Restaurant,Chinese Restaurant
6,Varthur,12.940615,77.746994,1.0,Indian Restaurant,Convenience Store,Hotel,Gym / Fitness Center,French Restaurant,Fish Market,Fast Food Restaurant,Eastern European Restaurant,Department Store,Yoga Studio
7,Whitefield,12.969637,77.749745,4.0,Bakery,Café,Hotel,Gym / Fitness Center,Kerala Restaurant,Chinese Restaurant,Market,Hotel Bar,Department Store,Eastern European Restaurant


In [39]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(eastern_bangalore_merged['Latitude'], eastern_bangalore_merged['Longitude'], eastern_bangalore_merged['Eastern Neighborhoods'], eastern_bangalore_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [40]:
west_bangalore_table = all_tables[7].findAll('td')

In [41]:
west_bangalore_table

[<td><a href="/wiki/Basaveshwaranagar" title="Basaveshwaranagar">Basaveshwaranagar</a>
 </td>,
 <td>
 </td>,
 <td>Basaveshwaranagar is a residential locality located on the western side of Chord Road. It is named after the 12th century philosopher <a class="mw-redirect" href="/wiki/Basavanna" title="Basavanna">Basavanna</a>.
 </td>,
 <td><a href="/wiki/Kamakshipalya" title="Kamakshipalya">Kamakshipalya</a>
 </td>,
 <td>
 </td>,
 <td>Kamakshipalya is a neighbourhood which lies around the intersection of Magadi Road and Outer Ring Road
 </td>,
 <td><a href="/wiki/Kengeri" title="Kengeri">Kengeri</a>
 </td>,
 <td><a class="image" href="/wiki/File:Kengeri.jpg"><img alt="Kengeri.jpg" data-file-height="1507" data-file-width="3648" decoding="async" height="74" src="//upload.wikimedia.org/wikipedia/commons/thumb/3/3b/Kengeri.jpg/180px-Kengeri.jpg" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/3/3b/Kengeri.jpg/270px-Kengeri.jpg 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/3/3b/

In [42]:
#creating dataframe of neighborhoods in eastern bangalore
dummy_list = []
for i,row in enumerate(west_bangalore_table):
    if i%3 == 0:
        dummy_list.append(row.get_text())

west_neigh = []
for item in dummy_list:
    t = item.strip('\n')
    west_neigh.append(t)

print(west_neigh)

['Basaveshwaranagar', 'Kamakshipalya', 'Kengeri', 'Mahalakshmi Layout', 'Nagarbhavi', 'Nandini Layout', 'Nayandahalli', 'Rajajinagar', 'Rajarajeshwari Nagar', 'Vijayanagar']


In [43]:
#creating dataframe
neighborhoods_df = pd.DataFrame(west_neigh, columns = ['Western Neighborhoods'])
neighborhoods_df

Unnamed: 0,Western Neighborhoods
0,Basaveshwaranagar
1,Kamakshipalya
2,Kengeri
3,Mahalakshmi Layout
4,Nagarbhavi
5,Nandini Layout
6,Nayandahalli
7,Rajajinagar
8,Rajarajeshwari Nagar
9,Vijayanagar


In [44]:
latitudes = []
longitudes = []
for item in neighborhoods_df.values:
    address = str(item + ',' + 'Bangalore')
    geolocator = Nominatim(user_agent="ON_explorer")
    location = geolocator.geocode(address)
    latitude = location.latitude
    longitude = location.longitude
    latitudes.append(latitude)
    longitudes.append(longitude)
    #print(latitude,longitude)
print(latitudes)
print(longitudes)

[12.9937572, 12.9823617, 12.9176571, 13.0113153, 12.9546741, 13.0104064, 12.9413253, 12.9882338, 12.9274413, 12.9718886]
[77.5391325, 77.5226376, 77.4837568, 77.5446972, 77.5121724, 77.5378032, 77.5212118, 77.554883, 77.5155224, 77.54578883887129]


In [45]:
latlon_df = pd.DataFrame(zip(latitudes,longitudes),columns = ['Latitude','Longitude'])
latlon_df

Unnamed: 0,Latitude,Longitude
0,12.993757,77.539132
1,12.982362,77.522638
2,12.917657,77.483757
3,13.011315,77.544697
4,12.954674,77.512172
5,13.010406,77.537803
6,12.941325,77.521212
7,12.988234,77.554883
8,12.927441,77.515522
9,12.971889,77.545789


In [46]:
df_west = pd.concat([neighborhoods_df,latlon_df],axis = 1)
df_west

Unnamed: 0,Western Neighborhoods,Latitude,Longitude
0,Basaveshwaranagar,12.993757,77.539132
1,Kamakshipalya,12.982362,77.522638
2,Kengeri,12.917657,77.483757
3,Mahalakshmi Layout,13.011315,77.544697
4,Nagarbhavi,12.954674,77.512172
5,Nandini Layout,13.010406,77.537803
6,Nayandahalli,12.941325,77.521212
7,Rajajinagar,12.988234,77.554883
8,Rajarajeshwari Nagar,12.927441,77.515522
9,Vijayanagar,12.971889,77.545789


In [47]:
address = 'Bangalore'

geolocator = Nominatim(user_agent="ON_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 12.9791198, 77.5912997.


In [49]:
# create map of Bangalore using latitude and longitude values
map_bangalore = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df_west['Latitude'], df_west['Longitude'], df_west['Western Neighborhoods']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_bangalore)  
    
map_bangalore

In [50]:
western_bangalore_venues = getNearbyVenues(names=df_west['Western Neighborhoods'],
                                   latitudes=df_west['Latitude'],
                                   longitudes=df_west['Longitude']
                                  )

Basaveshwaranagar
Kamakshipalya
Kengeri
Mahalakshmi Layout
Nagarbhavi
Nandini Layout
Nayandahalli
Rajajinagar
Rajarajeshwari Nagar
Vijayanagar


In [56]:
western_bangalore_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Basaveshwaranagar,12.993757,77.539132,Polar Bear,12.991461,77.538832,Ice Cream Shop
1,Basaveshwaranagar,12.993757,77.539132,Priyadarshini Grand,12.990235,77.53802,Fast Food Restaurant
2,Basaveshwaranagar,12.993757,77.539132,Gold's gym,12.991855,77.538492,Gym
3,Basaveshwaranagar,12.993757,77.539132,Cafe Coffee Day,12.997306,77.540433,Café
4,Basaveshwaranagar,12.993757,77.539132,Liquor mart,12.99641,77.540195,Liquor Store


In [57]:
western_bangalore_venues[['Venue Category']].value_counts()

Venue Category               
Indian Restaurant                6
Pizza Place                      6
Ice Cream Shop                   5
Fast Food Restaurant             4
Breakfast Spot                   4
Park                             3
Café                             3
Gym                              3
Bakery                           3
Snack Place                      3
Department Store                 2
Beer Garden                      1
Bus Station                      1
Food Court                       1
Clothing Store                   1
Coffee Shop                      1
Deli / Bodega                    1
Diner                            1
Dry Cleaner                      1
Electronics Store                1
Asian Restaurant                 1
Chinese Restaurant               1
Train Station                    1
Food Truck                       1
Motorcycle Shop                  1
Soccer Field                     1
Sandwich Place                   1
Pharmacy                 

In [58]:
western_bangalore_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Basaveshwaranagar,17,17,17,17,17,17
Kamakshipalya,1,1,1,1,1,1
Kengeri,6,6,6,6,6,6
Mahalakshmi Layout,5,5,5,5,5,5
Nagarbhavi,1,1,1,1,1,1
Nandini Layout,4,4,4,4,4,4
Nayandahalli,2,2,2,2,2,2
Rajajinagar,9,9,9,9,9,9
Rajarajeshwari Nagar,22,22,22,22,22,22
Vijayanagar,4,4,4,4,4,4


In [59]:
print('There are {} uniques categories.'.format(len(western_bangalore_venues['Venue Category'].unique())))

There are 40 uniques categories.


In [60]:
# one hot encoding
western_bangalore_onehot = pd.get_dummies(western_bangalore_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
western_bangalore_onehot['Neighborhood'] = western_bangalore_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [western_bangalore_onehot.columns[-1]] + list(western_bangalore_onehot.columns[:-1])
western_bangalore_onehot = western_bangalore_onehot[fixed_columns]

western_bangalore_onehot.head()

Unnamed: 0,Neighborhood,Arcade,Asian Restaurant,Bakery,Beer Garden,Breakfast Spot,Bus Station,Café,Chinese Restaurant,Clothing Store,Coffee Shop,Deli / Bodega,Department Store,Diner,Dry Cleaner,Electronics Store,Fast Food Restaurant,Food Court,Food Truck,Gym,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Japanese Restaurant,Jewelry Store,Lake,Liquor Store,Miscellaneous Shop,Mobile Phone Shop,Motorcycle Shop,Multiplex,Nightclub,Paper / Office Supplies Store,Park,Pharmacy,Pizza Place,Sandwich Place,Snack Place,Soccer Field,Sporting Goods Shop,Train Station
0,Basaveshwaranagar,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Basaveshwaranagar,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Basaveshwaranagar,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Basaveshwaranagar,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Basaveshwaranagar,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [61]:
western_bangalore_grouped = western_bangalore_onehot.groupby('Neighborhood').mean().reset_index()
western_bangalore_grouped

Unnamed: 0,Neighborhood,Arcade,Asian Restaurant,Bakery,Beer Garden,Breakfast Spot,Bus Station,Café,Chinese Restaurant,Clothing Store,Coffee Shop,Deli / Bodega,Department Store,Diner,Dry Cleaner,Electronics Store,Fast Food Restaurant,Food Court,Food Truck,Gym,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Japanese Restaurant,Jewelry Store,Lake,Liquor Store,Miscellaneous Shop,Mobile Phone Shop,Motorcycle Shop,Multiplex,Nightclub,Paper / Office Supplies Store,Park,Pharmacy,Pizza Place,Sandwich Place,Snack Place,Soccer Field,Sporting Goods Shop,Train Station
0,Basaveshwaranagar,0.0,0.0,0.058824,0.0,0.0,0.0,0.117647,0.0,0.058824,0.0,0.0,0.058824,0.0,0.0,0.0,0.117647,0.0,0.058824,0.058824,0.176471,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.117647,0.0,0.0,0.0,0.058824,0.0
1,Kamakshipalya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Kengeri,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.166667,0.0,0.0,0.0,0.0,0.166667
3,Mahalakshmi Layout,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0
4,Nagarbhavi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Nandini Layout,0.0,0.0,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Nayandahalli,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Rajajinagar,0.0,0.0,0.111111,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.0,0.0,0.111111,0.111111,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.222222,0.0,0.0,0.0
8,Rajarajeshwari Nagar,0.045455,0.0,0.0,0.045455,0.090909,0.0,0.0,0.045455,0.0,0.045455,0.0,0.0,0.045455,0.0,0.0,0.045455,0.045455,0.0,0.045455,0.045455,0.045455,0.045455,0.045455,0.0,0.045455,0.0,0.0,0.0,0.0,0.045455,0.045455,0.0,0.0,0.0,0.136364,0.045455,0.0,0.045455,0.0,0.0
9,Vijayanagar,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [62]:
num_top_venues = 5

for hood in western_bangalore_grouped['Neighborhood']:
    print("----"+str(hood)+"----")
    temp = western_bangalore_grouped[western_bangalore_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Basaveshwaranagar----
                  venue  freq
0        Ice Cream Shop  0.18
1           Pizza Place  0.12
2  Fast Food Restaurant  0.12
3                  Café  0.12
4                  Park  0.06


----Kamakshipalya----
                 venue  freq
0                 Park   1.0
1               Arcade   0.0
2            Multiplex   0.0
3  Japanese Restaurant   0.0
4        Jewelry Store   0.0


----Kengeri----
            venue  freq
0   Train Station  0.17
1  Ice Cream Shop  0.17
2            Café  0.17
3     Pizza Place  0.17
4        Pharmacy  0.17


----Mahalakshmi Layout----
               venue  freq
0  Indian Restaurant   0.4
1      Jewelry Store   0.2
2        Snack Place   0.2
3   Asian Restaurant   0.2
4          Multiplex   0.0


----Nagarbhavi----
               venue  freq
0                Gym   1.0
1             Arcade   0.0
2  Indian Restaurant   0.0
3      Jewelry Store   0.0
4               Lake   0.0


----Nandini Layout----
                  venue  freq
0    

In [63]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = western_bangalore_grouped['Neighborhood']

for ind in np.arange(western_bangalore_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(western_bangalore_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Basaveshwaranagar,Ice Cream Shop,Café,Fast Food Restaurant,Pizza Place,Gym,Sporting Goods Shop,Department Store,Liquor Store,Clothing Store,Park
1,Kamakshipalya,Park,Train Station,Coffee Shop,Food Court,Fast Food Restaurant,Electronics Store,Dry Cleaner,Diner,Department Store,Deli / Bodega
2,Kengeri,Train Station,Pizza Place,Café,Dry Cleaner,Pharmacy,Ice Cream Shop,Sandwich Place,Clothing Store,Electronics Store,Snack Place
3,Mahalakshmi Layout,Indian Restaurant,Asian Restaurant,Snack Place,Jewelry Store,Train Station,Deli / Bodega,Fast Food Restaurant,Electronics Store,Dry Cleaner,Diner
4,Nagarbhavi,Gym,Sporting Goods Shop,Food Truck,Food Court,Fast Food Restaurant,Electronics Store,Dry Cleaner,Diner,Department Store,Deli / Bodega


In [64]:
# set number of clusters
kclusters = 5

western_bangalore_grouped_clustering = western_bangalore_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(western_bangalore_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 4, 1, 0, 2, 1, 3, 0, 1, 0], dtype=int32)

In [65]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

western_bangalore_merged = df_west

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
western_bangalore_merged = western_bangalore_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Western Neighborhoods')

western_bangalore_merged.head(10) # check the last columns!

Unnamed: 0,Western Neighborhoods,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Basaveshwaranagar,12.993757,77.539132,1,Ice Cream Shop,Café,Fast Food Restaurant,Pizza Place,Gym,Sporting Goods Shop,Department Store,Liquor Store,Clothing Store,Park
1,Kamakshipalya,12.982362,77.522638,4,Park,Train Station,Coffee Shop,Food Court,Fast Food Restaurant,Electronics Store,Dry Cleaner,Diner,Department Store,Deli / Bodega
2,Kengeri,12.917657,77.483757,1,Train Station,Pizza Place,Café,Dry Cleaner,Pharmacy,Ice Cream Shop,Sandwich Place,Clothing Store,Electronics Store,Snack Place
3,Mahalakshmi Layout,13.011315,77.544697,0,Indian Restaurant,Asian Restaurant,Snack Place,Jewelry Store,Train Station,Deli / Bodega,Fast Food Restaurant,Electronics Store,Dry Cleaner,Diner
4,Nagarbhavi,12.954674,77.512172,2,Gym,Sporting Goods Shop,Food Truck,Food Court,Fast Food Restaurant,Electronics Store,Dry Cleaner,Diner,Department Store,Deli / Bodega
5,Nandini Layout,13.010406,77.537803,1,Bakery,Fast Food Restaurant,Breakfast Spot,Park,Train Station,Deli / Bodega,Food Court,Electronics Store,Dry Cleaner,Diner
6,Nayandahalli,12.941325,77.521212,3,Electronics Store,Miscellaneous Shop,Train Station,Gym,Food Court,Fast Food Restaurant,Dry Cleaner,Diner,Department Store,Deli / Bodega
7,Rajajinagar,12.988234,77.554883,0,Snack Place,Indian Restaurant,Motorcycle Shop,Bakery,Breakfast Spot,Paper / Office Supplies Store,Mobile Phone Shop,Coffee Shop,Electronics Store,Dry Cleaner
8,Rajarajeshwari Nagar,12.927441,77.515522,1,Pizza Place,Breakfast Spot,Ice Cream Shop,Lake,Beer Garden,Chinese Restaurant,Coffee Shop,Diner,Fast Food Restaurant,Food Court
9,Vijayanagar,12.971889,77.545789,0,Deli / Bodega,Indian Restaurant,Bus Station,Department Store,Train Station,Food Court,Fast Food Restaurant,Electronics Store,Dry Cleaner,Diner


In [66]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(western_bangalore_merged['Latitude'], western_bangalore_merged['Longitude'], western_bangalore_merged['Western Neighborhoods'], western_bangalore_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters