#IBM Applied Data Science Capstone Course by Coursera


##1. Import libraries


In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

!pip install geocoder

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geocoder # to get coordinates

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print("Libraries imported.")

Collecting geocoder
[?25l  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K     |███▎                            | 10kB 19.0MB/s eta 0:00:01[K     |██████▋                         | 20kB 28.3MB/s eta 0:00:01[K     |██████████                      | 30kB 16.9MB/s eta 0:00:01[K     |█████████████▎                  | 40kB 18.0MB/s eta 0:00:01[K     |████████████████▋               | 51kB 15.1MB/s eta 0:00:01[K     |████████████████████            | 61kB 12.7MB/s eta 0:00:01[K     |███████████████████████▎        | 71kB 11.6MB/s eta 0:00:01[K     |██████████████████████████▋     | 81kB 12.6MB/s eta 0:00:01[K     |██████████████████████████████  | 92kB 13.2MB/s eta 0:00:01[K     |████████████████████████████████| 102kB 6.0MB/s 
[?25hCollecting ratelim
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d829

##2. Scrap data from Wikipedia page into a DataFrame

In [2]:
# send the GET request
data = requests.get("https://commons.wikimedia.org/wiki/Category:Suburbs_of_Bangalore").text

In [3]:
# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(data, 'html.parser')

In [4]:
# create a list to store neighborhood data
neighborhoodList = []

In [5]:
# append the data into the list
for row in soup.find_all("div", class_="mw-category")[0].findAll("li"):
    neighborhoodList.append(row.text)

In [6]:
# create a new DataFrame from the list
blr_df = pd.DataFrame({"Neighborhood": neighborhoodList})

blr_df.head(5)

Unnamed: 0,Neighborhood
0,"► Agara, Bangalore‎ (2 C, 7 F)"
1,► Arekere‎ (5 F)
2,"► Banashankari‎ (1 C, 4 F)"
3,► Banaswadi‎ (2 F)
4,"► Basavanagudi‎ (5 C, 11 F)"


In [7]:
# print the number of rows of the dataframe
blr_df.shape

(58, 1)

## 3. Get the geographical coordinates


In [8]:
# define a function to get coordinates
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Bangalore, India'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [9]:
# call the function to get the coordinates, store in a new list using list comprehension
coords = [ get_latlng(neighborhood) for neighborhood in blr_df["Neighborhood"].tolist() ]

In [10]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

In [11]:
# merge the coordinates into the original dataframe
blr_df['Latitude'] = df_coords['Latitude']
blr_df['Longitude'] = df_coords['Longitude']

In [12]:
# check the neighborhoods and the coordinates
print(blr_df.shape)
blr_df

(58, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,"► Agara, Bangalore‎ (2 C, 7 F)",12.841273,77.481598
1,► Arekere‎ (5 F),12.99799,77.61047
2,"► Banashankari‎ (1 C, 4 F)",12.93874,77.55509
3,► Banaswadi‎ (2 F),12.99784,77.61037
4,"► Basavanagudi‎ (5 C, 11 F)",12.93898,77.57137
5,"► Begur, Bangalore‎ (1 C, 6 F)",12.88245,77.62475
6,"► Bellandur‎ (1 C, 5 F)",12.92734,77.67169
7,"► Bengaluru Pete‎ (9 C, 4 F)",12.96618,77.5869
8,"► Bidadi‎ (2 C, 2 F)",13.00266,77.62949
9,► Bommasandra‎ (33 F),12.81753,77.67879


In [13]:
# save the DataFrame as CSV file
blr_df.to_csv("blr_df.csv", index=False)

##4. Create a map of Bengaluru with neighborhoods superimposed on top

In [14]:
# get the coordinates of Bengaluru
address = 'Bengaluru, India'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Bengaluru, India {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Bengaluru, India 12.9791198, 77.5912997.


In [15]:
# create map of Bengaluru using latitude and longitude values
map_blr = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(blr_df['Latitude'], blr_df['Longitude'], blr_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_blr)  
    
map_blr

In [16]:
# save the map as HTML file
map_blr.save('map_blr.html')

##5. Use the Foursquare API to explore the neighborhoods

In [17]:
# define Foursquare Credentials and Version
CLIENT_ID = 'FSJXMK50GG0BFPDWKOG1JYQVBPEOTPESHELJL3DHMZXZPXI2' # your Foursquare ID
CLIENT_SECRET = '5ERS3ZI430U31ROETTEQCNRLV3SHKP3CDPLBJ1AWUAOGCXMN' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: FSJXMK50GG0BFPDWKOG1JYQVBPEOTPESHELJL3DHMZXZPXI2
CLIENT_SECRET:5ERS3ZI430U31ROETTEQCNRLV3SHKP3CDPLBJ1AWUAOGCXMN


Since the Area of Bengaluru is 741 sq km, We choose a radius of 5000 meters

In [18]:
radius = 5000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(blr_df['Latitude'], blr_df['Longitude'], blr_df['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [19]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(5261, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,"► Agara, Bangalore‎ (2 C, 7 F)",12.841273,77.481598,Art of Living International Centre,12.844607,77.507343,Spiritual Center
1,"► Agara, Bangalore‎ (2 C, 7 F)",12.841273,77.481598,Cafe Vishala,12.826968,77.510499,Vegetarian / Vegan Restaurant
2,"► Agara, Bangalore‎ (2 C, 7 F)",12.841273,77.481598,Café Coffee Day,12.812747,77.511749,Café
3,"► Agara, Bangalore‎ (2 C, 7 F)",12.841273,77.481598,Guhantara Resort Bangalore,12.800599,77.491905,Resort
4,"► Agara, Bangalore‎ (2 C, 7 F)",12.841273,77.481598,Kanakpura Hills,12.827524,77.507402,Trail


Let's check how many venues were returned for each neighorhood

In [20]:
venues_df.groupby(["Neighborhood"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"► Agara, Bangalore‎ (2 C, 7 F)",8,8,8,8,8,8
► Arekere‎ (5 F),100,100,100,100,100,100
"► Banashankari‎ (1 C, 4 F)",100,100,100,100,100,100
► Banaswadi‎ (2 F),100,100,100,100,100,100
"► Basavanagudi‎ (5 C, 11 F)",100,100,100,100,100,100
"► Begur, Bangalore‎ (1 C, 6 F)",100,100,100,100,100,100
"► Bellandur‎ (1 C, 5 F)",100,100,100,100,100,100
"► Bengaluru Pete‎ (9 C, 4 F)",100,100,100,100,100,100
"► Bidadi‎ (2 C, 2 F)",100,100,100,100,100,100
► Bommasandra‎ (33 F),52,52,52,52,52,52


Let's find out how many unique categories can be curated from all the returned venues

In [21]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))


There are 147 uniques categories.


In [22]:
# print out the list of categories
venues_df['VenueCategory'].unique()[:200]

array(['Spiritual Center', 'Vegetarian / Vegan Restaurant', 'Café',
       'Resort', 'Trail', 'Spa', 'Restaurant', 'Indian Restaurant',
       'Ice Cream Shop', 'Park', 'Hyderabadi Restaurant',
       'Eastern European Restaurant', 'Tea Room', 'Seafood Restaurant',
       'Pakistani Restaurant', 'Steakhouse', 'Pub', 'Bar',
       'Afghan Restaurant', 'Plaza', 'Hotel', 'Burger Joint',
       'Japanese Restaurant', 'Bookstore', 'Music Store',
       'Department Store', 'American Restaurant', 'Lounge',
       'Cricket Ground', 'Italian Restaurant', 'Brewery',
       'Sushi Restaurant', 'Mexican Restaurant', 'Deli / Bodega',
       'South Indian Restaurant', 'Racetrack', 'Cupcake Shop',
       'Cocktail Bar', 'Asian Restaurant', 'Gym / Fitness Center',
       'French Restaurant', 'Shopping Mall', 'Hotel Bar',
       'Soccer Stadium', 'Art Gallery', 'Karnataka Restaurant',
       'Snack Place', 'Boutique', 'Juice Bar', 'Motorcycle Shop',
       'Chocolate Shop', 'Gaming Cafe', 'Diner', 'Ger

In [23]:
# check if the results contain "Shopping Mall"
"Tech Startup" in venues_df['VenueCategory'].unique()

True

##6. Analyze Each Neighborhood (One Hot Encoding)

In [24]:
# one hot encoding
blr_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
blr_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [blr_onehot.columns[-1]] + list(blr_onehot.columns[:-1])
blr_onehot = blr_onehot[fixed_columns]

print(blr_onehot.shape)
blr_onehot.head()

(5261, 148)


Unnamed: 0,Neighborhoods,Accessories Store,Afghan Restaurant,American Restaurant,Andhra Restaurant,Arcade,Art Gallery,Arts & Crafts Store,Asian Restaurant,BBQ Joint,Badminton Court,Bagel Shop,Bakery,Bar,Bed & Breakfast,Beer Garden,Bengali Restaurant,Big Box Store,Bistro,Bookstore,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Bus Station,Butcher,Café,Chinese Restaurant,Chocolate Shop,Clothing Store,Cocktail Bar,Coffee Shop,Convenience Store,Cosmetics Shop,Creperie,Cricket Ground,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Dive Bar,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Falafel Restaurant,Fast Food Restaurant,Field,Financial or Legal Service,Food,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Gas Station,Gastropub,General Entertainment,German Restaurant,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Halal Restaurant,History Museum,Hotel,Hotel Bar,Hotel Pool,Hyderabadi Restaurant,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indie Movie Theater,Irish Pub,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Karnataka Restaurant,Kerala Restaurant,Korean Restaurant,Lake,Light Rail Station,Liquor Store,Lounge,Maharashtrian Restaurant,Market,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Motorcycle Shop,Movie Theater,Multicuisine Indian Restaurant,Multiplex,Music Store,Music Venue,Nightclub,North Indian Restaurant,Office,Outlet Store,Pakistani Restaurant,Park,Performing Arts Venue,Pizza Place,Plaza,Pub,Punjabi Restaurant,Racetrack,Rajasthani Restaurant,Resort,Restaurant,Salad Place,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shopping Mall,Snack Place,Soccer Field,Soccer Stadium,South Indian Restaurant,Spa,Spiritual Center,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Tech Startup,Thai Restaurant,Theater,Toll Booth,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Women's Store
0,"► Agara, Bangalore‎ (2 C, 7 F)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"► Agara, Bangalore‎ (2 C, 7 F)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
2,"► Agara, Bangalore‎ (2 C, 7 F)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"► Agara, Bangalore‎ (2 C, 7 F)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"► Agara, Bangalore‎ (2 C, 7 F)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0


Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [25]:
blr_grouped = blr_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(blr_grouped.shape)
blr_grouped

(58, 148)


Unnamed: 0,Neighborhoods,Accessories Store,Afghan Restaurant,American Restaurant,Andhra Restaurant,Arcade,Art Gallery,Arts & Crafts Store,Asian Restaurant,BBQ Joint,Badminton Court,Bagel Shop,Bakery,Bar,Bed & Breakfast,Beer Garden,Bengali Restaurant,Big Box Store,Bistro,Bookstore,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Bus Station,Butcher,Café,Chinese Restaurant,Chocolate Shop,Clothing Store,Cocktail Bar,Coffee Shop,Convenience Store,Cosmetics Shop,Creperie,Cricket Ground,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Dive Bar,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Falafel Restaurant,Fast Food Restaurant,Field,Financial or Legal Service,Food,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Gas Station,Gastropub,General Entertainment,German Restaurant,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Halal Restaurant,History Museum,Hotel,Hotel Bar,Hotel Pool,Hyderabadi Restaurant,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indie Movie Theater,Irish Pub,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Karnataka Restaurant,Kerala Restaurant,Korean Restaurant,Lake,Light Rail Station,Liquor Store,Lounge,Maharashtrian Restaurant,Market,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Motorcycle Shop,Movie Theater,Multicuisine Indian Restaurant,Multiplex,Music Store,Music Venue,Nightclub,North Indian Restaurant,Office,Outlet Store,Pakistani Restaurant,Park,Performing Arts Venue,Pizza Place,Plaza,Pub,Punjabi Restaurant,Racetrack,Rajasthani Restaurant,Resort,Restaurant,Salad Place,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shopping Mall,Snack Place,Soccer Field,Soccer Stadium,South Indian Restaurant,Spa,Spiritual Center,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Tech Startup,Thai Restaurant,Theater,Toll Booth,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Women's Store
0,"► Agara, Bangalore‎ (2 C, 7 F)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.0
1,► Arekere‎ (5 F),0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.06,0.01,0.01,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.1,0.01,0.0,0.01,0.09,0.0,0.13,0.0,0.0,0.02,0.02,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.01,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.01,0.0,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"► Banashankari‎ (1 C, 4 F)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.13,0.02,0.0,0.01,0.0,0.06,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.07,0.01,0.18,0.0,0.0,0.01,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.07,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.04,0.0,0.03,0.01,0.04,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01
3,► Banaswadi‎ (2 F),0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.06,0.0,0.01,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.1,0.01,0.0,0.01,0.09,0.0,0.13,0.0,0.0,0.02,0.02,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.01,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.01,0.0,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"► Basavanagudi‎ (5 C, 11 F)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.04,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.04,0.01,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.07,0.0,0.16,0.0,0.0,0.03,0.02,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.02,0.0,0.03,0.01,0.03,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01
5,"► Begur, Bangalore‎ (1 C, 6 F)",0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.12,0.03,0.0,0.01,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.13,0.01,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.1,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.0,0.02,0.0,0.0,0.01,0.04,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0
6,"► Bellandur‎ (1 C, 5 F)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.04,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.13,0.02,0.0,0.01,0.0,0.05,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.04,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.05,0.0,0.09,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.02,0.02,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.15,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0
7,"► Bengaluru Pete‎ (9 C, 4 F)",0.0,0.01,0.01,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.02,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.05,0.01,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.1,0.01,0.0,0.0,0.05,0.0,0.15,0.0,0.0,0.02,0.03,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.03,0.0,0.01,0.0,0.0,0.01,0.0,0.02,0.0,0.01,0.01,0.02,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"► Bidadi‎ (2 C, 2 F)",0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.02,0.0,0.04,0.0,0.0,0.0,0.0,0.06,0.01,0.01,0.0,0.03,0.01,0.0,0.0,0.0,0.01,0.01,0.02,0.01,0.01,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.01,0.0,0.01,0.1,0.0,0.12,0.0,0.0,0.02,0.02,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.01,0.01,0.04,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,► Bommasandra‎ (33 F),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.096154,0.019231,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.096154,0.0,0.0,0.019231,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.134615,0.0,0.019231,0.0,0.0,0.0,0.153846,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.096154,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.0,0.019231,0.019231,0.0,0.0,0.0,0.0,0.0,0.019231,0.0,0.0


In [26]:
len(blr_grouped[blr_grouped["Bus Station"] > 0])

5

In [27]:
blr_bus = blr_grouped[["Neighborhoods","Bus Station"]]


In [28]:
blr_bus.head()


Unnamed: 0,Neighborhoods,Bus Station
0,"► Agara, Bangalore‎ (2 C, 7 F)",0.0
1,► Arekere‎ (5 F),0.0
2,"► Banashankari‎ (1 C, 4 F)",0.0
3,► Banaswadi‎ (2 F),0.0
4,"► Basavanagudi‎ (5 C, 11 F)",0.0


##7. Cluster Neighborhoods


Run k-means to cluster the neighborhoods in Bengaluru into 3 clusters.

In [29]:
# set number of clusters
kclusters = 3

blr_clustering = blr_bus.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(blr_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [30]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
blr_merged = blr_bus.copy()

# add clustering labels
blr_merged["Cluster Labels"] = kmeans.labels_

In [31]:
blr_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
blr_merged.head()

Unnamed: 0,Neighborhood,Bus Station,Cluster Labels
0,"► Agara, Bangalore‎ (2 C, 7 F)",0.0,0
1,► Arekere‎ (5 F),0.0,0
2,"► Banashankari‎ (1 C, 4 F)",0.0,0
3,► Banaswadi‎ (2 F),0.0,0
4,"► Basavanagudi‎ (5 C, 11 F)",0.0,0


In [32]:
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
blr_merged = blr_merged.join(blr_df.set_index("Neighborhood"), on="Neighborhood")

print(blr_merged.shape)
blr_merged.head() # check the last columns!

(58, 5)


Unnamed: 0,Neighborhood,Bus Station,Cluster Labels,Latitude,Longitude
0,"► Agara, Bangalore‎ (2 C, 7 F)",0.0,0,12.841273,77.481598
1,► Arekere‎ (5 F),0.0,0,12.99799,77.61047
2,"► Banashankari‎ (1 C, 4 F)",0.0,0,12.93874,77.55509
3,► Banaswadi‎ (2 F),0.0,0,12.99784,77.61037
4,"► Basavanagudi‎ (5 C, 11 F)",0.0,0,12.93898,77.57137


In [33]:
# sort the results by Cluster Labels
print(blr_merged.shape)
blr_merged.sort_values(["Cluster Labels"], inplace=True)
blr_merged

(58, 5)


Unnamed: 0,Neighborhood,Bus Station,Cluster Labels,Latitude,Longitude
0,"► Agara, Bangalore‎ (2 C, 7 F)",0.0,0,12.841273,77.481598
30,► Konanakunte‎ (1 F),0.0,0,12.89041,77.56176
31,"► Koramangala‎ (1 C, 13 F)",0.0,0,12.95656,77.54385
32,"► Krishnarajapura‎ (3 C, 4 F)",0.0,0,13.00039,77.68368
33,► Kundalahalli‎ (96 F),0.0,0,12.96752,77.715
34,"► Madiwala‎ (1 C, 6 F)",0.0,0,12.95661,77.61355
35,"► Magadi‎ (2 C, 11 F)",0.0,0,12.98621,77.51441
36,► Mahadevapura‎ (2 C),0.0,0,13.00266,77.62949
37,► Majestic (Bangalore)‎ (1 C),0.0,0,12.95745,77.60091
38,"► Malleswaram‎ (4 C, 3 F)",0.0,0,12.995,77.57346


In [34]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(blr_merged['Latitude'], blr_merged['Longitude'], blr_merged['Neighborhood'], blr_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [35]:
# save the map as HTML file
map_clusters.save('map_clusters.html')

##8. Examine Clusters


Cluster 0

In [36]:
blr_merged.loc[blr_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Bus Station,Cluster Labels,Latitude,Longitude
0,"► Agara, Bangalore‎ (2 C, 7 F)",0.0,0,12.841273,77.481598
30,► Konanakunte‎ (1 F),0.0,0,12.89041,77.56176
31,"► Koramangala‎ (1 C, 13 F)",0.0,0,12.95656,77.54385
32,"► Krishnarajapura‎ (3 C, 4 F)",0.0,0,13.00039,77.68368
33,► Kundalahalli‎ (96 F),0.0,0,12.96752,77.715
34,"► Madiwala‎ (1 C, 6 F)",0.0,0,12.95661,77.61355
35,"► Magadi‎ (2 C, 11 F)",0.0,0,12.98621,77.51441
36,► Mahadevapura‎ (2 C),0.0,0,13.00266,77.62949
37,► Majestic (Bangalore)‎ (1 C),0.0,0,12.95745,77.60091
38,"► Malleswaram‎ (4 C, 3 F)",0.0,0,12.995,77.57346


Cluster 1

In [37]:
blr_merged.loc[blr_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Bus Station,Cluster Labels,Latitude,Longitude
47,"► Seetharampalya‎ (1 C, 14 F)",0.1,1,13.1132,77.42463


Cluster 2

In [38]:
blr_merged.loc[blr_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Bus Station,Cluster Labels,Latitude,Longitude
46,► Sahakara Nagar‎ (1 C),0.012346,2,13.06095,77.57398
20,"► Hebbal‎ (2 C, 3 F)",0.01,2,13.04981,77.58903
29,"► Kodihalli, Bangalore‎ (1 C, 4 F)",0.010753,2,13.05976,77.57673
25,"► Jakkur‎ (2 C, 1 F)",0.017241,2,13.07564,77.60394


It is clear from the clustering analysis that Cluster 1 and CLuster 2 lack bus stations. These clusters are located at the outer areas of the main city. Constructing few bus stations in these clusters can connect the city better. Since, most of the bus stations are build in the central part of the city, this project work recommends that more bus stations be built in cluster 1 and 2.