In [12]:
!pip install beautifulsoup4
!pip install lxml
!pip install html5lib
!pip install requests



In [13]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done


  current version: 4.5.11
  latest version: 4.7.11

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.

Solving environment: done


  current version: 4.5.11
  latest version: 4.7.11

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.

Libraries imported.


## 2. Scrap data from Wikipedia page into a DataFrame

In [14]:
# send the GET request
data = requests.get("https://en.wikipedia.org/wiki/Category:Neighbourhoods_in_Bangalore").text

In [15]:
# parse data from the html into a beautifulsoup object
from bs4 import BeautifulSoup # library to parse HTML and XML documents
soup = BeautifulSoup(data, 'html.parser')

In [16]:
# create a list to store neighborhood data
neighborhoodList = []

In [17]:
# append the data into the list
for row in soup.find_all("div", class_="mw-category")[0].findAll("li"):
    neighborhoodList.append(row.text)

In [18]:
# create a new DataFrame from the list
bangalore_df = pd.DataFrame({"Neighborhood": neighborhoodList})

#will give you a dataframe without first three rows
bangalore_df.drop(bangalore_df.index[:3], inplace=True)

#reset index
bangalore_df = bangalore_df.reset_index(drop=True)

bangalore_df.head()


Unnamed: 0,Neighborhood
0,Adugodi
1,"Agara, Bangalore"
2,Ananthnagar
3,Anjanapura
4,Arekere


In [19]:
# print the number of rows of the dataframe
bangalore_df.shape

(127, 1)

## 3. Get the geographical coordinates

In [20]:
!pip install geocoder
import geocoder # to get coordinates

# define a function to get coordinates
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Bangalore, India'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords



In [21]:
# call the function to get the coordinates, store in a new list using list comprehension
coords = [ get_latlng(neighborhood) for neighborhood in bangalore_df["Neighborhood"].tolist() ]

In [22]:
coords

[[12.944020000000023, 77.60783000000004],
 [12.842700000000036, 77.48882000000003],
 [12.954080000000033, 77.54129000000006],
 [12.858100000000036, 77.55907000000008],
 [12.885650000000055, 77.59665000000007],
 [12.963430000000074, 77.61294000000004],
 [13.027520000000038, 77.65044000000006],
 [13.077242798666957, 77.65761368466994],
 [13.044700000000034, 77.55000000000007],
 [12.922290000000032, 77.56988000000007],
 [13.028473575864194, 77.63189241756366],
 [12.939010000000053, 77.57136000000008],
 [12.992260000000044, 77.53441000000004],
 [12.927350000000047, 77.67184000000003],
 [13.082990000000052, 77.54779000000008],
 [12.900040000000047, 77.60430000000008],
 [12.90307000000007, 77.62442000000004],
 [12.817540000000065, 77.67879000000005],
 [12.993310000000065, 77.66125000000005],
 [12.914980000000071, 77.61006000000003],
 [12.951800000000048, 77.54438000000005],
 [12.913190000000043, 77.71555000000006],
 [12.959686963552544, 77.57131311818829],
 [12.793990000000065, 77.7001800000

In [23]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

In [24]:
# merge the coordinates into the original dataframe
bangalore_df['Latitude'] = df_coords['Latitude']
bangalore_df['Longitude'] = df_coords['Longitude']

In [25]:
# check the neighborhoods and the coordinates
print(bangalore_df.shape)
bangalore_df

(127, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Adugodi,12.94402,77.60783
1,"Agara, Bangalore",12.8427,77.48882
2,Ananthnagar,12.95408,77.54129
3,Anjanapura,12.8581,77.55907
4,Arekere,12.88565,77.59665
5,Austin Town,12.96343,77.61294
6,Babusapalya,13.02752,77.65044
7,"Bagalur, Bangalore Urban",13.077243,77.657614
8,Bahubalinagar,13.0447,77.55
9,Banashankari,12.92229,77.56988


In [26]:
# save the DataFrame as CSV file
bangalore_df.to_csv("banagalore_df.csv", index=False)

## 4. Create a map of Bangalore with neighborhoods superimposed on top

In [27]:
# get the coordinates of Kuala Lumpur
address = 'Bangalore, India'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Kuala Lumpur, Malaysiae {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Kuala Lumpur, Malaysiae 12.9791198, 77.5912997.


In [28]:
# create map of Bangalore using latitude and longitude values
bangalore_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(bangalore_df['Latitude'], bangalore_df['Longitude'], bangalore_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(bangalore_map)  
    
bangalore_map

In [29]:
# save the map as HTML file
bangalore_map.save('bangalore_map.html')

## 5. Use the Foursquare API to explore the neighborhoods¶

In [30]:
# define Foursquare Credentials and Version
CLIENT_ID = 'UKXROAK35DELHLCEQRTEGJQZG5NOECR23QJXXITPZZXRUKV2' # your Foursquare ID
CLIENT_SECRET = 'CQEC3JSVM42EGLOAUXEF4SCDGQSNBGAVOVDJKC3BMV3U1SXY' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)


Your credentails:
CLIENT_ID: UKXROAK35DELHLCEQRTEGJQZG5NOECR23QJXXITPZZXRUKV2
CLIENT_SECRET:CQEC3JSVM42EGLOAUXEF4SCDGQSNBGAVOVDJKC3BMV3U1SXY


In [31]:
radius = 2000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(bangalore_df['Latitude'], bangalore_df['Longitude'], bangalore_df['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [32]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(6805, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Adugodi,12.94402,77.60783,PVR IMAX,12.934595,77.611321,Movie Theater
1,Adugodi,12.94402,77.60783,Tommy Hilfiger,12.934552,77.611347,Clothing Store
2,Adugodi,12.94402,77.60783,Truffles Ice & Spice,12.933443,77.614265,Burger Joint
3,Adugodi,12.94402,77.60783,PVR Cinemas,12.934389,77.611184,Multiplex
4,Adugodi,12.94402,77.60783,Dyu Art Cafe,12.937289,77.617591,Café


In [33]:
venues_df.groupby(["Neighborhood"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adugodi,100,100,100,100,100,100
Ananthnagar,30,30,30,30,30,30
Anjanapura,3,3,3,3,3,3
Arekere,77,77,77,77,77,77
Austin Town,100,100,100,100,100,100
BTM Layout,88,88,88,88,88,88
Babusapalya,52,52,52,52,52,52
"Bagalur, Bangalore Urban",11,11,11,11,11,11
Bahubalinagar,19,19,19,19,19,19
Banashankari,100,100,100,100,100,100


In [34]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 235 uniques categories.


In [37]:
# print out the list of categories
venues_df['VenueCategory'].unique()

array(['Movie Theater', 'Clothing Store', 'Burger Joint', 'Multiplex',
       'Café', 'Creperie', 'Lounge', 'Indian Restaurant', 'Dessert Shop',
       'Bar', 'Breakfast Spot', 'Shopping Mall', 'Tea Room',
       'Coffee Shop', 'Mobile Phone Shop', 'Ice Cream Shop', 'Donut Shop',
       'Italian Restaurant', 'Chinese Restaurant', 'Arcade', 'Juice Bar',
       'Smoke Shop', 'Yoga Studio', 'Bakery', 'Indian Sweet Shop',
       'Gaming Cafe', 'Bookstore', 'Gym', "Men's Store", 'Hot Dog Joint',
       'Pub', 'Andhra Restaurant', 'Mexican Restaurant',
       'Thai Restaurant', 'Paper / Office Supplies Store', 'Brewery',
       'Fast Food Restaurant', 'Comfort Food Restaurant',
       'Kerala Restaurant', 'Middle Eastern Restaurant', 'Pizza Place',
       'Punjabi Restaurant', 'Cosmetics Shop', 'Tibetan Restaurant',
       'Liquor Store', 'Salon / Barbershop', 'Bowling Alley',
       'Restaurant', 'Food Court', 'Snack Place', 'Seafood Restaurant',
       'Sandwich Place', 'Food Truck', 'Gym 

In [38]:
# check if the results contain unique values
"Neighborhood" in venues_df['VenueCategory'].unique()

True

In [39]:
# one hot encoding
bangalore_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
bangalore_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [bangalore_onehot.columns[-1]] + list(bangalore_onehot.columns[:-1])
bangalore_onehot = bangalore_onehot[fixed_columns]

print(bangalore_onehot.shape)
bangalore_onehot.head()

(6805, 236)


Unnamed: 0,Neighborhoods,ATM,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Andhra Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Astrologer,Athletics & Sports,Australian Restaurant,Auto Workshop,BBQ Joint,Badminton Court,Bagel Shop,Bakery,Bar,Basketball Court,Bed & Breakfast,Beer Garden,Bengali Restaurant,Big Box Store,Bike Shop,Bistro,Boarding House,Bookstore,Botanical Garden,Boutique,Bowling Alley,Boxing Gym,Breakfast Spot,Brewery,Bridge,Bubble Tea Shop,Buffet,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Camera Store,Campground,Candy Store,Capitol Building,Caribbean Restaurant,Chaat Place,Chettinad Restaurant,Chinese Restaurant,Chocolate Shop,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Concert Hall,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Cricket Ground,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dhaba,Dim Sum Restaurant,Diner,Dive Bar,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Field,Fish & Chips Shop,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,Forest,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,General Entertainment,German Restaurant,Go Kart Track,Golf Course,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Halal Restaurant,Health Food Store,Historic Site,Hockey Arena,Hookah Bar,Hospital,Hot Dog Joint,Hotel,Hotel Bar,Hotel Pool,Hyderabadi Restaurant,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indian Sweet Shop,Indie Movie Theater,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Karnataka Restaurant,Kebab Restaurant,Kerala Restaurant,Kids Store,Korean Restaurant,Lake,Light Rail Station,Lighthouse,Liquor Store,Lounge,Maharashtrian Restaurant,Market,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Motorcycle Shop,Movie Theater,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Music Store,Music Venue,Neighborhood,New American Restaurant,Nightclub,Noodle House,North Indian Restaurant,Office,Other Great Outdoors,Outlet Store,Paintball Field,Pakistani Restaurant,Paper / Office Supplies Store,Park,Parsi Restaurant,Performing Arts Venue,Pharmacy,Pizza Place,Platform,Playground,Plaza,Pool,Pool Hall,Pub,Punjabi Restaurant,Racetrack,Rajasthani Restaurant,Recreation Center,Resort,Rest Area,Restaurant,Road,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,Soccer Stadium,South Indian Restaurant,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Szechuan Restaurant,Tea Room,Tech Startup,Tex-Mex Restaurant,Thai Restaurant,Theater,Tibetan Restaurant,Toll Booth,Toll Plaza,Tourist Information Center,Toy / Game Store,Track Stadium,Trail,Train Station,Travel & Transport,Turkish Restaurant,Udupi Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Adugodi,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Adugodi,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Adugodi,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Adugodi,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Adugodi,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


# Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [40]:
bangalore_grouped = bangalore_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(bangalore_grouped.shape)
bangalore_grouped.head()

(126, 236)


Unnamed: 0,Neighborhoods,ATM,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Andhra Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Astrologer,Athletics & Sports,Australian Restaurant,Auto Workshop,BBQ Joint,Badminton Court,Bagel Shop,Bakery,Bar,Basketball Court,Bed & Breakfast,Beer Garden,Bengali Restaurant,Big Box Store,Bike Shop,Bistro,Boarding House,Bookstore,Botanical Garden,Boutique,Bowling Alley,Boxing Gym,Breakfast Spot,Brewery,Bridge,Bubble Tea Shop,Buffet,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Camera Store,Campground,Candy Store,Capitol Building,Caribbean Restaurant,Chaat Place,Chettinad Restaurant,Chinese Restaurant,Chocolate Shop,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Concert Hall,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Cricket Ground,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dhaba,Dim Sum Restaurant,Diner,Dive Bar,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Field,Fish & Chips Shop,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,Forest,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,General Entertainment,German Restaurant,Go Kart Track,Golf Course,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Halal Restaurant,Health Food Store,Historic Site,Hockey Arena,Hookah Bar,Hospital,Hot Dog Joint,Hotel,Hotel Bar,Hotel Pool,Hyderabadi Restaurant,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indian Sweet Shop,Indie Movie Theater,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Karnataka Restaurant,Kebab Restaurant,Kerala Restaurant,Kids Store,Korean Restaurant,Lake,Light Rail Station,Lighthouse,Liquor Store,Lounge,Maharashtrian Restaurant,Market,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Motorcycle Shop,Movie Theater,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Music Store,Music Venue,Neighborhood,New American Restaurant,Nightclub,Noodle House,North Indian Restaurant,Office,Other Great Outdoors,Outlet Store,Paintball Field,Pakistani Restaurant,Paper / Office Supplies Store,Park,Parsi Restaurant,Performing Arts Venue,Pharmacy,Pizza Place,Platform,Playground,Plaza,Pool,Pool Hall,Pub,Punjabi Restaurant,Racetrack,Rajasthani Restaurant,Recreation Center,Resort,Rest Area,Restaurant,Road,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,Soccer Stadium,South Indian Restaurant,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Szechuan Restaurant,Tea Room,Tech Startup,Tex-Mex Restaurant,Thai Restaurant,Theater,Tibetan Restaurant,Toll Booth,Toll Plaza,Tourist Information Center,Toy / Game Store,Track Stadium,Trail,Train Station,Travel & Transport,Turkish Restaurant,Udupi Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Adugodi,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.01,0.0,0.03,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.03,0.0,0.16,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01
1,Ananthnagar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.033333,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.033333,0.0,0.033333,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Anjanapura,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Arekere,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.012987,0.0,0.012987,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.012987,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.025974,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.077922,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025974,0.0,0.025974,0.0,0.025974,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.064935,0.0,0.0,0.0,0.0,0.0,0.012987,0.012987,0.012987,0.012987,0.012987,0.0,0.0,0.0,0.038961,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051948,0.0,0.12987,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.025974,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.038961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.025974,0.0,0.0,0.0,0.0,0.064935,0.0,0.0,0.012987,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Austin Town,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.01,0.02,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.01,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.07,0.01,0.0,0.0,0.05,0.0,0.1,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [45]:
len(bangalore_grouped[bangalore_grouped["Ice Cream Shop"] > 0])

90

## Create a new DataFrame for Ice Cream Shop data only

In [52]:
bangalore_Ice_Cream_Shop = bangalore_grouped[["Neighborhoods","Ice Cream Shop"]]

In [53]:
bangalore_Ice_Cream_Shop.head()

Unnamed: 0,Neighborhoods,Ice Cream Shop
0,Adugodi,0.03
1,Ananthnagar,0.033333
2,Anjanapura,0.0
3,Arekere,0.051948
4,Austin Town,0.05


## 7. Cluster Neighborhoods

# Run k-means to cluster the neighborhoods in Bangalore into 3 clusters.

In [54]:
# set number of clusters
kclusters = 4

bangalore_clustering = bangalore_Ice_Cream_Shop.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(bangalore_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 2, 0, 0, 0, 0, 2, 2, 1], dtype=int32)

In [56]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
bangalore_merged = bangalore_Ice_Cream_Shop.copy()

# add clustering labels
bangalore_merged["Cluster Labels"] = kmeans.labels_

In [57]:
bangalore_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
bangalore_merged.head()

Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels
0,Adugodi,0.03,0
1,Ananthnagar,0.033333,0
2,Anjanapura,0.0,2
3,Arekere,0.051948,0
4,Austin Town,0.05,0


In [58]:

# merge bangalore_grouped with toronto_data to add latitude/longitude for each neighborhood
bangalore_merged = bangalore_merged.join(bangalore_df.set_index("Neighborhood"), on="Neighborhood")

print(bangalore_merged.shape)
bangalore_merged.head() # check the last columns!

(126, 5)


Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels,Latitude,Longitude
0,Adugodi,0.03,0,12.94402,77.60783
1,Ananthnagar,0.033333,0,12.95408,77.54129
2,Anjanapura,0.0,2,12.8581,77.55907
3,Arekere,0.051948,0,12.88565,77.59665
4,Austin Town,0.05,0,12.96343,77.61294


In [59]:

# sort the results by Cluster Labels
print(bangalore_merged.shape)
bangalore_merged.sort_values(["Cluster Labels"], inplace=True)
bangalore_merged

(126, 5)


Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels,Latitude,Longitude
0,Adugodi,0.03,0,12.94402,77.60783
76,Marathahalli,0.040541,0,12.95467,77.70752
75,Malleswaram,0.03,0,13.0063,77.568304
72,Lingarajapuram,0.046154,0,13.00555,77.62597
71,Kundalahalli,0.035714,0,12.96751,77.715
70,Kumaraswamy Layout,0.026316,0,12.89818,77.55929
65,"Kodihalli, Bangalore",0.05,0,12.96613,77.64977
80,"Murphy Town, Bangalore",0.03,0,12.97953,77.6241
60,Kalyan Nagar,0.054054,0,12.96819,77.52114
56,Kaggadasapura,0.023256,0,12.9848,77.67503


In [60]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(bangalore_merged['Latitude'], bangalore_merged['Longitude'], bangalore_merged['Neighborhood'], bangalore_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [61]:
# save the map as HTML file
map_clusters.save('map_clusters.html')


## 8. Examine Clusters

# cluster 1

In [62]:
bangalore_merged.loc[bangalore_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels,Latitude,Longitude
0,Adugodi,0.03,0,12.94402,77.60783
76,Marathahalli,0.040541,0,12.95467,77.70752
75,Malleswaram,0.03,0,13.0063,77.568304
72,Lingarajapuram,0.046154,0,13.00555,77.62597
71,Kundalahalli,0.035714,0,12.96751,77.715
70,Kumaraswamy Layout,0.026316,0,12.89818,77.55929
65,"Kodihalli, Bangalore",0.05,0,12.96613,77.64977
80,"Murphy Town, Bangalore",0.03,0,12.97953,77.6241
60,Kalyan Nagar,0.054054,0,12.96819,77.52114
56,Kaggadasapura,0.023256,0,12.9848,77.67503


# cluster 2

In [63]:
bangalore_merged.loc[bangalore_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels,Latitude,Longitude
14,Bharathnagar,0.083333,1,13.08299,77.54779
15,Bilekahalli,0.074468,1,12.90004,77.6043
51,Jakkur,0.090909,1,13.07563,77.60392
16,Bommanahalli,0.061538,1,12.90307,77.62442
86,Padmanabhanagar,0.065574,1,12.91547,77.55311
11,Basavanagudi,0.08,1,12.93901,77.57136
99,Sahakara Nagar,0.085714,1,13.06272,77.58548
43,Hebbal,0.076923,1,13.04969,77.58951
9,Banashankari,0.09,1,12.92229,77.56988
79,Milk Colony,0.097826,1,13.023193,77.579059


# cluster 3

In [64]:
bangalore_merged.loc[bangalore_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels,Latitude,Longitude
95,Ramamurthy Nagar,0.0,2,13.02378,77.67788
122,Wilson Garden,0.014925,2,12.94962,77.59675
120,Vyalikaval,0.0,2,13.03791,77.61859
96,Ramanjaneyanagar,0.0,2,12.88334,77.54093
38,Girinagar,0.0,2,12.94278,77.54121
118,"Vijayanagar, Bangalore",0.0,2,13.07596,77.65242
102,Shikaripalya,0.0,2,12.83265,77.65685
115,Varthur,0.0,2,12.94348,77.74698
104,"Siddapura, Bangalore",0.0,2,12.95607,77.73197
8,Bahubalinagar,0.0,2,13.0447,77.55


# cluster 4

In [65]:
bangalore_merged.loc[bangalore_merged['Cluster Labels'] == 3]

Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels,Latitude,Longitude
61,Kamakshipalya,0.15,3,12.98699,77.52482
100,Sanjaynagar,0.116667,3,13.03692,77.57459
78,Mathikere,0.121212,3,13.03231,77.55866
30,Dollars Colony,0.126984,3,13.04022,77.56907
98,Sadashivanagar,0.12987,3,13.01481,77.57771
12,Basaveshwaranagar,0.108108,3,12.99226,77.53441
64,Kodigehalli,0.105263,3,13.06724,77.56989


# Observations

Most of the Ice Cream shop concentrated in the central area of Bangalore city, with the highest number in cluster 1 and moderate number in cluster 3. On the other hand, cluster 4 has very low number to totally no Ice Cream shop in theneighborhoods. This representsa great opportunity and high potential areas to open new Ice Cream shop as there is very little to no competition from existing Ice Cream shops. Meanwhile, Ice Cream shop in cluster 1 are likely suffering from intense competition due to oversupply and highconcentration of Ice Cream shop. From another perspective, this also shows that the oversupply of Ice Cream shop mostly happened in the central area of the city, with the suburb area still have very few Ice Cream shop. Therefore, this project recommends property developers to capitalize on these findings to open new Ice Cream shop in neighborhoods in cluster 4 with little to no competition. Property developers with unique selling propositions to stand out from the competition can also open new Ice Cream shop in neighborhoods in cluster1 with moderate competition. Lastly, property developers are advised to avoid neighborhoods in cluster1 which already have high concentration of Ice Cream shop and suffering from intense competition.