# IBM Data Science Professional Certificate

## Applied Data Science Capstone

### Opening a Hotel in New Delhi, India

#### Import the Required Libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geocoder # to get coordinates

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print("Libraries imported.")

Libraries imported.


#### Prepare and Preprocess the Data

In [2]:
data = requests.get("https://en.wikipedia.org/wiki/Category:Neighbourhoods_in_Delhi").text

In [3]:
# Prepare a DataFrame using the wiki-data
soup = BeautifulSoup(data, 'html.parser')
neighborhoodList = []
for row in soup.find_all("div", class_="mw-category")[0].findAll("li"):
    neighborhoodList.append(row.text)
df = pd.DataFrame({"Neighborhood": neighborhoodList[1:]})

print("Total no of neighborhoods:",df.shape[0])
df.head()

Total no of neighborhoods: 136


Unnamed: 0,Neighborhood
0,Ashok Nagar (Delhi)
1,Ashok Vihar
2,Ashram Chowk
3,Babarpur
4,"Badarpur, Delhi"


In [4]:
def get_lat_long(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Delhi, India'.format(neighborhood))
        lat_lng_coords = g.latlng
    #print(lat_lng_coords)
    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]
    return latitude, longitude

In [5]:
df['Latitude'], df['Longitude'] = zip(*df['Neighborhood'].apply(get_lat_long))

In [6]:
df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Ashok Nagar (Delhi),28.69223,77.30127
1,Ashok Vihar,28.69042,77.17606
2,Ashram Chowk,28.710568,77.326949
3,Babarpur,28.50737,77.30347
4,"Badarpur, Delhi",28.50737,77.30347


#### Map Visualization - Neighborhood of Delhi

In [7]:
# get the coordinates of Kuala Lumpur
address = 'Delhi, India'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Delhi, India {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Delhi, India 28.6517178, 77.2219388.


In [8]:
# create map of Delhi using latitude and longitude values
map_delhi = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_delhi)  
    
print("Neighborhood of Delhi")
map_delhi

Neighborhood of Delhi


In [9]:
# save the map as HTML file
map_delhi.save('map_delhi.html')

#### Pull the data from FourSquare APIs

In [10]:
CLIENT_ID = 'KAUUTFRULDXR3FLRNMYOERZHNEA30XDZOSZEEC4NXJENAYTE' # your Foursquare ID
CLIENT_SECRET = 'RGTC10IWBBFHG0MJXLYFOTE5250AIUHZUGYSTXHCQWH5X5V3' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: KAUUTFRULDXR3FLRNMYOERZHNEA30XDZOSZEEC4NXJENAYTE
CLIENT_SECRET:RGTC10IWBBFHG0MJXLYFOTE5250AIUHZUGYSTXHCQWH5X5V3


In [11]:
df.loc[0, 'Neighborhood']

'Ashok Nagar (Delhi)'

In [12]:
neighborhood_latitude = df.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = df.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Ashok Nagar (Delhi) are 28.692230000000052, 77.30127000000005.


#### Pull the nearby venues

In [13]:
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&radius={}&limit={}&v={}'.format(CLIENT_ID, CLIENT_SECRET, neighborhood_latitude, neighborhood_longitude, 1000, 100, VERSION)
results = requests.get(url).json()

In [14]:
results

{'meta': {'code': 200, 'requestId': '5dd082dc760a7f001b520795'},
 'response': {'headerLocation': 'Delhi',
  'headerFullLocation': 'Delhi',
  'headerLocationGranularity': 'city',
  'totalResults': 6,
  'suggestedBounds': {'ne': {'lat': 28.70123000900006,
    'lng': 77.31151064003826},
   'sw': {'lat': 28.683229991000044, 'lng': 77.29102935996183}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '56f0e1a5498e339a6f9cacd6',
       'name': 'Neeraj Kumar Garg',
       'location': {'address': 'Ashok Nagar',
        'crossStreet': 'New Ashok Nagar',
        'lat': 28.692731217431845,
        'lng': 77.29877207487533,
        'labeledLatLngs': [{'label': 'display',
          'lat': 28.692731217431845,
          'lng': 77.29877207487533}],
        'distance': 250,
        'postal

In [15]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [16]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Neeraj Kumar Garg,Spa,28.692731,77.298772
1,Sutta Chowk,Smoke Shop,28.697897,77.30001
2,My Idea Store,Mobile Phone Shop,28.686405,77.29952
3,Axis Bank ATM,ATM,28.6871,77.29624
4,Haldirams Crossriver Mall,Indian Restaurant,28.687241,77.293538


In [17]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

6 venues were returned by Foursquare.


In [18]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [19]:
LIMIT = 100
delhi_venues = getNearbyVenues(names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

In [20]:
print(delhi_venues.shape)
delhi_venues.head(10)

(1005, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Ashok Nagar (Delhi),28.69223,77.30127,Neeraj Kumar Garg,28.692731,77.298772,Spa
1,Ashok Vihar,28.69042,77.17606,Domino's Pizza,28.693,77.177,Pizza Place
2,Ashok Vihar,28.69042,77.17606,Sagar Ratna | सागर रतना,28.693381,77.177977,South Indian Restaurant
3,Ashok Vihar,28.69042,77.17606,Kay's Bar-Be-Que,28.693278,77.173177,BBQ Joint
4,Ashok Vihar,28.69042,77.17606,"Kays, Ashok Vihar",28.693572,77.173003,Indian Restaurant
5,Ashok Vihar,28.69042,77.17606,J Block Murga Market,28.687144,77.173035,Market
6,Bali Nagar,28.65218,77.129775,Gianis Ice Cream Parlor,28.651737,77.129924,Dessert Shop
7,Bali Nagar,28.65218,77.129775,Vidhan Sabha metro station,28.654045,77.129745,Light Rail Station
8,Bali Nagar,28.65218,77.129775,Raja garden,28.65068,77.126284,Garden
9,Bali Nagar,28.65218,77.129775,Respawn Gaming Cafe,28.649474,77.133211,Arcade


In [21]:
delhi_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ashok Nagar (Delhi),1,1,1,1,1,1
Ashok Vihar,5,5,5,5,5,5
Bali Nagar,4,4,4,4,4,4
Ber Sarai,7,7,7,7,7,7
Bhajanpura,1,1,1,1,1,1
Chanakyapuri,2,2,2,2,2,2
Chandni Chowk,13,13,13,13,13,13
Chittaranjan Park,5,5,5,5,5,5
"Connaught Place, New Delhi",69,69,69,69,69,69
Dariba Kalan,15,15,15,15,15,15


In [22]:
print('There are {} uniques categories.'.format(len(delhi_venues['Venue Category'].unique())))

There are 152 uniques categories.


#### One-hot encoding for the catogory of the venues

In [23]:
# one hot encoding
delhi_onehot = pd.get_dummies(delhi_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
delhi_onehot['Neighborhood'] = delhi_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [delhi_onehot.columns[-1]] + list(delhi_onehot.columns[:-1])
delhi_onehot = delhi_onehot[fixed_columns]

delhi_onehot.head()

Unnamed: 0,Yoga Studio,ATM,Afghan Restaurant,Airport Food Court,Airport Lounge,Airport Terminal,American Restaurant,Arcade,Art Gallery,Arts & Crafts Store,Asian Restaurant,Astrologer,Athletics & Sports,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Beer Garden,Bengali Restaurant,Bike Shop,Bistro,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bridal Shop,Burger Joint,Bus Station,Business Service,Café,Campground,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,Concert Hall,Convenience Store,Deli / Bodega,Department Store,Dessert Shop,Diner,Donut Shop,Dumpling Restaurant,Electronics Store,English Restaurant,Fabric Shop,Falafel Restaurant,Fast Food Restaurant,Fish Market,Flea Market,Food & Drink Shop,Food Truck,French Restaurant,Furniture / Home Store,Garden,Gastropub,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Hindu Temple,Historic Site,History Museum,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotel Pool,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Internet Cafe,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Karaoke Bar,Light Rail Station,Lighting Store,Lounge,Market,Martial Arts Dojo,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Mosque,Motel,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Museum,Neighborhood,Nightclub,North Indian Restaurant,Nudist Beach,Office,Other Nightlife,Paper / Office Supplies Store,Park,Parking,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pool,Portuguese Restaurant,Pub,Resort,Rest Area,Restaurant,Salad Place,Salon / Barbershop,Sandwich Place,Sculpture Garden,Shoe Store,Shopping Mall,Shopping Plaza,Smoke Shop,Snack Place,South Indian Restaurant,Spa,Spanish Restaurant,Stadium,Steakhouse,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Tibetan Restaurant,Tourist Information Center,Train Station,University,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Water Park,Wine Bar,Women's Store
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Ashok Nagar (Delhi),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Ashok Vihar,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Ashok Vihar,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Ashok Vihar,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Ashok Vihar,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [24]:
delhi_onehot.shape

(1005, 152)

In [25]:
delhi_grouped = delhi_onehot.groupby('Neighborhood').mean().reset_index()
delhi_grouped

Unnamed: 0,Neighborhood,Yoga Studio,ATM,Afghan Restaurant,Airport Food Court,Airport Lounge,Airport Terminal,American Restaurant,Arcade,Art Gallery,Arts & Crafts Store,Asian Restaurant,Astrologer,Athletics & Sports,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Beer Garden,Bengali Restaurant,Bike Shop,Bistro,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bridal Shop,Burger Joint,Bus Station,Business Service,Café,Campground,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,Concert Hall,Convenience Store,Deli / Bodega,Department Store,Dessert Shop,Diner,Donut Shop,Dumpling Restaurant,Electronics Store,English Restaurant,Fabric Shop,Falafel Restaurant,Fast Food Restaurant,Fish Market,Flea Market,Food & Drink Shop,Food Truck,French Restaurant,Furniture / Home Store,Garden,Gastropub,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Hindu Temple,Historic Site,History Museum,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotel Pool,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Internet Cafe,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Karaoke Bar,Light Rail Station,Lighting Store,Lounge,Market,Martial Arts Dojo,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Mosque,Motel,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Museum,Nightclub,North Indian Restaurant,Nudist Beach,Office,Other Nightlife,Paper / Office Supplies Store,Park,Parking,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pool,Portuguese Restaurant,Pub,Resort,Rest Area,Restaurant,Salad Place,Salon / Barbershop,Sandwich Place,Sculpture Garden,Shoe Store,Shopping Mall,Shopping Plaza,Smoke Shop,Snack Place,South Indian Restaurant,Spa,Spanish Restaurant,Stadium,Steakhouse,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Tibetan Restaurant,Tourist Information Center,Train Station,University,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Water Park,Wine Bar,Women's Store
0,Ashok Nagar (Delhi),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Ashok Vihar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bali Nagar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Ber Sarai,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Bhajanpura,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Chanakyapuri,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Chandni Chowk,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.153846,0.0,0.0,0.0,0.0,0.230769,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.153846,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.153846,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Chittaranjan Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Connaught Place, New Delhi",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028986,0.0,0.0,0.0,0.028986,0.0,0.014493,0.0,0.086957,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.086957,0.0,0.072464,0.028986,0.0,0.057971,0.0,0.0,0.0,0.028986,0.0,0.014493,0.0,0.014493,0.0,0.0,0.0,0.0,0.014493,0.043478,0.0,0.0,0.014493,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.014493,0.144928,0.014493,0.0,0.0,0.014493,0.014493,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.028986,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.014493,0.028986,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Dariba Kalan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
delhi_grouped.shape

(121, 152)

In [27]:
len(delhi_grouped[delhi_grouped["Hotel"] > 0])

36

#### Analyze the Hotel data

In [28]:
delhi_hotel = delhi_grouped[["Neighborhood","Hotel"]]

In [29]:
delhi_hotel.head(20)

Unnamed: 0,Neighborhood,Hotel
0,Ashok Nagar (Delhi),0.0
1,Ashok Vihar,0.0
2,Bali Nagar,0.0
3,Ber Sarai,0.142857
4,Bhajanpura,0.0
5,Chanakyapuri,0.5
6,Chandni Chowk,0.153846
7,Chittaranjan Park,0.0
8,"Connaught Place, New Delhi",0.014493
9,Dariba Kalan,0.133333


### Clustering using K-Means clustering algorithm

In [30]:
# set number of clusters
kclusters = 5

delhi_clustering = delhi_hotel.drop(["Neighborhood"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(delhi_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 0, 1, 4, 0, 1, 1, 0], dtype=int32)

In [31]:
# create a new dataframe that includes the cluster labels
delhi_merged = delhi_hotel.copy()

# add clustering labels
delhi_merged["Cluster Label"] = kmeans.labels_
delhi_merged.head()

Unnamed: 0,Neighborhood,Hotel,Cluster Label
0,Ashok Nagar (Delhi),0.0,1
1,Ashok Vihar,0.0,1
2,Bali Nagar,0.0,1
3,Ber Sarai,0.142857,0
4,Bhajanpura,0.0,1


In [32]:
# merge delhi_grouped with delhi_data to add latitude/longitude for each neighborhood
delhi_merged = delhi_merged.join(df.set_index("Neighborhood"), on="Neighborhood")

print(delhi_merged.shape)
delhi_merged.head() # check the last columns!

(121, 5)


Unnamed: 0,Neighborhood,Hotel,Cluster Label,Latitude,Longitude
0,Ashok Nagar (Delhi),0.0,1,28.69223,77.30127
1,Ashok Vihar,0.0,1,28.69042,77.17606
2,Bali Nagar,0.0,1,28.65218,77.129775
3,Ber Sarai,0.142857,0,28.54954,77.1817
4,Bhajanpura,0.0,1,28.6998,77.25917


In [33]:
# Sort by cluster labels
delhi_merged.sort_values(["Cluster Label"], inplace=True)
delhi_merged

Unnamed: 0,Neighborhood,Hotel,Cluster Label,Latitude,Longitude
48,Lutyens' Delhi,0.185185,0,28.62119,77.21671
37,Karol Bagh,0.117647,0,28.65045,77.18873
32,Jangpura,0.2,0,28.58337,77.24714
119,Yamuna Pushta,0.185185,0,28.62105,77.2171
86,Raisina Hill,0.15,0,28.618397,77.215478
28,Gulmohar Park,0.083333,0,28.55439,77.21252
24,"Golf Links, New Delhi",0.1,0,28.60304,77.23269
23,Gole Market,0.1,0,28.63408,77.20576
43,"Krishna Nagar, Delhi",0.2,0,28.56364,77.19367
76,Old Delhi,0.125,0,28.65432,77.23259


#### Map Visualization - Neighborhood of Delhi

In [34]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(delhi_merged['Latitude'], delhi_merged['Longitude'], delhi_merged['Neighborhood'], delhi_merged['Cluster Label']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [35]:
# save the map as HTML file
map_clusters.save('map_clusters.html')

#### Analyze the Clusters

#### First Cluster (Cluster Label - 0)

In [36]:
delhi_merged.loc[delhi_merged['Cluster Label'] == 0]

Unnamed: 0,Neighborhood,Hotel,Cluster Label,Latitude,Longitude
48,Lutyens' Delhi,0.185185,0,28.62119,77.21671
37,Karol Bagh,0.117647,0,28.65045,77.18873
32,Jangpura,0.2,0,28.58337,77.24714
119,Yamuna Pushta,0.185185,0,28.62105,77.2171
86,Raisina Hill,0.15,0,28.618397,77.215478
28,Gulmohar Park,0.083333,0,28.55439,77.21252
24,"Golf Links, New Delhi",0.1,0,28.60304,77.23269
23,Gole Market,0.1,0,28.63408,77.20576
43,"Krishna Nagar, Delhi",0.2,0,28.56364,77.19367
76,Old Delhi,0.125,0,28.65432,77.23259


#### Second Cluster (Cluster Label - 1)

In [37]:
delhi_merged.loc[delhi_merged['Cluster Label'] == 1]

Unnamed: 0,Neighborhood,Hotel,Cluster Label,Latitude,Longitude
63,Munirka,0.0,1,28.55503,77.17127
64,Nanakpura,0.0,1,28.62283,77.11336
87,"Rajendra Nagar, Delhi",0.0,1,28.640658,77.185701
65,Nand Nagri,0.0,1,28.6967,77.30386
66,Nangloi Jat,0.0,1,28.67856,77.06764
85,Punjabi Bagh,0.0,1,28.66633,77.12525
84,Pitam Pura,0.0,1,28.69589,77.13726
72,New Friends Colony,0.0,1,28.5781,77.26999
71,New Delhi,0.014925,1,28.63095,77.21722
68,Narela,0.0,1,28.83977,77.07693


#### Third Cluster (Cluster Label - 2)

In [38]:
delhi_merged.loc[delhi_merged['Cluster Label'] == 2]

Unnamed: 0,Neighborhood,Hotel,Cluster Label,Latitude,Longitude
111,Sriniwaspuri,0.666667,2,28.56568,77.25733
80,Pamposh Enclave,0.571429,2,28.546776,77.244759


#### Fourth Cluster (Cluster Label - 3)

In [39]:
delhi_merged.loc[delhi_merged['Cluster Label'] == 3]

Unnamed: 0,Neighborhood,Hotel,Cluster Label,Latitude,Longitude
118,West Patel Nagar,0.25,3,28.6478,77.16447
114,Urdu Bazaar,0.285714,3,28.64989,77.235145
57,Mayur Vihar Phase - 3,0.333333,3,28.61125,77.33406
60,Moti Bagh,0.25,3,28.58363,77.16472
51,Mahipalpur,0.25,3,28.54843,77.13636
104,"Shakti Nagar, Delhi",0.333333,3,28.67037,77.17414
82,Paschim Vihar,0.333333,3,28.66933,77.09173
83,Patel Nagar,0.25,3,28.6478,77.16447
70,"Netaji Nagar, Delhi",0.25,3,28.57747,77.18516


#### Fifth Cluster (Cluster Label - 4)

In [40]:
delhi_merged.loc[delhi_merged['Cluster Label'] == 4]

Unnamed: 0,Neighborhood,Hotel,Cluster Label,Latitude,Longitude
5,Chanakyapuri,0.5,4,28.59506,77.18573
77,Paharganj,0.473684,4,28.64596,77.21493
50,Maharani Bagh,0.428571,4,28.57223,77.26357


#### Final Remarks

After careful analysisi of all five clusters It's clear that the Places which are part of Second cluster (Cluster Label - 1) are most suitable for opening a new Hotel. Second cluster (Cluster Label - 1) has least no of existing hotels but at the same most of the places in cluster 1 are well connected to Airport/Railway Station and other popular public places. 

__This analysis suggests to open a new Hotel in the Second cluster (Cluster Label - 1)__