# BATTLE OF THE NEIGHBORHOODS

-------------------------------------------------------------

### First, let us import the relevant libraries

In [1]:
import numpy as np
import pandas as pd

#!conda install -c conda-forge folium
import folium

#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

import matplotlib.pyplot as plt

import requests
import json

from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

### We will retrieve the coordinates for Connaught Place

In [2]:
address = 'Connaught Place, National Capital Territory of Delhi'
geolocator = Nominatim(user_agent = 'New')
location = geolocator.geocode(address)
longitude = location.longitude
latitude = location.latitude

### Let's see the location on a map

In [3]:
map_delhi = folium.Map(location = [latitude, longitude], zoom_start = 12)
label = folium.Popup('Connaught Place')
folium.CircleMarker(
    [latitude, longitude],
    radius = 5,
    popup = label,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7).add_to(map_delhi)

<folium.vector_layers.CircleMarker at 0x7f6353c0e9e8>

In [4]:
map_delhi

### We will set up the credentials

In [5]:
# The code was removed by Watson Studio for sharing.

In [6]:
LIMIT = 100
radius = 3000

## For Restaurants

#### We will query all the restaurants in Connaught Place

In [7]:
query = 'Restaurant'
url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&query={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION,
    query,
    latitude, 
    longitude, 
    radius, 
    LIMIT)

In [8]:
results = requests.get(url).json()

In [9]:
results

{'meta': {'code': 200, 'requestId': '5eb2e9a5b1cac0001ca3bc92'},
 'response': {'venues': [{'id': '5177f8d5e4b0f361220d4766',
    'name': 'Anand Restaurant',
    'location': {'address': '35 Community Centre',
     'crossStreet': 'New Friends Colony',
     'lat': 28.632091466562407,
     'lng': 77.22495786591341,
     'labeledLatLngs': [{'label': 'display',
       'lat': 28.632091466562407,
       'lng': 77.22495786591341}],
     'distance': 510,
     'postalCode': '110025',
     'cc': 'IN',
     'city': 'New Delhi',
     'state': 'Delhi',
     'country': 'India',
     'formattedAddress': ['35 Community Centre (New Friends Colony)',
      'New Delhi 110025',
      'Delhi',
      'India']},
    'categories': [{'id': '4bf58dd8d48988d10f941735',
      'name': 'Indian Restaurant',
      'pluralName': 'Indian Restaurants',
      'shortName': 'Indian',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/indian_',
       'suffix': '.png'},
      'primary': True}],
    'referra

#### We will create a dataframe to store the contents

In [10]:
# assign relevant part of JSON to venues
venues = results['response']['venues']

In [11]:
# tranform venues into a dataframe
dataframe = json_normalize(venues)
dataframe.head()

Unnamed: 0,categories,hasPerk,id,location.address,location.cc,location.city,location.country,location.crossStreet,location.distance,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.postalCode,location.state,name,referralId
0,"[{'id': '4bf58dd8d48988d10f941735', 'name': 'I...",False,5177f8d5e4b0f361220d4766,35 Community Centre,IN,New Delhi,India,New Friends Colony,510,"[35 Community Centre (New Friends Colony), New...","[{'label': 'display', 'lat': 28.63209146656240...",28.632091,77.224958,110025.0,Delhi,Anand Restaurant,v-1588783620
1,"[{'id': '4bf58dd8d48988d111941735', 'name': 'J...",False,5b991352f00a70002cdff3a6,,IN,New Delhi,India,,54,"[New Delhi 110001, Delhi, India]","[{'label': 'display', 'lat': 28.631444, 'lng':...",28.631444,77.219244,110001.0,Delhi,FUJI RESTAURANT & BAR,v-1588783620
2,"[{'id': '4bf58dd8d48988d145941735', 'name': 'C...",False,5467166a498e6c2b038ef3bc,,IN,,India,,77,[India],"[{'label': 'display', 'lat': 28.63186068637747...",28.631861,77.220371,,,The Host Restaurant,v-1588783620
3,"[{'id': '4bf58dd8d48988d149941735', 'name': 'T...",False,4f5063bbe4b062d9d9d5a175,Hare Krishna hotel,IN,New Delhi,India,,1312,"[Hare Krishna hotel, New Delhi, Delhi, India]","[{'label': 'display', 'lat': 28.641011, 'lng':...",28.641011,77.212043,,Delhi,Tom Yam Rooftop Restaurant,v-1588783620
4,"[{'id': '4bf58dd8d48988d10f941735', 'name': 'I...",False,54086753498e0efae46a1c8f,,IN,,India,,83,[India],"[{'label': 'display', 'lat': 28.6315733133297,...",28.631573,77.220624,,,anand restaurant cp,v-1588783620


#### Let's filter the venues with relevant information and discard the remaining ones

In [12]:
# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

In [13]:
dataframe_filtered.name

0                            Anand Restaurant
1                       FUJI RESTAURANT & BAR
2                         The Host Restaurant
3                  Tom Yam Rooftop Restaurant
4                         anand restaurant cp
5                           York's Restaurant
6                              KFC Restaurant
7                           Zafron restaurant
8                              Zen Restaurant
9                  My Bar Lounge & Restaurant
10                         Kwality Restaurant
11                   Japanese Fuji Restaurant
12                           Anand Restaurant
13                     White water restaurant
14            Vega Pure Vegetarian Restaurant
15       Parikrama - The Revolving Restaurant
16                         Ravayat Restaurant
17                 My Lounge Bar & Restaurant
18                            Veda Restaurant
19                         zaffron Restaurant
20                        Maharani Restaurant
21                          Gulati

In [14]:
restaurant_list = dataframe_filtered[['name', 'lat', 'lng']] # This dataframe will later be used to merge into a larger dataframe containing information about other venues

#### Let's mark all the restaurants on the map

In [15]:
restaurants_map = folium.Map(location=[latitude, longitude], zoom_start=15)

# add the restaurants as blue circle markers
for lat, lng, label in zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.categories):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='green',
        popup=label,
        fill = True,
        fill_color='green',
        fill_opacity=0.6
    ).add_to(restaurants_map)

In [16]:
restaurants_map

### For Bars

#### We need to repeat the same process for Bars

In [18]:
query = 'Bar'
url2 = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&query={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION,
    query,
    latitude, 
    longitude, 
    radius, 
    LIMIT)

In [19]:
results2 = requests.get(url2).json()

In [20]:
# assign relevant part of JSON to venues
venues2 = results2['response']['venues']

In [21]:
# tranform venues into a dataframe
dataframe = json_normalize(venues2)
dataframe.head()

Unnamed: 0,categories,hasPerk,id,location.address,location.cc,location.city,location.country,location.crossStreet,location.distance,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.neighborhood,location.postalCode,location.state,name,referralId
0,"[{'id': '50327c8591d4c4b30a586d5d', 'name': 'B...",False,51470bf7e4b0251c3c4fc8d0,F Block,IN,Delhi,India,Inner Circle Cp,76,"[F Block (Inner Circle Cp), Delhi, Delhi, India]","[{'label': 'display', 'lat': 28.63172410105139...",28.631724,77.220472,,,Delhi,Bar King Street,v-1588783642
1,"[{'id': '4bf58dd8d48988d116941735', 'name': 'B...",False,52b024a1498e919d07553e50,,IN,,India,,209,[India],"[{'label': 'display', 'lat': 28.63281350472011...",28.632814,77.221181,,,,My Bar,v-1588783642
2,"[{'id': '4bf58dd8d48988d1dc931735', 'name': 'T...",False,51690773e4b09433491d990e,"81, N-Block, Outer Circle, Connaught Place",IN,New Delhi,India,,240,"[81, N-Block, Outer Circle, Connaught Place, N...","[{'label': 'display', 'lat': 28.63092047361482...",28.63092,77.222194,,110001.0,Delhi,Cha Bar | चा बार,v-1588783642
3,"[{'id': '4bf58dd8d48988d116941735', 'name': 'B...",False,4fae2e04e4b0771d82721cab,"G 68, Outer Circle, Connaught Place",IN,New Delhi,India,Opposite Alka Hotel,320,"[G 68, Outer Circle, Connaught Place (Opposite...","[{'label': 'display', 'lat': 28.63238752868990...",28.632388,77.216717,,110001.0,Delhi,My Bar Lounge & Restaurant,v-1588783642
4,"[{'id': '4bf58dd8d48988d1d5941735', 'name': 'H...",False,4e89f79e775b4a1123c27d38,Barakhamba Avenue,IN,New Delhi,India,Connaught Place,1037,"[Barakhamba Avenue (Connaught Place), New Delh...","[{'label': 'display', 'lat': 28.62818896975973...",28.628189,77.229767,,110001.0,Delhi,24/7 Bar @ The Lalit Hotel,v-1588783642


In [22]:
# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

In [23]:
dataframe_filtered.name

0                                       Bar King Street
1                                                My Bar
2                                      Cha Bar | चा बार
3                            My Bar Lounge & Restaurant
4                            24/7 Bar @ The Lalit Hotel
5                                             Banta Bar
6                               Henri's bar Le meridien
7                                 FUJI RESTAURANT & BAR
8                                                My Bar
9                                           Station Bar
10                                  My Bar Headquarters
11                                     BLUES CAFÉ & BAR
12                                       cocoa cafe bar
13                                Attitude Cafe and Bar
14                                     Bar. King Street
15                                  Chili's Grill & Bar
16                                              Barista
17                                       Quote B

In [24]:
bar_list = dataframe_filtered[['name', 'lat', 'lng']] # This dataframe will later be used to merge into a larger dataframe containing information about other venues

In [25]:
bars_map = folium.Map(location=[latitude, longitude], zoom_start=15) 

# add the restaurants as blue circle markers
for lat, lng, label in zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.categories):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='red',
        popup=label,
        fill = True,
        fill_color='red',
        fill_opacity=0.6
    ).add_to(bars_map)

In [26]:
bars_map

## For Hotels

#### Finally, we repeat the process for Hotels

In [27]:
query = 'Hotel'
url3 = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&query={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION,
    query,
    latitude, 
    longitude, 
    radius, 
    LIMIT)

In [28]:
results3 = requests.get(url3).json()

In [29]:
# assign relevant part of JSON to venues
venues3 = results3['response']['venues']

In [30]:
# tranform venues into a dataframe
dataframe = json_normalize(venues3)
dataframe.head()

Unnamed: 0,categories,hasPerk,id,location.address,location.cc,location.city,location.country,location.crossStreet,location.distance,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.neighborhood,location.postalCode,location.state,name,referralId,venuePage.id
0,"[{'id': '54135bf5e4b08f3d2429dfde', 'name': 'S...",False,519ba450498eb0c559152d94,"P-13/90, Connaught Circus",IN,New Delhi,India,,343,"[P-13/90, Connaught Circus, New Delhi 110001, ...","[{'label': 'display', 'lat': 28.63231946643500...",28.632319,77.216445,,110001,Delhi,HOTEL SARAVANA BHAVAN,v-1588783657,
1,"[{'id': '54135bf5e4b08f3d2429dfde', 'name': 'S...",False,4c91f42fae96a0932d79a746,"46, Janpath",IN,New Delhi,India,Opp. Janpath Market,484,"[46, Janpath (Opp. Janpath Market), New Delhi ...","[{'label': 'display', 'lat': 28.62704114687105...",28.627041,77.219514,,110001,Delhi,HOTEL SARAVANA BHAVAN,v-1588783657,
2,"[{'id': '4bf58dd8d48988d10f941735', 'name': 'I...",False,4e54ae3ba8093d27cca8aabd,,IN,New Delhi,India,,429,"[New Delhi, Delhi, India]","[{'label': 'display', 'lat': 28.63471071443335...",28.634711,77.222016,,,Delhi,Kake-Da-Hotel,v-1588783657,
3,"[{'id': '4bf58dd8d48988d1fa931735', 'name': 'H...",False,4cd2c5282b52a09380aa2039,G-59 Connaught Circus,IN,New Delhi,India,,374,"[G-59 Connaught Circus, New Delhi 110001, Delh...","[{'label': 'display', 'lat': 28.63389681944778...",28.633897,77.217241,,110001,Delhi,Radisson Blu Marina Hotel,v-1588783657,
4,"[{'id': '4bf58dd8d48988d1fa931735', 'name': 'H...",False,4b80e458f964a520bf9030e3,15 Parliament St,IN,New Delhi,India,,436,"[15 Parliament St, New Delhi 110 001, Delhi, I...","[{'label': 'display', 'lat': 28.62873117053961...",28.628731,77.216507,,110 001,Delhi,The Park Hotel,v-1588783657,


In [31]:
# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

In [32]:
dataframe_filtered.name

0                                 HOTEL SARAVANA BHAVAN
1                                 HOTEL SARAVANA BHAVAN
2                                         Kake-Da-Hotel
3                             Radisson Blu Marina Hotel
4                                        The Park Hotel
5                                       The Lalit Hotel
6                              Hotel Metropolitan Delhi
7                                    Hotel Ramada Plaza
8                              Hotel Aman International
9     Hotel The Spot/Tourist Information Center/Trav...
10                                        Hotel The Raj
11                                The Royal Plaza Hotel
12                            Shangri-La's - Eros Hotel
13                              Hotel Delhi City Centre
14                                        Hotel ShivDev
15                                     Star Plaza Hotel
16                          hotel alka, connaught place
17                                          The 

In [33]:
hotel_list = dataframe_filtered[['name', 'lat', 'lng']] # This dataframe will later be used to merge into a larger dataframe containing information about other venues

In [34]:
hotels_map = folium.Map(location=[latitude, longitude], zoom_start=15) 

# add the restaurants as blue circle markers
for lat, lng, label in zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.categories):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(hotels_map)
    

In [35]:
hotels_map

# Now we will present all the venues on the map together

In [50]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=14.5)

In [51]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=15) 

# add the hotels as blue circle markers
for lat, lng in zip(hotel_list.lat, hotel_list.lng):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)
    
for lat, lng in zip(bar_list.lat, bar_list.lng):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='red',
        fill = True,
        fill_color='red',
        fill_opacity=0.6
    ).add_to(venues_map)
    
for lat, lng in zip(restaurant_list.lat, restaurant_list.lng):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='green',
        fill = True,
        fill_color='green',
        fill_opacity=0.6
    ).add_to(venues_map)

In [52]:
venues_map

# We will group the venues in Cluster 

#### First we will merge the three dataframes for restaurants, bars and hotels

In [39]:
cluster_list = pd.concat([hotel_list, bar_list, restaurant_list], ignore_index = True)

In [53]:
cluster_list.head()

Unnamed: 0,name,lat,lng,label
0,HOTEL SARAVANA BHAVAN,28.632319,77.216445,0
1,HOTEL SARAVANA BHAVAN,28.627041,77.219514,0
2,Kake-Da-Hotel,28.634711,77.222016,0
3,Radisson Blu Marina Hotel,28.633897,77.217241,0
4,The Park Hotel,28.628731,77.216507,0


#### We will perform K-Means clustering to group the data into clusters

In [54]:
kmeans = KMeans(init="k-means++", n_clusters=3)
kmeans.fit(cluster_list[['lat','lng']])

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=3, n_init=10, n_jobs=None, precompute_distances='auto',
    random_state=None, tol=0.0001, verbose=0)

#### We will store the cluster lables and centres

In [55]:
labels = kmeans.labels_
centres = kmeans.cluster_centers_

#### Adding the label column with data in the dataframe

In [56]:
cluster_list['label'] = labels

In [57]:
folium.CircleMarker(
        centres[0],
        radius=150,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.3
    ).add_to(venues_map)

<folium.vector_layers.CircleMarker at 0x7f63515c3f60>

### We will represent the different clusters with a circle on the map

In [58]:
folium.CircleMarker(
        centres[1],
        radius=150,
        color='red',
        popup=label,
        fill = True,
        fill_color='red',
        fill_opacity=0.3
    ).add_to(venues_map)

<folium.vector_layers.CircleMarker at 0x7f6351563048>

In [59]:
folium.CircleMarker(
        centres[2],
        radius=150,
        color='green',
        popup=label,
        fill = True,
        fill_color='green',
        fill_opacity=0.3
    ).add_to(venues_map)

<folium.vector_layers.CircleMarker at 0x7f6351563080>

### Next we will mark the centres of the clusters on the map

In [60]:
folium.Marker(centres[0], popup = 'Cluster 1').add_to(venues_map)
folium.Marker(centres[1], popup = 'Cluster 2').add_to(venues_map)
folium.Marker(centres[2], popup = 'Cluster 3').add_to(venues_map)

<folium.map.Marker at 0x7f63515636d8>

In [61]:
venues_map

As we can see on the map. Several venues have been grouped into 3 clusters. The circle representing the cluster has not been scaled according to the number of venues in a cluster. Rather they have been made the same size to display the sparsity/density, such that all 3 clusters are visible on the map and the venues can still be differentiated. 

### Let's check how many venues does each cluster contain

In [49]:
cluster_list.groupby('label').count()

Unnamed: 0_level_0,name,lat,lng
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,84,84,84
1,49,49,49
2,17,17,17


--------------------------------------------------------------------------------------------------


# CONCLUSION

The three clusters have different number of venues. 
Cluster number 1 has 84 venues.
Cluster number 2 has 49 venues.
Cluster number 3 has 17 venues.

Since 3rd cluster has only 17 venues and is also much sparse, it would not make much sense to book a hotel room in that area. Number of hotels in that area is also low. There won't be much choice for hotel either.

Cluster 2 and 3, both have lot of venues available. Cluster 2 has a lot of venues right along the center. Cluster 3 has a little less than twice the number of venues than Cluster 3. 

Best place to stay would be near the center of Cluster 1 or CLuster 2. 