## IBM Data Science Professional Course
## Coursera Capstone Project
## *The Battle of Neighborhoods*
# **STARTING A NEW INDIAN RESTAURANT IN DELHI, INDIA**

#### First we will import all the libraries we need. 

In [1]:
import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
import numpy as np
import requests
from bs4 import BeautifulSoup

In [2]:
import json
from pandas.io.json import json_normalize

In [3]:
!pip install geocoder
import geocoder
from geopy.geocoders import Nominatim

Collecting geocoder
[?25l  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K     |████████████████████████████████| 102kB 8.9MB/s ta 0:00:011
Collecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [4]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [5]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    branca-0.4.1               |             py_0          26 KB  conda-forge
    certifi-2020.6.20          |   py36h9f0ad1d_0         151 KB  conda-forge
    ca-certificates-2020.6.20  |       hecda079_0         145 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    ------------------------------------------------------------
                       

In [6]:
from sklearn.cluster import KMeans

#### Now, Let's scrap the data from the wikipedia page into a dataframe. We will use BeautifulSoup package.

In [7]:
Link = requests.get('https://en.wikipedia.org/wiki/Category:Neighbourhoods_in_Delhi').text

In [8]:
Soup = BeautifulSoup(Link, 'html.parser')

In [9]:
neighborhoodList = []

In [10]:
for row in Soup.find_all("div", class_="mw-category")[0].findAll("li"):
    neighborhoodList.append(row.text)

In [11]:
Delhi_df = pd.DataFrame({'Neighborhood': neighborhoodList})
Delhi_df

Unnamed: 0,Neighborhood
0,Neighbourhoods of Delhi
1,Ashok Nagar (Delhi)
2,Ashok Vihar
3,Ashram Chowk
4,Babarpur
5,"Badarpur, Delhi"
6,Bali Nagar
7,Bawana
8,Ber Sarai
9,Bhajanpura


#### Let's drop the row with 'Neighborhoods of Delhi' which is unnecessary.

In [12]:
Delhi_df = Delhi_df[Delhi_df.Neighborhood != "Neighbourhoods of Delhi"].reset_index(drop = True)
Delhi_df.head(11)

Unnamed: 0,Neighborhood
0,Ashok Nagar (Delhi)
1,Ashok Vihar
2,Ashram Chowk
3,Babarpur
4,"Badarpur, Delhi"
5,Bali Nagar
6,Bawana
7,Ber Sarai
8,Bhajanpura
9,Chanakyapuri


#### Let's print the number of rows in the Dataframe.

In [13]:
Delhi_df.shape

(138, 1)

#### Now, we will find latitude and longitude coordinates of Delhi, India using Geocoder.

In [14]:
address = 'Delhi, India'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Delhi are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Delhi are 28.6517178, 77.2219388.


#### Then, let's find latitude and longitude coordinates of all the neighborhoods and create a new dataframe.

In [15]:
def get_latlng(neighborhood):
    lat_lng_coords = None
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Delhi, India'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords


In [16]:
coords = [ get_latlng(neighborhood) for neighborhood in Delhi_df["Neighborhood"].tolist() ]
coords

[[28.692230000000052, 77.30124000000006],
 [28.69037000000003, 77.17609000000004],
 [28.710598435255907, 77.32696519316737],
 [28.50738000000007, 77.30346000000003],
 [28.50738000000007, 77.30346000000003],
 [28.65223022436032, 77.12941079026544],
 [28.800590000000057, 77.03473000000008],
 [28.549540000000036, 77.18167000000005],
 [28.699880000000064, 77.25906000000003],
 [28.595060000000046, 77.18573000000004],
 [28.656270000000063, 77.23232000000007],
 [28.67671000000007, 77.21767000000006],
 [28.633940000000052, 77.21968000000004],
 [28.60761000000008, 77.08714000000003],
 [28.654597885415757, 77.2333966005242],
 [28.62832000000003, 77.24727000000007],
 [28.60486000000003, 77.08511000000004],
 [28.560590000000047, 77.24678000000006],
 [28.57298000000003, 77.23357000000004],
 [28.591510000000028, 77.12945000000008],
 [28.699110000000076, 77.19105000000008],
 [28.592220036588714, 77.15998300657745],
 [28.684700000000078, 77.32774000000006],
 [28.679040000000043, 77.31476000000004],
 [

In [17]:
coords_df = pd.DataFrame(coords, columns = ['Latitude', 'Longitude'])

Delhi_df['Latitude'] = coords_df['Latitude']
Delhi_df['Longitude'] = coords_df['Longitude']


In [18]:
print(Delhi_df.shape)
Delhi_df

(138, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Ashok Nagar (Delhi),28.69223,77.30124
1,Ashok Vihar,28.69037,77.17609
2,Ashram Chowk,28.710598,77.326965
3,Babarpur,28.50738,77.30346
4,"Badarpur, Delhi",28.50738,77.30346
5,Bali Nagar,28.65223,77.129411
6,Bawana,28.80059,77.03473
7,Ber Sarai,28.54954,77.18167
8,Bhajanpura,28.69988,77.25906
9,Chanakyapuri,28.59506,77.18573


#### Let's draw the map of Delhi with all the neighborhoods superimposed on top with Blue coloured circles and save it as html.

In [19]:
Delhi_map = folium.Map(location = [latitude, longitude], zoom_start = 10)
for lat, lng, neighborhood in zip(Delhi_df['Latitude'], Delhi_df['Longitude'], Delhi_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html = True)
    folium.CircleMarker(
        [lat, lng],
        radius = 5,
        popup = label,
        color = 'blue',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.7,
        parse_html = False).add_to(Delhi_map)
    
Delhi_map

In [20]:
Delhi_map.save('Delhi_map.html')

#### Now, we define Foursuare credential and version.

In [21]:
CLIENT_ID = 'Y2O034BNJIPWLDVKVERHD2EPEHJKUF13Q03LRWABZZCFQF30' 
CLIENT_SECRET = 'IWCJTV0SAI1EYNKSOFVE5DZZLNKU53FKZZI4R1Z1YEKZMIOZ'
 
VERSION = '20180605' 

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: Y2O034BNJIPWLDVKVERHD2EPEHJKUF13Q03LRWABZZCFQF30
CLIENT_SECRET:IWCJTV0SAI1EYNKSOFVE5DZZLNKU53FKZZI4R1Z1YEKZMIOZ


#### Using Foursquare, the top 100 venues in 2000 meters radius will be found and create a new dataframe.

In [22]:
radius = 2000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(Delhi_df['Latitude'], Delhi_df['Longitude'], Delhi_df['Neighborhood']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        lng,
        radius, 
        LIMIT)
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [23]:
venues_df = pd.DataFrame(venues)
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

venues_df

Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Ashok Nagar (Delhi),28.69223,77.30124,Domino's Pizza,28.70161,77.281235,Pizza Place
1,Ashok Nagar (Delhi),28.69223,77.30124,yamuna vihar,28.689816,77.283876,Park
2,Ashok Nagar (Delhi),28.69223,77.30124,Shahdara,28.682409,77.283024,Chocolate Shop
3,Ashok Nagar (Delhi),28.69223,77.30124,V-Mart,28.68939,77.287658,Clothing Store
4,Ashok Nagar (Delhi),28.69223,77.30124,Classic Ice Cream Parlour,28.688342,77.288673,Ice Cream Shop
5,Ashok Nagar (Delhi),28.69223,77.30124,Shivaji park,28.682657,77.285503,Park
6,Ashok Nagar (Delhi),28.69223,77.30124,WebPreneurs Pvt Ltd(Best SEO Company In Noida),28.694659,77.254477,IT Services
7,Ashok Nagar (Delhi),28.69223,77.30124,Axis Bank ATM,28.70464,77.25805,ATM
8,Ashok Vihar,28.69037,77.17609,Domino's Pizza,28.70161,77.281235,Pizza Place
9,Ashok Vihar,28.69037,77.17609,yamuna vihar,28.689816,77.283876,Park


In [24]:
venues_df.shape

(2182, 7)

In [25]:
venues_df.groupby(["Neighborhood"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ashok Nagar (Delhi),8,8,8,8,8,8
Ashok Vihar,10,10,10,10,10,10
Ashram Chowk,11,11,11,11,11,11
Babarpur,4,4,4,4,4,4
"Badarpur, Delhi",4,4,4,4,4,4
Bali Nagar,7,7,7,7,7,7
Bawana,2,2,2,2,2,2
Ber Sarai,53,53,53,53,53,53
Bhajanpura,10,10,10,10,10,10
Chanakyapuri,5,5,5,5,5,5


#### Now, we will count how many unique venue categories are there.

In [26]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 77 uniques categories.


In [27]:
venues_df['VenueCategory'].unique()[:50]

array(['Pizza Place', 'Park', 'Chocolate Shop', 'Clothing Store',
       'Ice Cream Shop', 'IT Services', 'ATM', 'Mobile Phone Shop',
       'Gym / Fitness Center', 'Stadium', 'Castle', 'Historic Site',
       'Café', 'Fast Food Restaurant', 'Dessert Shop',
       'Indian Restaurant', 'American Restaurant', 'Electronics Store',
       'Lounge', 'Temple', 'Middle Eastern Restaurant', 'Coffee Shop',
       'Chinese Restaurant', 'Sandwich Place', 'Plaza', 'Hotel',
       'Gastropub', 'Diner', 'Frozen Yogurt Shop', 'Nightclub',
       'Auto Workshop', 'Food Truck', 'Thai Restaurant', 'Art Gallery',
       'Convenience Store', 'Fabric Shop', 'Italian Restaurant',
       'Salad Place', 'Athletics & Sports', 'Grocery Store',
       'Movie Theater', 'Market', 'Vegetarian / Vegan Restaurant',
       'Motorcycle Shop', 'Food Court', 'Bus Station', 'Moving Target',
       'Train Station', 'Light Rail Station', 'Arcade'], dtype=object)

In [28]:
"Neighborhood" in venues_df['VenueCategory'].unique()

True

#### Analyze each area.

In [29]:
Delhi_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

Delhi_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

fixed_columns = [Delhi_onehot.columns[-1]] + list(Delhi_onehot.columns[:-1])
Delhi_onehot = Delhi_onehot[fixed_columns]

print(Delhi_onehot.shape)
Delhi_onehot.head(11)

(2182, 78)


Unnamed: 0,Neighborhoods,ATM,American Restaurant,Arcade,Art Gallery,Arts & Crafts Store,Athletics & Sports,Auto Workshop,Bakery,Boat or Ferry,Boutique,Burger Joint,Bus Station,Café,Castle,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Convenience Store,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store,Fabric Shop,Fast Food Restaurant,Food & Drink Shop,Food Court,Food Truck,Frozen Yogurt Shop,Gastropub,Gift Shop,Grocery Store,Gym,Gym / Fitness Center,Hindu Temple,Historic Site,Hotel,IT Services,Ice Cream Shop,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Juice Bar,Light Rail Station,Liquor Store,Lounge,Market,Mediterranean Restaurant,Metro Station,Middle Eastern Restaurant,Mobile Phone Shop,Motorcycle Shop,Movie Theater,Moving Target,Multiplex,Neighborhood,Nightclub,North Indian Restaurant,Park,Pizza Place,Plaza,Punjabi Restaurant,Restaurant,River,Salad Place,Sandwich Place,Sculpture Garden,Shopping Mall,Snack Place,Stadium,Tea Room,Temple,Thai Restaurant,Train Station,Vegetarian / Vegan Restaurant,Waterfront
0,Ashok Nagar (Delhi),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Ashok Nagar (Delhi),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Ashok Nagar (Delhi),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Ashok Nagar (Delhi),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Ashok Nagar (Delhi),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,Ashok Nagar (Delhi),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,Ashok Nagar (Delhi),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,Ashok Nagar (Delhi),1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,Ashok Vihar,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,Ashok Vihar,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### Let's group the rows by Neighborhood and by taking the mean of frequency of occurrence of each catagory.

In [30]:
Delhi_grouped = Delhi_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(Delhi_grouped.shape)
Delhi_grouped.head(11)

(138, 78)


Unnamed: 0,Neighborhoods,ATM,American Restaurant,Arcade,Art Gallery,Arts & Crafts Store,Athletics & Sports,Auto Workshop,Bakery,Boat or Ferry,Boutique,Burger Joint,Bus Station,Café,Castle,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Convenience Store,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store,Fabric Shop,Fast Food Restaurant,Food & Drink Shop,Food Court,Food Truck,Frozen Yogurt Shop,Gastropub,Gift Shop,Grocery Store,Gym,Gym / Fitness Center,Hindu Temple,Historic Site,Hotel,IT Services,Ice Cream Shop,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Juice Bar,Light Rail Station,Liquor Store,Lounge,Market,Mediterranean Restaurant,Metro Station,Middle Eastern Restaurant,Mobile Phone Shop,Motorcycle Shop,Movie Theater,Moving Target,Multiplex,Neighborhood,Nightclub,North Indian Restaurant,Park,Pizza Place,Plaza,Punjabi Restaurant,Restaurant,River,Salad Place,Sandwich Place,Sculpture Garden,Shopping Mall,Snack Place,Stadium,Tea Room,Temple,Thai Restaurant,Train Station,Vegetarian / Vegan Restaurant,Waterfront
0,Ashok Nagar (Delhi),0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Ashok Vihar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.1,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Ashram Chowk,0.909091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Babarpur,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
4,"Badarpur, Delhi",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
5,Bali Nagar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Bawana,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Ber Sarai,0.0,0.0,0.0,0.018868,0.0,0.018868,0.018868,0.0,0.0,0.0,0.0,0.0,0.113208,0.0,0.037736,0.0,0.0,0.056604,0.018868,0.0,0.018868,0.018868,0.0,0.018868,0.018868,0.056604,0.0,0.0,0.018868,0.018868,0.018868,0.0,0.018868,0.0,0.0,0.0,0.018868,0.056604,0.0,0.0,0.075472,0.018868,0.0,0.0,0.0,0.0,0.056604,0.018868,0.0,0.0,0.018868,0.0,0.0,0.018868,0.0,0.0,0.0,0.018868,0.0,0.0,0.075472,0.018868,0.0,0.0,0.0,0.018868,0.037736,0.0,0.0,0.0,0.0,0.0,0.018868,0.018868,0.0,0.018868,0.0
8,Bhajanpura,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Chanakyapuri,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0


In [31]:
len(Delhi_grouped[Delhi_grouped["Indian Restaurant"] > 0])

74

#### Let's separate out the data of Indian Restaurant.

In [32]:
Delhi_Ind_Rest = Delhi_grouped[["Neighborhoods","Indian Restaurant"]]
Delhi_Ind_Rest.head(11)

Unnamed: 0,Neighborhoods,Indian Restaurant
0,Ashok Nagar (Delhi),0.0
1,Ashok Vihar,0.0
2,Ashram Chowk,0.0
3,Babarpur,0.0
4,"Badarpur, Delhi",0.0
5,Bali Nagar,0.142857
6,Bawana,0.0
7,Ber Sarai,0.075472
8,Bhajanpura,0.0
9,Chanakyapuri,0.0


### Cluster areas

In [33]:
kclusters = 3

Delhi_clustering = Delhi_Ind_Rest.drop(["Neighborhoods"], 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Delhi_clustering)

kmeans.labels_[0:20]

array([0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 1, 0, 0, 1, 1, 0, 2, 1, 1, 0],
      dtype=int32)

In [34]:
Delhi_merged = Delhi_Ind_Rest.copy()

Delhi_merged["Cluster Labels"] = kmeans.labels_

Delhi_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
Delhi_merged.head(11)

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels
0,Ashok Nagar (Delhi),0.0,0
1,Ashok Vihar,0.0,0
2,Ashram Chowk,0.0,0
3,Babarpur,0.0,0
4,"Badarpur, Delhi",0.0,0
5,Bali Nagar,0.142857,1
6,Bawana,0.0,0
7,Ber Sarai,0.075472,2
8,Bhajanpura,0.0,0
9,Chanakyapuri,0.0,0


In [35]:
Delhi_merged = Delhi_merged.join(Delhi_df.set_index("Neighborhood"), on="Neighborhood")

Delhi_merged

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
0,Ashok Nagar (Delhi),0.0,0,28.69223,77.30124
1,Ashok Vihar,0.0,0,28.69037,77.17609
2,Ashram Chowk,0.0,0,28.710598,77.326965
3,Babarpur,0.0,0,28.50738,77.30346
4,"Badarpur, Delhi",0.0,0,28.50738,77.30346
5,Bali Nagar,0.142857,1,28.65223,77.129411
6,Bawana,0.0,0,28.80059,77.03473
7,Ber Sarai,0.075472,2,28.54954,77.18167
8,Bhajanpura,0.0,0,28.69988,77.25906
9,Chanakyapuri,0.0,0,28.59506,77.18573


In [36]:
Delhi_merged.shape

(138, 5)

#### Now, we wll visualize the clusters.

In [37]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(Delhi_merged['Latitude'], Delhi_merged['Longitude'], Delhi_merged['Neighborhood'], Delhi_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [38]:
map_clusters.save('map_clusters.html')

In [39]:
Delhi_merged.loc[Delhi_merged['Cluster Labels'] == 0, Delhi_merged.columns[[1] + list(range(5, Delhi_merged.shape[1]))]]

Unnamed: 0,Indian Restaurant
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0
6,0.0
8,0.0
9,0.0
11,0.0
12,0.0


In [40]:
Delhi_merged.loc[Delhi_merged['Cluster Labels'] == 1, Delhi_merged.columns[[1] + list(range(5, Delhi_merged.shape[1]))]]

Unnamed: 0,Indian Restaurant
5,0.142857
10,0.166667
13,0.1
14,0.125
17,0.121951
18,0.103448
30,0.111111
34,0.125
45,0.111111
49,0.142857


In [41]:
Delhi_merged.loc[Delhi_merged['Cluster Labels'] == 2, Delhi_merged.columns[[1] + list(range(5, Delhi_merged.shape[1]))]]

Unnamed: 0,Indian Restaurant
7,0.075472
16,0.055556
25,0.066667
28,0.076923
31,0.074074
32,0.078431
33,0.086957
35,0.073171
36,0.054545
40,0.083333


In [42]:
Delhi_merged.loc[Delhi_merged['Cluster Labels'] == 3, Delhi_merged.columns[[1] + list(range(5, Delhi_merged.shape[1]))]]

Unnamed: 0,Indian Restaurant


In [43]:
Delhi_merged_1 = Delhi_merged.head(11)
Delhi_merged_1

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
0,Ashok Nagar (Delhi),0.0,0,28.69223,77.30124
1,Ashok Vihar,0.0,0,28.69037,77.17609
2,Ashram Chowk,0.0,0,28.710598,77.326965
3,Babarpur,0.0,0,28.50738,77.30346
4,"Badarpur, Delhi",0.0,0,28.50738,77.30346
5,Bali Nagar,0.142857,1,28.65223,77.129411
6,Bawana,0.0,0,28.80059,77.03473
7,Ber Sarai,0.075472,2,28.54954,77.18167
8,Bhajanpura,0.0,0,28.69988,77.25906
9,Chanakyapuri,0.0,0,28.59506,77.18573


In [44]:
Delhi_Food = Delhi_onehot[["Neighborhoods","Indian Restaurant", "Pizza Place", "Fast Food Restaurant", "American Restaurant", "Middle Eastern Restaurant", "Chinese Restaurant", "Sandwich Place", "Hotel", "Diner", "Food Truck", "Thai Restaurant", "Italian Restaurant", "Salad Place", "Vegetarian / Vegan Restaurant", "Food Court"]]
Delhi_Food.head(11)

Unnamed: 0,Neighborhoods,Indian Restaurant,Pizza Place,Fast Food Restaurant,American Restaurant,Middle Eastern Restaurant,Chinese Restaurant,Sandwich Place,Hotel,Diner,Food Truck,Thai Restaurant,Italian Restaurant,Salad Place,Vegetarian / Vegan Restaurant,Food Court
0,Ashok Nagar (Delhi),0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Ashok Nagar (Delhi),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Ashok Nagar (Delhi),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Ashok Nagar (Delhi),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Ashok Nagar (Delhi),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,Ashok Nagar (Delhi),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,Ashok Nagar (Delhi),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,Ashok Nagar (Delhi),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,Ashok Vihar,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
9,Ashok Vihar,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [45]:
Ind_Rest = Delhi_Food['Indian Restaurant'].sum()
Ind_Rest

145

In [46]:
Pizza_Place = Delhi_Food['Pizza Place'].sum()
Pizza_Place

182

In [47]:
Ff_Rest = Delhi_Food['Fast Food Restaurant'].sum()
Ff_Rest

94

In [48]:
Amer_Rest = Delhi_Food['American Restaurant'].sum()
Amer_Rest

1

In [49]:
Me_Rest = Delhi_Food['Middle Eastern Restaurant'].sum()
Me_Rest

27

In [50]:
Chin_Rest = Delhi_Food['Chinese Restaurant'].sum()
Chin_Rest

41

In [51]:
Sand_Place = Delhi_Food['Sandwich Place'].sum()
Sand_Place

42

In [52]:
Hotel = Delhi_Food['Hotel'].sum()
Hotel

121

In [53]:
Diner = Delhi_Food['Diner'].sum()
Diner

27

In [54]:
Food_Truck = Delhi_Food['Food Truck'].sum()
Food_Truck

26

In [55]:
Thai_Rest = Delhi_Food['Thai Restaurant'].sum()
Thai_Rest

27

In [56]:
Ital_Rest = Delhi_Food['Italian Restaurant'].sum()
Ital_Rest

27

In [57]:
Sal_Place = Delhi_Food['Salad Place'].sum()
Sal_Place

17

In [58]:
Veg_Rest = Delhi_Food['Vegetarian / Vegan Restaurant'].sum()
Veg_Rest

18

In [59]:
Food_Court = Delhi_Food['Food Court'].sum()
Food_Court

13

In [60]:
Food_Data = {'Category': ['Indian Restaurant', 'Pizza Place', 'Fast Food Restaurant', 'American Restaurant', 'Middle Eastern Restaurant', 'Chinese Restaurant', 'Sandwich Place', 'Hotel', 'Diner', 'Food Truck', 'Thai Restaurant', 'Italian Restaurant', 'Salad place', 'Vegetarian / Vegan Restaurant', 'Food Court'],
             'Total': ['145', '182', '94', '1', '27', '41', '42', '121', '27', '26', '27', '27', '17', '18', '13']}
Delhi_Food = pd.DataFrame(Food_Data, columns = ['Category', 'Total'])
Delhi_Food

Unnamed: 0,Category,Total
0,Indian Restaurant,145
1,Pizza Place,182
2,Fast Food Restaurant,94
3,American Restaurant,1
4,Middle Eastern Restaurant,27
5,Chinese Restaurant,41
6,Sandwich Place,42
7,Hotel,121
8,Diner,27
9,Food Truck,26


In [61]:
Foreign_Food_Data = {'Category': ['Pizza Place', 'Fast Food Restaurant', 'American Restaurant', 'Middle Eastern Restaurant', 'Chinese Restaurant', 'Sandwich Place', 'Thai Restaurant', 'Italian Restaurant', 'Salad place'],
                'Total': ['182', '94', '1', '27', '41', '42', '27', '27', '17']}
Foreign_Food_df = pd.DataFrame(Foreign_Food_Data, columns = ['Category', 'Total'])
Foreign_Food_df

Unnamed: 0,Category,Total
0,Pizza Place,182
1,Fast Food Restaurant,94
2,American Restaurant,1
3,Middle Eastern Restaurant,27
4,Chinese Restaurant,41
5,Sandwich Place,42
6,Thai Restaurant,27
7,Italian Restaurant,27
8,Salad place,17


In [62]:
Indian_Food = 145

In [63]:
Foreign_Food = 182 + 94 +1 + 27 + 41 + 42 + 27 + 27 + 17
Foreign_Food

458

In [64]:
Both_Food = 121 + 27 + 26 + 18 + 13
Both_Food

205

In [65]:
Food = {'Type of food': ['Indian', 'Foreign', 'Both'],
        'Number of Restaurants': ['145', '458', '205']}
Restaurant_df = pd.DataFrame(Food, columns = ['Type of food', 'Number of Restaurants'])
Restaurant_df

Unnamed: 0,Type of food,Number of Restaurants
0,Indian,145
1,Foreign,458
2,Both,205
