## Scraping Wikipedia

### 1. Import libraries

In [1]:

import json # library to handle JSON files
import os # read API credentials from environment variables
from io import StringIO # wrap HTML text as a file-like object for pandas.read_html

import numpy as np # library for numerical arrays
import pandas as pd # library for data analsysis
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geocoder # to get coordinates

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

# reaching this line means every import above succeeded
print("Libraries imported.")

Libraries imported.


### 2. Scrape data from the Wikipedia page into a DataFrame

In [2]:
# get the response in the form of html
wikiurl="https://en.wikipedia.org/wiki/List_of_cities_in_India_by_population"
table_class="wikitable sortable jquery-tablesorter"
# a timeout keeps the cell from hanging indefinitely on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of handing an error page
# to the parser in the next cell
page = requests.get(wikiurl, timeout=30)
page.raise_for_status()
response = page.text
# preview only the start of the document rather than echoing the whole page
response[:500]

'\n<!DOCTYPE html>\n<html class="client-nojs" lang="en" dir="ltr">\n<head>\n<meta charset="UTF-8"/>\n<title>List of cities in India by population - Wikipedia</title>\n<script>document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"d31f8466-736a-46ed-abcd-6c37a08fb541","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_cities_in_India_by_population","wgTitle":"List of cities in India by population","wgCurRevisionId":967782741,"wgRevisionId":967782741,"wgArticleId":4021386,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Wikipedia indefinitely semi-protected pages","Use dmy d

In [3]:
# build a BeautifulSoup tree from the HTML text, then pick the first <table>
# element that carries the "wikitable" CSS class
soup = BeautifulSoup(markup=response, features='html.parser')
indiatable = soup.find('table', class_="wikitable")

In [4]:
# parse the table markup into a list of DataFrames; the markup is wrapped in StringIO
# because passing a literal HTML string to read_html is deprecated in recent pandas
df=pd.read_html(StringIO(str(indiatable)))

In [5]:
# convert list to dataframe (the first parsed table is the one we want)
# The isinstance guard makes this cell safe to re-run: once `df` is already a
# DataFrame, df[0] would be a column lookup and raise KeyError.
if isinstance(df, list):
    df=pd.DataFrame(df[0])

In [6]:
# preview the first rows of the scraped table
df.head()

Unnamed: 0,Rank,UA[a],State/Territory,Population (2011)[4],Population (2001)[3],Population (1991)[3]
0,1,Mumbai,Maharashtra,18394912,16434386.0,12596243.0
1,2,Delhi,Delhi,16349831,12877470.0,8419084.0
2,3,Kolkata,West Bengal,14112536,13205697.0,11021918.0
3,4,Chennai,Tamil Nadu,8696010,6560242.0,5421985.0
4,5,Bangalore,Karnataka,8520435,5701446.0,4130288.0


In [7]:
# discard the rank and the older census figures; everything else is kept
unwanted_columns = ["Rank", "Population (2001)[3]", "Population (1991)[3]"]
data = df.drop(columns=unwanted_columns)

In [8]:
# give the scraped columns short, footnote-free names
column_aliases = {
    "UA[a]": "Neighborhood",
    "State/Territory": "State",
    "Population (2011)[4]": "Population",
}
data = data.rename(columns=column_aliases)
data.head()

Unnamed: 0,Neighborhood,State,Population
0,Mumbai,Maharashtra,18394912
1,Delhi,Delhi,16349831
2,Kolkata,West Bengal,14112536
3,Chennai,Tamil Nadu,8696010
4,Bangalore,Karnataka,8520435


In [9]:
# (rows, columns) of the trimmed table
data.shape

(100, 3)

### 3. Get the geographical coordinates

In [10]:
# define a function to get coordinates
def get_latlng(neighborhood, state, max_attempts=10):
    """Geocode "<neighborhood>, <state>, India" with the ArcGIS geocoder.

    Parameters
    ----------
    neighborhood : name of the place to look up.
    state : state the place belongs to; used to disambiguate the query.
    max_attempts : how many times the lookup is tried before giving up.
        The lookup is retried only while the geocoder reports no coordinates.

    Returns
    -------
    The ``latlng`` value reported by the geocoder for the first successful lookup.

    Raises
    ------
    RuntimeError
        If no coordinates were obtained after ``max_attempts`` lookups. Without this
        bound, a place the geocoder cannot resolve would make the call loop forever.
    """
    query = '{}, {}, India'.format(neighborhood, state)
    for _ in range(max_attempts):
        lat_lng_coords = geocoder.arcgis(query).latlng
        if lat_lng_coords is not None:
            return lat_lng_coords
    raise RuntimeError(
        'No coordinates found for "{}" after {} attempts'.format(query, max_attempts))

In [11]:
# geocode every (neighborhood, state) pair; map() walks the two lists in lockstep
coords = list(map(get_latlng, data["Neighborhood"].tolist(), data["State"].tolist()))

In [106]:
# show the collected coordinate pairs, then how many there are, one per line
print(coords, len(coords), sep="\n")

[[18.940170000000023, 72.83483000000007], [28.634100000000046, 77.21689000000003], [22.570530000000076, 88.37124000000006], [13.083620000000053, 80.28252000000003], [12.966180000000065, 77.58690000000007], [17.394870000000026, 78.47076000000004], [23.027760000000058, 72.60027000000008], [18.504220000000032, 73.85302000000007], [21.185780000000022, 72.83679000000006], [26.925730000000044, 75.80659000000003], [26.43562000000003, 80.32986000000005], [26.85471000000007, 80.92135000000007], [21.157050000000027, 79.08217000000008], [28.662490000000048, 77.43777000000006], [22.716220000000078, 75.86512000000005], [10.994160000000022, 76.96629000000007], [9.936010000000067, 76.26142000000004], [25.601290000000063, 85.13751000000008], [11.25881000000004, 75.78084000000007], [23.26466000000005, 77.40518000000003], [10.510820000000024, 76.21121000000005], [22.30946000000006, 73.17993000000007], [27.192170000000033, 78.00007000000005], [17.719840000000033, 83.26278000000008], [11.042850000000044, 

In [12]:
# turn the list of [lat, lng] pairs into a two-column frame
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
# append both columns to the city table; values are matched on the row index
data = data.assign(
    Latitude=df_coords['Latitude'],
    Longitude=df_coords['Longitude'],
)
data.head(10)

Unnamed: 0,Neighborhood,State,Population,Latitude,Longitude
0,Mumbai,Maharashtra,18394912,18.94017,72.83483
1,Delhi,Delhi,16349831,28.6341,77.21689
2,Kolkata,West Bengal,14112536,22.57053,88.37124
3,Chennai,Tamil Nadu,8696010,13.08362,80.28252
4,Bangalore,Karnataka,8520435,12.96618,77.5869
5,Hyderabad,Telangana,7749334,17.39487,78.47076
6,Ahmedabad,Gujarat,6361084,23.02776,72.60027
7,Pune,Maharashtra,5057709,18.50422,73.85302
8,Surat,Gujarat,4591246,21.18578,72.83679
9,Jaipur,Rajasthan,3073350,26.92573,75.80659


In [13]:
# the state column is not used after this point, so leave it out of the working table
indf = data.drop(columns="State")
indf.head()

Unnamed: 0,Neighborhood,Population,Latitude,Longitude
0,Mumbai,18394912,18.94017,72.83483
1,Delhi,16349831,28.6341,77.21689
2,Kolkata,14112536,22.57053,88.37124
3,Chennai,8696010,13.08362,80.28252
4,Bangalore,8520435,12.96618,77.5869


In [14]:
# write the city table to dataset.csv, leaving the row index out of the file
indf.to_csv(path_or_buf="dataset.csv", index=False)

### 4. Create a map of India with neighborhoods superimposed on top

In [15]:
# look up the coordinates of India with the Nominatim geocoder
address = 'India'

geolocator = Nominatim(user_agent="newuser")
location = geolocator.geocode(address)
# these two values are reused below as the centre of both maps
latitude, longitude = location.latitude, location.longitude
print('The geograpical coordinate of India is {}, {}.'.format(latitude, longitude))

The geograpical coordinate of India is 22.3511148, 78.6677428.


In [16]:
# base map centred on the coordinates looked up for India
map_kl = folium.Map(location=[latitude, longitude], zoom_start=5)

# appearance shared by every marker
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
)

# one circle per row of indf, with the neighborhood name as its popup
for lat, lng, neighborhood in zip(indf['Latitude'], indf['Longitude'], indf['Neighborhood']):
    popup = folium.Popup('{}'.format(neighborhood), parse_html=True)
    marker = folium.CircleMarker([lat, lng], popup=popup, **marker_style)
    marker.add_to(map_kl)

map_kl

In [115]:
# write the city map to a standalone HTML file
map_kl.save(outfile='map_india.html')

### 5. Use the Foursquare API to explore the neighborhoods

In [1]:
# define Foursquare Credentials and Version
# The credentials are read from environment variables so that secrets are never
# written into the notebook; both fall back to '' when the variable is not set.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# mask the secret so that it does not end up in the saved cell output
print('CLIENT_SECRET:' + '*' * len(CLIENT_SECRET))

Your credentails:
CLIENT_ID: 
CLIENT_SECRET:


#### Now, let's get the top 100 venues that are within a radius of 5 kilometers.

In [20]:
radius = 5000  # search radius around each neighborhood, in metres
LIMIT = 100    # maximum number of venues requested per neighborhood

venues = []

# endpoint of the Foursquare "explore" API
url = "https://api.foursquare.com/v2/venues/explore"

for lat, lng, neighborhood in zip(indf['Latitude'], indf['Longitude'], indf['Neighborhood']):

    # let requests build and URL-encode the query string
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(lat, lng),
        'radius': radius,
        'limit': LIMIT,
    }

    # make the GET request; fail on an HTTP error (e.g. bad credentials, quota
    # exceeded) instead of hitting an opaque KeyError while indexing the payload
    reply = requests.get(url, params=params, timeout=30)
    reply.raise_for_status()

    # a neighborhood with no recommendations may come back without any group
    groups = reply.json()["response"].get('groups') or []
    results = groups[0]['items'] if groups else []

    # keep only the relevant information for each nearby venue
    for venue in results:
        details = venue['venue']
        # a venue without any category is kept, with None as its category
        categories = details.get('categories') or []
        venues.append((
            neighborhood,
            lat,
            lng,
            details['name'],
            details['location']['lat'],
            details['location']['lng'],
            categories[0]['name'] if categories else None))

In [21]:
# convert the venues list into a new DataFrame
# The column names are passed to the constructor so that an empty `venues` list
# yields an empty frame with the expected columns; assigning `.columns` afterwards
# fails with a length mismatch in that case.
venues_df = pd.DataFrame(
    venues,
    columns=['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory'],
)

print(venues_df.shape)
venues_df.head()

(3561, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Mumbai,18.94017,72.83483,Royal China,18.938715,72.832933,Chinese Restaurant
1,Mumbai,18.94017,72.83483,Starbucks,18.93219,72.833959,Coffee Shop
2,Mumbai,18.94017,72.83483,Britannia & Co.,18.934683,72.840183,Parsi Restaurant
3,Mumbai,18.94017,72.83483,Food for Thought,18.932031,72.831667,Café
4,Mumbai,18.94017,72.83483,Sachin Tendulkar Stand,18.939601,72.825633,Cricket Ground


#### Let's check how many venues were returned for each neighborhood

In [22]:
# number of non-null entries in every column, per neighborhood
venues_df.groupby("Neighborhood").count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agra,40,40,40,40,40,40
Ahmedabad,100,100,100,100,100,100
Ajmer,9,9,9,9,9,9
Aligarh,6,6,6,6,6,6
Allahabad,16,16,16,16,16,16
Amravati,6,6,6,6,6,6
Amritsar,45,45,45,45,45,45
Asansol,5,5,5,5,5,5
Aurangabad,30,30,30,30,30,30
Bangalore,100,100,100,100,100,100


#### Let's find out how many unique categories can be curated from all the returned venues

In [23]:
# count the distinct venue categories across all returned venues
category_count = len(venues_df['VenueCategory'].unique())
print('There are {} uniques categories.'.format(category_count))

There are 232 uniques categories.


In [24]:
# show the distinct venue categories, in order of first appearance
venues_df['VenueCategory'].unique()

array(['Chinese Restaurant', 'Coffee Shop', 'Parsi Restaurant', 'Café',
       'Cricket Ground', 'Scenic Lookout', 'Ice Cream Shop',
       'Indian Restaurant', 'Seafood Restaurant', 'Bakery', 'Art Gallery',
       'Beach', 'Hotel', 'Clothing Store', 'Boutique', 'Pub',
       'Gym / Fitness Center', 'Pizza Place', 'Dessert Shop',
       'Middle Eastern Restaurant', 'History Museum',
       'Italian Restaurant', 'Diner', 'Gift Shop', 'Lounge', 'Nightclub',
       'Fast Food Restaurant', 'New American Restaurant',
       'Performing Arts Venue', 'Monument / Landmark', 'Theater',
       'Asian Restaurant', 'Bar', 'Japanese Restaurant', 'Restaurant',
       'Juice Bar', 'Thai Restaurant', 'Music Venue', 'Spa', 'Brewery',
       'Golf Course', 'Bookstore', 'Park', 'Donut Shop', 'Deli / Bodega',
       'Club House', 'Sandwich Place', 'Stadium', 'Bengali Restaurant',
       "Men's Store", 'Vegetarian / Vegan Restaurant', 'Plaza',
       'South Indian Restaurant', 'Food Truck', 'Bistro',
     

In [28]:
# rank the venue categories by how many venues fall into each, most common first
count = (
    venues_df
    .groupby(['VenueCategory'])
    .count()
    .sort_values(["Neighborhood"], ascending=False)
)
count

Unnamed: 0_level_0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude
VenueCategory,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Indian Restaurant,435,435,435,435,435,435
Hotel,289,289,289,289,289,289
Café,226,226,226,226,226,226
Fast Food Restaurant,166,166,166,166,166,166
Pizza Place,162,162,162,162,162,162
Shopping Mall,124,124,124,124,124,124
Multiplex,121,121,121,121,121,121
Coffee Shop,112,112,112,112,112,112
Ice Cream Shop,103,103,103,103,103,103
Bakery,91,91,91,91,91,91


In [29]:
# is "Hotel" one of the categories that came back?
"Hotel" in set(venues_df['VenueCategory'])

True

### 6. Analyze Each Neighborhood

In [30]:
# one indicator column per venue category (no prefix, so the column name is the category)
in_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# put the neighborhood of each venue back; it is appended as the last column
in_onehot['Neighborhoods'] = venues_df['Neighborhood']

# rotate the column list by one so that the last column becomes the first
fixed_columns = list(in_onehot.columns[-1:]) + list(in_onehot.columns[:-1])
in_onehot = in_onehot.loc[:, fixed_columns]

print(in_onehot.shape)
in_onehot.head()


(3561, 233)


Unnamed: 0,Neighborhoods,ATM,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Service,Airport Terminal,American Restaurant,Andhra Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Beach,Bed & Breakfast,Bengali Restaurant,Big Box Store,Bistro,Board Shop,Boat or Ferry,Bookstore,Botanical Garden,Boutique,Bowling Alley,Breakfast Spot,Brewery,Buffet,Burger Joint,Burmese Restaurant,Burrito Place,Bus Station,Business Service,Cafeteria,Café,Cajun / Creole Restaurant,Campground,Cave,Chaat Place,Chinese Restaurant,Chocolate Shop,City,Clothing Store,Club House,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Convenience Store,Cosmetics Shop,Cricket Ground,Cuban Restaurant,Cupcake Shop,Dairy Store,Deli / Bodega,Department Store,Dessert Shop,Dhaba,Diner,Donut Shop,Dumpling Restaurant,Electronics Store,Event Space,Exhibit,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Field,Fish & Chips Shop,Flea Market,Food,Food & Drink Shop,Food Court,Food Stand,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,General Entertainment,General Travel,Gift Shop,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Heliport,Historic Site,History Museum,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hyderabadi Restaurant,Ice Cream Shop,Indian Restaurant,Indian Sweet Shop,Indie Movie Theater,Indie Theater,Intersection,Irani Cafe,Irish Pub,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Karnataka Restaurant,Kebab Restaurant,Kerala Restaurant,Kids Store,Korean Restaurant,Lake,Light Rail Station,Lighthouse,Lounge,Maharashtrian Restaurant,Market,Mattress Store,Mediterranean Restaurant,Memorial Site,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Molecular Gastronomy Restaurant,Monument / 
Landmark,Motel,Motorcycle Shop,Mountain,Movie Theater,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Museum,Music Venue,Neighborhood,New American Restaurant,Nightclub,Nightlife Spot,North Indian Restaurant,Northeast Indian Restaurant,Optical Shop,Organic Grocery,Other Great Outdoors,Other Nightlife,Outdoors & Recreation,Palace,Park,Parsi Restaurant,Performing Arts Venue,Persian Restaurant,Pharmacy,Pier,Pizza Place,Planetarium,Platform,Playground,Plaza,Pool,Portuguese Restaurant,Pub,Punjabi Restaurant,Racetrack,Recreation Center,Residential Building (Apartment / Condo),Resort,Rest Area,Restaurant,River,Road,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Science Museum,Sculpture Garden,Seafood Restaurant,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Sikh Temple,Smoke Shop,Snack Place,Soccer Field,Soccer Stadium,South Indian Restaurant,Southern / Soul Food Restaurant,Spa,Spiritual Center,Sports Bar,Stadium,Stationery Store,Steakhouse,Street Food Gathering,Supermarket,Sushi Restaurant,Tapas Restaurant,Tea Room,Temple,Tex-Mex Restaurant,Thai Restaurant,Theater,Tibetan Restaurant,Toy / Game Store,Track Stadium,Trail,Train Station,Turkish Restaurant,Udupi Restaurant,Vegetarian / Vegan Restaurant,Video Store,Watch Shop,Water Park,Wine Bar,Wings Joint,Women's Store,Zoo
0,Mumbai,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Mumbai,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Mumbai,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Mumbai,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Mumbai,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### Next, let's group rows by neighborhood, taking the mean of the frequency of occurrence of each category

In [31]:
# average every indicator column within each neighborhood; as_index=False keeps
# "Neighborhoods" as a regular column instead of moving it into the index
in_grouped = in_onehot.groupby("Neighborhoods", as_index=False).mean()

print(in_grouped.shape)
in_grouped

(100, 233)


Unnamed: 0,Neighborhoods,ATM,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Service,Airport Terminal,American Restaurant,Andhra Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Beach,Bed & Breakfast,Bengali Restaurant,Big Box Store,Bistro,Board Shop,Boat or Ferry,Bookstore,Botanical Garden,Boutique,Bowling Alley,Breakfast Spot,Brewery,Buffet,Burger Joint,Burmese Restaurant,Burrito Place,Bus Station,Business Service,Cafeteria,Café,Cajun / Creole Restaurant,Campground,Cave,Chaat Place,Chinese Restaurant,Chocolate Shop,City,Clothing Store,Club House,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Convenience Store,Cosmetics Shop,Cricket Ground,Cuban Restaurant,Cupcake Shop,Dairy Store,Deli / Bodega,Department Store,Dessert Shop,Dhaba,Diner,Donut Shop,Dumpling Restaurant,Electronics Store,Event Space,Exhibit,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Field,Fish & Chips Shop,Flea Market,Food,Food & Drink Shop,Food Court,Food Stand,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,General Entertainment,General Travel,Gift Shop,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Heliport,Historic Site,History Museum,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hyderabadi Restaurant,Ice Cream Shop,Indian Restaurant,Indian Sweet Shop,Indie Movie Theater,Indie Theater,Intersection,Irani Cafe,Irish Pub,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Karnataka Restaurant,Kebab Restaurant,Kerala Restaurant,Kids Store,Korean Restaurant,Lake,Light Rail Station,Lighthouse,Lounge,Maharashtrian Restaurant,Market,Mattress Store,Mediterranean Restaurant,Memorial Site,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Molecular Gastronomy Restaurant,Monument / 
Landmark,Motel,Motorcycle Shop,Mountain,Movie Theater,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Museum,Music Venue,Neighborhood,New American Restaurant,Nightclub,Nightlife Spot,North Indian Restaurant,Northeast Indian Restaurant,Optical Shop,Organic Grocery,Other Great Outdoors,Other Nightlife,Outdoors & Recreation,Palace,Park,Parsi Restaurant,Performing Arts Venue,Persian Restaurant,Pharmacy,Pier,Pizza Place,Planetarium,Platform,Playground,Plaza,Pool,Portuguese Restaurant,Pub,Punjabi Restaurant,Racetrack,Recreation Center,Residential Building (Apartment / Condo),Resort,Rest Area,Restaurant,River,Road,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Science Museum,Sculpture Garden,Seafood Restaurant,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Sikh Temple,Smoke Shop,Snack Place,Soccer Field,Soccer Stadium,South Indian Restaurant,Southern / Soul Food Restaurant,Spa,Spiritual Center,Sports Bar,Stadium,Stationery Store,Steakhouse,Street Food Gathering,Supermarket,Sushi Restaurant,Tapas Restaurant,Tea Room,Temple,Tex-Mex Restaurant,Thai Restaurant,Theater,Tibetan Restaurant,Toy / Game Store,Track Stadium,Trail,Train Station,Turkish Restaurant,Udupi Restaurant,Vegetarian / Vegan Restaurant,Video Store,Watch Shop,Water Park,Wine Bar,Wings Joint,Women's Store,Zoo
0,Agra,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.075,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.075,0.0,0.0,0.0,0.0,0.275,0.0,0.0,0.0,0.1,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Ahmedabad,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.04,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.08,0.01,0.0,0.03,0.13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01
2,Ajmer,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Aligarh,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Allahabad,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1875,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1875,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Amravati,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Amritsar,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.044444,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.088889,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.044444,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Asansol,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Aurangabad,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.033333,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.033333,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.033333,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Bangalore,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.02,0.0,0.02,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.11,0.0,0.0,0.06,0.13,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.04,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.01,0.02,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0


In [34]:
# number of neighborhoods whose mean "Hotel" frequency is above zero
int((in_grouped["Hotel"] > 0.00).sum())

72

#### Create a new DataFrame for Hotel data only

In [36]:
# keep only the neighborhood name and its mean "Hotel" frequency
in_train = in_grouped.loc[:, ["Neighborhoods", "Hotel"]]
in_train.head()

Unnamed: 0,Neighborhoods,Hotel
0,Agra,0.275
1,Ahmedabad,0.08
2,Ajmer,0.111111
3,Aligarh,0.0
4,Allahabad,0.0625


### 7. Cluster Neighborhoods

#### Run k-means to cluster the neighborhoods in Indian Cities into 3 clusters.

In [37]:
# set number of clusters
kclusters = 3

# feature table for k-means: in_train without the name column.
# `columns=` is used because passing the axis positionally (drop([...], 1)) is
# rejected by pandas 2.x.
kl_clustering = in_train.drop(columns=["Neighborhoods"])
# run k-means clustering (fixed random_state so the labels are reproducible)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(kl_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 1, 0, 1, 1, 0, 0, 1, 0, 0])

In [38]:
# new frame: a copy of in_train with the k-means label of every row appended
in_merged = in_train.assign(**{"Cluster Labels": kmeans.labels_})

In [39]:
# use the same key column name as indf so that the two tables can be joined
in_merged = in_merged.rename(columns={"Neighborhoods": "Neighborhood"})
in_merged.head()

Unnamed: 0,Neighborhood,Hotel,Cluster Labels
0,Agra,0.275,2
1,Ahmedabad,0.08,1
2,Ajmer,0.111111,0
3,Aligarh,0.0,1
4,Allahabad,0.0625,1


In [40]:
# bring in the remaining indf columns (population, latitude, longitude) for each
# neighborhood by joining on its name
indf_by_name = indf.set_index("Neighborhood")
in_merged = in_merged.join(indf_by_name, on="Neighborhood")

print(in_merged.shape)
in_merged.head() # check the last columns!

(100, 6)


Unnamed: 0,Neighborhood,Hotel,Cluster Labels,Population,Latitude,Longitude
0,Agra,0.275,2,1760285,27.19217,78.00007
1,Ahmedabad,0.08,1,6361084,23.02776,72.60027
2,Ajmer,0.111111,0,551360,26.46553,74.63169
3,Aligarh,0.0,1,911223,27.88625,78.07385
4,Allahabad,0.0625,1,1216719,25.43609,81.84718


In [41]:
# order the rows by cluster label so that each cluster reads as one contiguous group
print(in_merged.shape)
in_merged = in_merged.sort_values(by=["Cluster Labels"])
in_merged

(100, 6)


Unnamed: 0,Neighborhood,Hotel,Cluster Labels,Population,Latitude,Longitude
99,Warangal,0.166667,0,759594,17.98405,79.60205
83,Siliguri,0.117647,0,705579,26.73244,88.40871
50,Kochi,0.123596,0,2119724,9.93601,76.26142
98,Visakhapatnam,0.193548,0,1728128,17.71984,83.26278
26,Durgapur,0.111111,0,581409,23.53232,87.30735
78,Ranchi,0.090909,0,1126741,23.3506,85.31378
47,Jodhpur,0.19403,0,1138300,26.26691,73.03052
46,Jhansi,0.111111,0,549391,25.44858,78.56955
84,Solapur,0.2,0,951558,17.65804,75.90685
59,Madurai,0.122807,0,1465625,9.92417,78.12416


#### Finally, let's visualize the resulting clusters

In [43]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=5)

# set color scheme for the clusters: kclusters evenly spaced colours from the
# rainbow colormap, converted to hex strings
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(in_merged['Latitude'], in_merged['Longitude'], in_merged['Neighborhood'], in_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    # the labels start at 0, so they index the palette directly; the former
    # `cluster-1` only worked because index -1 wraps around to the last colour
    cluster_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [160]:
# Persist the cluster map as an HTML file (relative path, so it lands in the
# current working directory).
map_clusters.save(outfile='map_clusters.html')

### 8. Examine Clusters

#### Cluster-0

In [44]:
# Rows whose cluster label is 0.
in_merged[in_merged['Cluster Labels'].eq(0)]


Unnamed: 0,Neighborhood,Hotel,Cluster Labels,Population,Latitude,Longitude
99,Warangal,0.166667,0,759594,17.98405,79.60205
83,Siliguri,0.117647,0,705579,26.73244,88.40871
50,Kochi,0.123596,0,2119724,9.93601,76.26142
98,Visakhapatnam,0.193548,0,1728128,17.71984,83.26278
26,Durgapur,0.111111,0,581409,23.53232,87.30735
78,Ranchi,0.090909,0,1126741,23.3506,85.31378
47,Jodhpur,0.19403,0,1138300,26.26691,73.03052
46,Jhansi,0.111111,0,549391,25.44858,78.56955
84,Solapur,0.2,0,951558,17.65804,75.90685
59,Madurai,0.122807,0,1465625,9.92417,78.12416


#### Cluster-1

In [45]:
# Rows whose cluster label is 1.
in_merged[in_merged['Cluster Labels'].eq(1)]

Unnamed: 0,Neighborhood,Hotel,Cluster Labels,Population,Latitude,Longitude
67,Mysore,0.07,1,990900,12.30906,76.65303
65,Mumbai,0.04,1,18394912,18.94017,72.83483
62,Mangalore,0.038961,1,623841,12.89785,74.84541
64,Moradabad,0.0,1,889810,28.83893,78.77684
66,Muzaffarnagar,0.0,1,495543,29.47029,77.70761
75,Pune,0.03,1,5057709,18.50422,73.85302
69,Nanded,0.0,1,550564,19.15566,77.31105
97,Vijayawada,0.078125,1,1491202,16.50256,80.63977
96,Vellore,0.076923,1,484690,12.91356,79.13251
95,Vasai-Virar,0.0,1,1222390,19.20505,72.9553


#### Cluster-2

In [46]:
# Rows whose cluster label is 2.
in_merged[in_merged['Cluster Labels'].eq(2)]

Unnamed: 0,Neighborhood,Hotel,Cluster Labels,Population,Latitude,Longitude
79,Raurkela,0.428571,2,552970,22.22972,84.86077
41,Jaipur,0.3,2,3073350,26.92573,75.80659
17,Bikaner,0.444444,2,647804,28.01647,73.31184
0,Agra,0.275,2,1760285,27.19217,78.00007
