# Opening a Hotel in Chennai, India


## 1. Import the Required Libraries

In [1]:
#pip install folium

In [2]:
#pip install geocoder

In [3]:
# importing Libraries
import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values # to get coordinates

import requests
import geocoder# library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print("Libraries  are imported.")

Libraries  are imported.


## Section 2: Data Scrapping

In [4]:
# send the GET request
data = requests.get("https://commons.wikimedia.org/wiki/Category:Suburbs_of_Chennai")
if data.status_code == 200:
    print('Page download successful')
else:
    print('Page download error. Error code: {}'.format(page.status_code))
data=data.text

Page download successful


In [5]:
# parse data from the html into a beautifulsoup object

soup = BeautifulSoup(data, 'html.parser')
# create a list to store neighborhood data
neighborhoodList = []
# append the data into the list
for row in soup.find_all("div", class_="mw-category")[0].findAll("li"):
    neighborhoodList.append(row.text)
    
    # create a new DataFrame from the list
ch_df = pd.DataFrame({"Neighborhood": neighborhoodList})
print("Total no of neighborhoods:",ch_df.shape)
ch_df.head()


Total no of neighborhoods: (63, 1)


Unnamed: 0,Neighborhood
0,► Adambakkam‎ (13 F)
1,"► Adyar‎ (5 C, 17 F)"
2,► Alwarthirunagar‎ (9 F)
3,"► Ambattur‎ (1 C, 10 F)"
4,"► Anna Nagar‎ (2 C, 6 F)"



## 3. Adding Lattitude and Longitude to the dataframe

In [6]:
# define a function to get coordinates
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Chennai, India'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [7]:
# call the function to get the coordinates, store in a new list using list comprehension
coords = [ get_latlng(neighborhood) for neighborhood in ch_df["Neighborhood"].tolist() ]
coords[0:5]

[[12.99192000000005, 80.20603000000006],
 [12.97814999845724, 80.18882999205442],
 [13.050550000000044, 80.18397000000004],
 [13.129079995271326, 80.16889003113329],
 [12.976730020240282, 80.1439999696538]]

In [8]:
ch_df['Latitude'], ch_df['Longitude'] = zip(*ch_df['Neighborhood'].apply(get_latlng))
ch_df

Unnamed: 0,Neighborhood,Latitude,Longitude
0,► Adambakkam‎ (13 F),12.99192,80.20603
1,"► Adyar‎ (5 C, 17 F)",12.97815,80.18883
2,► Alwarthirunagar‎ (9 F),13.05055,80.18397
3,"► Ambattur‎ (1 C, 10 F)",13.12908,80.16889
4,"► Anna Nagar‎ (2 C, 6 F)",12.97673,80.144
5,"► Anna Salai‎ (4 C, 32 F)",13.12599,80.05945
6,"► Ashok Nagar, Chennai‎ (17 F)",12.97759,80.14386
7,► Assisi Nagar‎ (2 F),13.16546,80.23411
8,"► Besant Nagar‎ (3 C, 6 F)",13.00017,80.25766
9,"► Chepauk‎ (3 C, 6 F)",13.0644,80.28065


In [9]:
ch_df.shape

(63, 3)

In [10]:
# save the DataFrame as CSV file
ch_df.to_csv("ch_df.csv", index=False)


## 4. Map Visualization - Neighborhood of Chennai

In [11]:
# get the coordinates of Chennai
address = 'Chennai, India'

geolocator = Nominatim(user_agent="jeslyjose25@gmail.com")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Chennai, India {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Chennai, India 13.0836939, 80.270186.


In [12]:
# create map of Chennai using latitude and longitude values
map_ch = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(ch_df['Latitude'], ch_df['Longitude'], ch_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_ch)  
    
print("Neighborhood of Chennai")  
map_ch

Neighborhood of Chennai


In [13]:
# save the map as HTML file
map_ch.save('map_ch.html')

## 5. Pull the data from FourSquare APIs

In [14]:
# define Foursquare Credentials and Version
CLIENT_ID = 'CZFFPD3LE5DKF2OLOBSBKV43DHQFOAVA2KS5D01NCCGFNV1C' # your Foursquare ID
CLIENT_SECRET = 'PVFATCCUHGOS04TKNIYISDIA2KHINGX2LDCQGTMY23VJHLAN' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: CZFFPD3LE5DKF2OLOBSBKV43DHQFOAVA2KS5D01NCCGFNV1C
CLIENT_SECRET:PVFATCCUHGOS04TKNIYISDIA2KHINGX2LDCQGTMY23VJHLAN


#### Now, let's get the top 100 venues that are in Chennai , Rouge within a radius of 500 meters

In [15]:
radius = 2000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(ch_df['Latitude'], ch_df['Longitude'], ch_df['Neighborhood']):
    
    # Pull the nearby venues
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [16]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(1974, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,► Adambakkam‎ (13 F),12.99192,80.20603,Luxe Cinemas,12.991041,80.216962,Multiplex
1,► Adambakkam‎ (13 F),12.99192,80.20603,Phoenix Market City,12.99171,80.217297,Shopping Mall
2,► Adambakkam‎ (13 F),12.99192,80.20603,Mainland China,12.991028,80.217084,Chinese Restaurant
3,► Adambakkam‎ (13 F),12.99192,80.20603,IMAX®,12.990639,80.21631,Multiplex
4,► Adambakkam‎ (13 F),12.99192,80.20603,Rajdhani,12.991081,80.217003,Rajasthani Restaurant


In [17]:
venues_df.groupby(["Neighborhood"]).count()


Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
► Adambakkam‎ (13 F),71,71,71,71,71,71
"► Adyar‎ (5 C, 17 F)",22,22,22,22,22,22
► Alwarthirunagar‎ (9 F),35,35,35,35,35,35
"► Ambattur‎ (1 C, 10 F)",5,5,5,5,5,5
"► Anna Nagar‎ (2 C, 6 F)",8,8,8,8,8,8
"► Anna Salai‎ (4 C, 32 F)",5,5,5,5,5,5
"► Ashok Nagar, Chennai‎ (17 F)",7,7,7,7,7,7
► Assisi Nagar‎ (2 F),5,5,5,5,5,5
"► Besant Nagar‎ (3 C, 6 F)",90,90,90,90,90,90
"► Chepauk‎ (3 C, 6 F)",65,65,65,65,65,65



##### Let's find out how many unique categories can be curated from all the returned venues

In [18]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 170 uniques categories.


In [19]:
# print out the list of categories
venue = venues_df['VenueCategory'].unique()[:50]
venue

array(['Multiplex', 'Shopping Mall', 'Chinese Restaurant',
       'Rajasthani Restaurant', 'Clothing Store', 'Pizza Place',
       'Vegetarian / Vegan Restaurant', 'Café', 'Sandwich Place',
       'Indian Restaurant', "Women's Store", 'Snack Place', 'Hotel',
       'Restaurant', 'Ice Cream Shop', 'Donut Shop', "Men's Store", 'Bar',
       'Coffee Shop', 'Train Station', 'BBQ Joint', 'Department Store',
       'Punjabi Restaurant', 'Frozen Yogurt Shop',
       'South Indian Restaurant', 'Mexican Restaurant',
       'Japanese Restaurant', 'Fast Food Restaurant', 'Church',
       'Mediterranean Restaurant', 'Italian Restaurant',
       'Asian Restaurant', 'Burger Joint', 'Bakery', 'Sports Bar',
       'Market', 'IT Services', 'Breakfast Spot', 'Badminton Court',
       'Indie Movie Theater', 'Malay Restaurant', 'Lake', 'Juice Bar',
       'Grocery Store', 'Gym', 'Movie Theater', 'Bookstore',
       'Convenience Store', 'Food Truck', 'Dessert Shop'], dtype=object)

## 6. Analyze Neighborhood

Perform one-hot encoding of the business categories retrieved

In [20]:
# one hot encoding
ch_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
ch_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [ch_onehot.columns[-1]] + list(ch_onehot.columns[:-1])
ch_onehot = ch_onehot[fixed_columns]

print(ch_onehot.shape)
ch_onehot.head()

(1974, 171)


Unnamed: 0,Neighborhoods,ATM,Accessories Store,Afghan Restaurant,African Restaurant,Airport Lounge,Airport Terminal,American Restaurant,Arcade,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Badminton Court,Bakery,Bar,Beach,Bed & Breakfast,Bengali Restaurant,Bike Rental / Bike Share,Bike Shop,Bistro,Boat or Ferry,Bookstore,Botanical Garden,Boutique,Bowling Alley,Breakfast Spot,Buffet,Burger Joint,Bus Line,Bus Station,Cafeteria,Café,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Cafeteria,College Library,Comfort Food Restaurant,Concert Hall,Convenience Store,Cosmetics Shop,Cricket Ground,Department Store,Dessert Shop,Diner,Dive Bar,Donut Shop,Electronics Store,Event Space,Farmers Market,Fast Food Restaurant,Field,Fish Market,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Gas Station,General Entertainment,Gift Shop,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Halal Restaurant,Harbor / Marina,Historic Site,Hotel,Hotel Bar,Hyderabadi Restaurant,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kebab Restaurant,Kerala Restaurant,Korean Restaurant,Lake,Light Rail Station,Lighthouse,Lounge,Malay Restaurant,Market,Mediterranean Restaurant,Memorial Site,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Molecular Gastronomy Restaurant,Motel,Movie Theater,Multicuisine Indian Restaurant,Multiplex,Museum,Music Store,National Park,Nightclub,North Indian Restaurant,Office,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Platform,Playground,Pool,Pool Hall,Portuguese Restaurant,Pub,Punjabi Restaurant,Rajasthani Restaurant,Recreation Center,Rental Car Location,Resort,Restaurant,River,Road,Rock Club,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shopping Mall,Snack Place,Soccer Stadium,South Indian Restaurant,Spa,Sports Bar,Sports Club,Stadium,Steakhouse,Surf Spot,Tea Room,Tennis Stadium,Thai Restaurant,Theater,Theme Park,Toy / Game Store,Trail,Train,Train Station,Vegetarian / Vegan Restaurant,Video Store,Whisky Bar,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,► Adambakkam‎ (13 F),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,► Adambakkam‎ (13 F),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,► Adambakkam‎ (13 F),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,► Adambakkam‎ (13 F),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,► Adambakkam‎ (13 F),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [21]:
ch_grouped = ch_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(ch_grouped.shape)
ch_grouped

(63, 171)


Unnamed: 0,Neighborhoods,ATM,Accessories Store,Afghan Restaurant,African Restaurant,Airport Lounge,Airport Terminal,American Restaurant,Arcade,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Badminton Court,Bakery,Bar,Beach,Bed & Breakfast,Bengali Restaurant,Bike Rental / Bike Share,Bike Shop,Bistro,Boat or Ferry,Bookstore,Botanical Garden,Boutique,Bowling Alley,Breakfast Spot,Buffet,Burger Joint,Bus Line,Bus Station,Cafeteria,Café,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Cafeteria,College Library,Comfort Food Restaurant,Concert Hall,Convenience Store,Cosmetics Shop,Cricket Ground,Department Store,Dessert Shop,Diner,Dive Bar,Donut Shop,Electronics Store,Event Space,Farmers Market,Fast Food Restaurant,Field,Fish Market,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Gas Station,General Entertainment,Gift Shop,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Halal Restaurant,Harbor / Marina,Historic Site,Hotel,Hotel Bar,Hyderabadi Restaurant,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kebab Restaurant,Kerala Restaurant,Korean Restaurant,Lake,Light Rail Station,Lighthouse,Lounge,Malay Restaurant,Market,Mediterranean Restaurant,Memorial Site,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Molecular Gastronomy Restaurant,Motel,Movie Theater,Multicuisine Indian Restaurant,Multiplex,Museum,Music Store,National Park,Nightclub,North Indian Restaurant,Office,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Platform,Playground,Pool,Pool Hall,Portuguese Restaurant,Pub,Punjabi Restaurant,Rajasthani Restaurant,Recreation Center,Rental Car Location,Resort,Restaurant,River,Road,Rock Club,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shopping Mall,Snack Place,Soccer Stadium,South Indian Restaurant,Spa,Sports Bar,Sports Club,Stadium,Steakhouse,Surf Spot,Tea Room,Tennis Stadium,Thai Restaurant,Theater,Theme Park,Toy / Game Store,Trail,Train,Train Station,Vegetarian / Vegan Restaurant,Video Store,Whisky Bar,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,► Adambakkam‎ (13 F),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.028169,0.014085,0.014085,0.014085,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.014085,0.0,0.0,0.0,0.042254,0.042254,0.0,0.014085,0.056338,0.0,0.042254,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.042254,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.028169,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.042254,0.0,0.0,0.014085,0.014085,0.098592,0.0,0.0,0.014085,0.0,0.014085,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.014085,0.0,0.014085,0.0,0.014085,0.0,0.0,0.0,0.0,0.0,0.0,0.028169,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.084507,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.014085,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.0,0.042254,0.0,0.0,0.0,0.014085,0.014085,0.0,0.014085,0.0,0.014085,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.056338,0.0,0.0,0.014085,0.0,0.0,0.0
1,"► Adyar‎ (5 C, 17 F)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.045455,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.136364,0.045455,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,► Alwarthirunagar‎ (9 F),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.028571,0.0,0.0,0.085714,0.0,0.028571,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.028571,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.085714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.085714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.085714,0.0,0.0,0.0,0.0,0.0,0.0
3,"► Ambattur‎ (1 C, 10 F)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"► Anna Nagar‎ (2 C, 6 F)",0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"► Anna Salai‎ (4 C, 32 F)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"► Ashok Nagar, Chennai‎ (17 F)",0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,► Assisi Nagar‎ (2 F),0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"► Besant Nagar‎ (3 C, 6 F)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.011111,0.0,0.0,0.0,0.0,0.022222,0.0,0.011111,0.0,0.0,0.0,0.011111,0.0,0.011111,0.0,0.011111,0.0,0.055556,0.055556,0.0,0.0,0.011111,0.0,0.033333,0.0,0.0,0.011111,0.0,0.0,0.0,0.0,0.011111,0.033333,0.011111,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011111,0.011111,0.0,0.0,0.0,0.0,0.0,0.011111,0.011111,0.0,0.0,0.066667,0.2,0.0,0.0,0.022222,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.0,0.011111,0.0,0.0,0.011111,0.0,0.0,0.0,0.0,0.011111,0.0,0.0,0.0,0.011111,0.0,0.022222,0.0,0.0,0.0,0.0,0.022222,0.011111,0.011111,0.011111,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,0.0,0.022222,0.0,0.0,0.0,0.0,0.022222,0.0,0.011111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011111,0.0,0.0,0.011111,0.0,0.0,0.0
9,"► Chepauk‎ (3 C, 6 F)",0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.030769,0.0,0.0,0.0,0.0,0.0,0.0,0.030769,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.030769,0.0,0.0,0.0,0.076923,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.015385,0.030769,0.0,0.0,0.015385,0.030769,0.0,0.0,0.015385,0.0,0.0,0.030769,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.015385,0.0,0.015385,0.153846,0.0,0.0,0.0,0.0,0.0,0.0,0.030769,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.015385,0.0,0.0,0.0,0.030769,0.0,0.0,0.0,0.046154,0.0,0.076923,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.015385,0.015385,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
len(ch_grouped[ch_grouped["Hotel"] > 0])

28

###### Analyze the Hotel data

In [23]:
ch_hotel = ch_grouped[["Neighborhoods","Hotel"]]

In [24]:
ch_hotel.head()

Unnamed: 0,Neighborhoods,Hotel
0,► Adambakkam‎ (13 F),0.042254
1,"► Adyar‎ (5 C, 17 F)",0.090909
2,► Alwarthirunagar‎ (9 F),0.0
3,"► Ambattur‎ (1 C, 10 F)",0.0
4,"► Anna Nagar‎ (2 C, 6 F)",0.0


## 7. Clustering using K-Means clustering algorithm

Run k-means to cluster the neighborhoods in Chennai into 5 clusters.

In [25]:
# set number of clusters
kclusters = 4

ch_clustering = ch_hotel.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(ch_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([3, 0, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

In [26]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
ch_merged = ch_hotel.copy()

# add clustering labels
ch_merged["Cluster Labels"] = kmeans.labels_

In [27]:
ch_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
ch_merged.head()

Unnamed: 0,Neighborhood,Hotel,Cluster Labels
0,► Adambakkam‎ (13 F),0.042254,3
1,"► Adyar‎ (5 C, 17 F)",0.090909,0
2,► Alwarthirunagar‎ (9 F),0.0,1
3,"► Ambattur‎ (1 C, 10 F)",0.0,1
4,"► Anna Nagar‎ (2 C, 6 F)",0.0,1


In [28]:
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
ch_merged = ch_merged.join(ch_df.set_index("Neighborhood"), on="Neighborhood")

print(ch_merged.shape)
ch_merged.head() # check the last columns!

(63, 5)


Unnamed: 0,Neighborhood,Hotel,Cluster Labels,Latitude,Longitude
0,► Adambakkam‎ (13 F),0.042254,3,12.99192,80.20603
1,"► Adyar‎ (5 C, 17 F)",0.090909,0,12.97815,80.18883
2,► Alwarthirunagar‎ (9 F),0.0,1,13.05055,80.18397
3,"► Ambattur‎ (1 C, 10 F)",0.0,1,13.12908,80.16889
4,"► Anna Nagar‎ (2 C, 6 F)",0.0,1,12.97673,80.144


In [29]:

# sort the results by Cluster Labels
print(ch_merged.shape)
ch_merged.sort_values(["Cluster Labels"], inplace=True)
ch_merged

(63, 5)


Unnamed: 0,Neighborhood,Hotel,Cluster Labels,Latitude,Longitude
1,"► Adyar‎ (5 C, 17 F)",0.090909,0,12.97815,80.18883
47,"► St. Thomas Mount‎ (2 C, 41 F)",0.142857,0,13.00799,80.19599
29,► Nanganallur‎ (3 F),0.133333,0,12.9764,80.1876
55,► Urapakkam‎ (3 F),0.083333,0,12.86342,80.06916
43,► Saidapet‎ (20 F),0.101695,0,13.02027,80.22131
16,"► Guindy‎ (4 C, 1 P, 17 F)",0.085714,0,13.00408,80.22012
28,"► Mylapore‎ (3 C, 16 F)",0.11,0,13.03155,80.26022
62,"► Washermanpet‎ (1 C, 1 F)",0.0,1,13.1095,80.28701
30,► Neelankarai‎ (2 F),0.0,1,12.95014,80.25505
61,► Vyasarpadi‎ (1 C),0.0,1,13.11778,80.25168


#### Map Visualization - Neighborhood of Chennai

In [30]:
import numpy as np
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(ch_merged['Latitude'], ch_merged['Longitude'], ch_merged['Neighborhood'], ch_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [31]:
# save the map as HTML file
map_clusters.save('map_clusters.html')


## 8. Analyze the Clusters


##### First Cluster (Cluster Label - 0)

In [32]:
ch_merged.loc[ch_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Hotel,Cluster Labels,Latitude,Longitude
1,"► Adyar‎ (5 C, 17 F)",0.090909,0,12.97815,80.18883
47,"► St. Thomas Mount‎ (2 C, 41 F)",0.142857,0,13.00799,80.19599
29,► Nanganallur‎ (3 F),0.133333,0,12.9764,80.1876
55,► Urapakkam‎ (3 F),0.083333,0,12.86342,80.06916
43,► Saidapet‎ (20 F),0.101695,0,13.02027,80.22131
16,"► Guindy‎ (4 C, 1 P, 17 F)",0.085714,0,13.00408,80.22012
28,"► Mylapore‎ (3 C, 16 F)",0.11,0,13.03155,80.26022


##### Secound Cluster (Cluster Label - 1)

In [33]:
ch_merged.loc[ch_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Hotel,Cluster Labels,Latitude,Longitude
62,"► Washermanpet‎ (1 C, 1 F)",0.0,1,13.1095,80.28701
30,► Neelankarai‎ (2 F),0.0,1,12.95014,80.25505
61,► Vyasarpadi‎ (1 C),0.0,1,13.11778,80.25168
32,"► Padi, Chennai‎ (6 F)",0.0,1,13.09971,80.16168
33,"► Pallavaram‎ (2 C, 19 F)",0.0,1,12.97444,80.14852
34,"► Pallikaranai‎ (1 C, 1 F)",0.0,1,12.95567,80.2208
35,► Pattaravakkam‎ (1 C),0.0,1,13.111644,80.156137
37,► Perungudi‎ (8 F),0.0,1,12.96356,80.24001
38,"► Poonamallee‎ (2 C, 4 F)",0.0,1,13.05137,80.11267
39,► Puzhal‎ (2 C),0.0,1,13.15946,80.20718


#### Third Cluster (Cluster Label - 2)

In [34]:

ch_merged.loc[ch_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Hotel,Cluster Labels,Latitude,Longitude
60,► Villivakkam‎ (1 C),0.25,2,13.13433,80.20618
27,"► Muttukadu, Chennai‎ (3 C, 1 F)",0.214286,2,12.83165,80.24207


#### Fourth Cluster (Cluster Label - 3)

In [35]:
ch_merged.loc[ch_merged['Cluster Labels'] == 3]

Unnamed: 0,Neighborhood,Hotel,Cluster Labels,Latitude,Longitude
56,"► Vadapalani‎ (2 C, 10 F)",0.059701,3,13.05226,80.2112
54,"► Triplicane‎ (3 C, 26 F)",0.05,3,13.06289,80.27146
57,► Valmiki Nagar‎ (4 F),0.030303,3,12.98139,80.26377
0,► Adambakkam‎ (13 F),0.042254,3,12.99192,80.20603
44,"► Semmencherry‎ (1 C, 1 F)",0.047619,3,12.86557,80.22051
41,"► Royapettah‎ (2 C, 1 F)",0.07,3,13.05352,80.26826
25,"► Meenambakkam‎ (1 C, 1 F)",0.058824,3,12.98646,80.176
21,"► Kotturpuram‎ (2 C, 4 F)",0.05,3,13.01696,80.24276
20,► Kosapet‎ (8 F),0.0625,3,13.09453,80.25482
18,► Kodambakkam‎ (8 F),0.028571,3,13.02883,80.21999


### Final Remarks

In this project, main focuss on Hotels around the Chennai city. Using k-means cluster, four clusters are analyzed.It is apparent that looking at nearby venues, it seems second Cluster might be a good location as there are not a lot of hotels in these areas. It is apparent that most of the places in cluster 1 are well connected to Airport/Railway Station, Beaches and other popular historical places. Therefore, this project recommends to open hotel business at **Second cluster (Cluster Label - 1)** places  that have least number of existing hotels.