# Capstone Project – The Battle of Neighborhoods

### 1. Installing and Importing Python Libraries and Dependencies

In [1]:
!pip install geocoder
!pip install folium



In [2]:
import numpy as np

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values # to get coordinates

import requests
import geocoder# library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print("Libraries  are imported.")


Libraries  are imported.


### 2. Scrap data from Wikipedia page into a DataFrame


In [3]:
# send the GET request
data = requests.get("https://commons.wikimedia.org/wiki/Category:Suburbs_of_Hyderabad,_India").text

In [4]:
# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(data, 'html.parser')

In [5]:
# create a list to store neighborhood data
neighborhoodList = []

In [6]:
# append the data into the list
for row in soup.find_all("div", class_="mw-category")[0].findAll("li"):
    neighborhoodList.append(row.text)

In [7]:
 # create a new DataFrame from the list
kl_df = pd.DataFrame({"Neighborhood": neighborhoodList})
kl_df.head()

Unnamed: 0,Neighborhood
0,"► Abids‎ (1 C, 13 F)"
1,"► Alwal‎ (1 C, 1 F)"
2,"► Ameerpet, Hyderabad‎ (3 C, 21 F)"
3,"► Bandlaguda, Rangareddy‎ (1 C, 2 F)"
4,"► Banjara Hills‎ (3 C, 25 F)"


In [8]:
kl_df.shape


(54, 1)

### 3. Get the geographical coordinates

In [9]:
# define a function to get coordinates
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Hyderabad, India'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords


In [10]:
# call the function to get the coordinates, store in a new list using list comprehension
coords = [ get_latlng(neighborhood) for neighborhood in kl_df["Neighborhood"].tolist() ]
        

In [11]:
coords

[[17.389800000000037, 78.47658000000007],
 [17.535430000000076, 78.54427000000004],
 [17.43482000000006, 78.44949000000008],
 [17.299820000000068, 78.46495000000004],
 [17.415350000000046, 78.43435000000005],
 [17.40211000000005, 78.47770000000008],
 [17.447290000000066, 78.45396000000005],
 [17.40954000000005, 78.57896000000005],
 [17.536218869427803, 78.2350425425703],
 [17.40893503530367, 78.32674007784891],
 [17.40301000000005, 78.49792000000008],
 [17.40893503530367, 78.32674007784891],
 [17.368570000000034, 78.53515000000004],
 [17.409950000000038, 78.48229000000003],
 [17.45333000000005, 78.43034000000006],
 [17.43181000000004, 78.38636000000008],
 [17.522760000000062, 78.43862000000007],
 [17.46686941076456, 78.24915353871232],
 [17.389410000000055, 78.40406000000007],
 [17.32707000000005, 78.60533000000004],
 [17.448230000000024, 78.37429000000003],
 [17.399230000000045, 78.48073000000005],
 [17.36838000000006, 78.39999000000006],
 [17.42865000000006, 78.39762000000007],
 [17.

In [12]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

In [13]:
# merge the coordinates into the original dataframe
kl_df['Latitude'] = df_coords['Latitude']
kl_df['Longitude'] = df_coords['Longitude']

In [14]:
# check the neighborhoods and the coordinates
print(kl_df.shape)
kl_df

(54, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,"► Abids‎ (1 C, 13 F)",17.3898,78.47658
1,"► Alwal‎ (1 C, 1 F)",17.53543,78.54427
2,"► Ameerpet, Hyderabad‎ (3 C, 21 F)",17.43482,78.44949
3,"► Bandlaguda, Rangareddy‎ (1 C, 2 F)",17.29982,78.46495
4,"► Banjara Hills‎ (3 C, 25 F)",17.41535,78.43435
5,"► Basheerbagh‎ (1 C, 7 F)",17.40211,78.4777
6,"► Begumpet‎ (5 C, 9 F)",17.44729,78.45396
7,► Boduppal‎ (2 F),17.40954,78.57896
8,"► Bolarum‎ (3 C, 1 F)",17.536219,78.235043
9,"► Cavalry Barracks, Hyderabad‎ (1 C)",17.408935,78.32674


In [15]:
# save the DataFrame as CSV file
kl_df.to_csv("kl_df.csv", index=False)


### 4. Create a map of Hyderabad with neighborhoods superimposed on top


In [16]:
# get the coordinates of Hyderabad
address = 'Hyderabad, India'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Hyderabad, India {}, {}.'.format(latitude, longitude))


The geograpical coordinate of Hyderabad, India 17.3616079, 78.4746286.


In [17]:
# create map of Hyderabad using latitude and longitude values
map_kl = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(kl_df['Latitude'], kl_df['Longitude'], kl_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_kl)  
    
map_kl

In [18]:
# save the map as HTML file
map_kl.save('map_kl.html')


### 5. Use the Foursquare API to explore the neighborhoods

In [19]:
# define Foursquare Credentials and Version
CLIENT_ID = 'GYL2VDJZZAPGN2GHCPZ2LAFL2WPVW5QJXKUQIHSEBKCAJKXO' # your Foursquare ID
CLIENT_SECRET = 'CNB3BKS33O5GSV2FBGXY1KSLA2PKQZUQLWPQOKXFSX4UAYPY' # your Foursquare Secret
VERSION = '20180604' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: GYL2VDJZZAPGN2GHCPZ2LAFL2WPVW5QJXKUQIHSEBKCAJKXO
CLIENT_SECRET:CNB3BKS33O5GSV2FBGXY1KSLA2PKQZUQLWPQOKXFSX4UAYPY


Now, let's get the top 100 venues that are within a radius of 2000 meters.



In [20]:
radius = 2000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(kl_df['Latitude'], kl_df['Longitude'], kl_df['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [21]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()


(2158, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,"► Abids‎ (1 C, 13 F)",17.3898,78.47658,Santosh Dhaba,17.388485,78.479509,Indian Restaurant
1,"► Abids‎ (1 C, 13 F)",17.3898,78.47658,Pragati,17.388088,78.481134,South Indian Restaurant
2,"► Abids‎ (1 C, 13 F)",17.3898,78.47658,Mayur Pan Shop,17.388894,78.480578,Juice Bar
3,"► Abids‎ (1 C, 13 F)",17.3898,78.47658,Karachi Bakery,17.383454,78.475075,Bakery
4,"► Abids‎ (1 C, 13 F)",17.3898,78.47658,Ram ki Bandi,17.382398,78.475014,Food Truck


Let's check how many venues were returned for each neighorhood



In [22]:
venues_df.groupby(["Neighborhood"]).count()


Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"► Abids‎ (1 C, 13 F)",79,79,79,79,79,79
"► Alwal‎ (1 C, 1 F)",4,4,4,4,4,4
"► Ameerpet, Hyderabad‎ (3 C, 21 F)",100,100,100,100,100,100
"► Bandlaguda, Rangareddy‎ (1 C, 2 F)",4,4,4,4,4,4
"► Banjara Hills‎ (3 C, 25 F)",100,100,100,100,100,100
"► Basheerbagh‎ (1 C, 7 F)",96,96,96,96,96,96
"► Begumpet‎ (5 C, 9 F)",52,52,52,52,52,52
► Boduppal‎ (2 F),7,7,7,7,7,7
"► Bolarum‎ (3 C, 1 F)",3,3,3,3,3,3
"► Cavalry Barracks, Hyderabad‎ (1 C)",20,20,20,20,20,20


Let's find out how many unique categories can be curated from all the returned venues



In [23]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))


There are 150 uniques categories.


In [24]:
# print out the list of categories
venues_df['VenueCategory'].unique()[:50]


array(['Indian Restaurant', 'South Indian Restaurant', 'Juice Bar',
       'Bakery', 'Food Truck', 'Hotel', 'Ice Cream Shop', 'Shoe Store',
       'Diner', 'Neighborhood', 'Lounge', 'Burger Joint', 'Chaat Place',
       'Dessert Shop', 'Café', 'Stadium', 'Snack Place', 'Science Museum',
       'Chinese Restaurant', 'Restaurant', 'Smoke Shop', 'Coffee Shop',
       'Fast Food Restaurant', 'Breakfast Spot', 'Hotel Bar',
       'Department Store', 'Mobile Phone Shop', 'Shopping Mall', 'Bar',
       'Food', 'Multiplex', 'Performing Arts Venue', 'Gaming Cafe',
       'Indie Movie Theater', 'Farmers Market', 'Pizza Place',
       'Fried Chicken Joint', 'Hookah Bar', 'Clothing Store',
       'Sandwich Place', 'Food Court', 'Jewelry Store', 'Golf Course',
       'Asian Restaurant', 'Pharmacy', 'ATM', 'Pub', 'Bookstore',
       'American Restaurant', 'Vegetarian / Vegan Restaurant'],
      dtype=object)

In [25]:
# check if the results contain "Shopping Mall"
"Neighborhood" in venues_df['VenueCategory'].unique()

True

### 6. Analyze Neighborhood

In [26]:
# one hot encoding
kl_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
kl_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [kl_onehot.columns[-1]] + list(kl_onehot.columns[:-1])
kl_onehot = kl_onehot[fixed_columns]

print(kl_onehot.shape)
kl_onehot.head()


(2158, 151)


Unnamed: 0,Neighborhoods,ATM,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Service,American Restaurant,Andhra Restaurant,Arcade,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auditorium,BBQ Joint,Bakery,Bank,Bar,Basketball Court,Bed & Breakfast,Beer Garden,Bengali Restaurant,Bistro,Bookstore,Boutique,Bowling Alley,Breakfast Spot,Brewery,Burger Joint,Bus Station,Cable Car,Cafeteria,Café,Chaat Place,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Concert Hall,Convenience Store,Cricket Ground,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Donut Shop,Dumpling Restaurant,Electronics Store,Ethiopian Restaurant,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Flea Market,Food,Food & Drink Shop,Food Court,Food Service,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gastropub,Gift Shop,Golf Course,Greek Restaurant,Grocery Store,Gym,Historic Site,History Museum,Hookah Bar,Hot Dog Joint,Hotel,Hotel Bar,Hunan Restaurant,Hyderabadi Restaurant,IT Services,Ice Cream Shop,Indian Restaurant,Indian Sweet Shop,Indie Movie Theater,Irish Pub,Italian Restaurant,Jewelry Store,Juice Bar,Lake,Light Rail Station,Liquor Store,Lounge,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Motel,Motorcycle Shop,Movie Theater,Multicuisine Indian Restaurant,Multiplex,Neighborhood,New American Restaurant,Nightclub,North Indian Restaurant,Office,Outdoors & Recreation,Park,Parsi Restaurant,Performing Arts Venue,Pharmacy,Pizza Place,Platform,Playground,Pub,Racetrack,Rajasthani Restaurant,Recreation Center,Residential Building (Apartment / Condo),Restaurant,Salon / Barbershop,Sandwich Place,Scenic Lookout,Science Museum,Shoe Store,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,Social Club,South Indian Restaurant,Spa,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Supermarket,Taxi Stand,Temple,Thai Restaurant,Train Station,Vegetarian / Vegan Restaurant,Volleyball Court,Women's Store
0,"► Abids‎ (1 C, 13 F)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"► Abids‎ (1 C, 13 F)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"► Abids‎ (1 C, 13 F)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"► Abids‎ (1 C, 13 F)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"► Abids‎ (1 C, 13 F)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category



In [27]:
kl_grouped = kl_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(kl_grouped.shape)
kl_grouped

(52, 151)


Unnamed: 0,Neighborhoods,ATM,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Service,American Restaurant,Andhra Restaurant,Arcade,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auditorium,BBQ Joint,Bakery,Bank,Bar,Basketball Court,Bed & Breakfast,Beer Garden,Bengali Restaurant,Bistro,Bookstore,Boutique,Bowling Alley,Breakfast Spot,Brewery,Burger Joint,Bus Station,Cable Car,Cafeteria,Café,Chaat Place,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Concert Hall,Convenience Store,Cricket Ground,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Donut Shop,Dumpling Restaurant,Electronics Store,Ethiopian Restaurant,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Flea Market,Food,Food & Drink Shop,Food Court,Food Service,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gastropub,Gift Shop,Golf Course,Greek Restaurant,Grocery Store,Gym,Historic Site,History Museum,Hookah Bar,Hot Dog Joint,Hotel,Hotel Bar,Hunan Restaurant,Hyderabadi Restaurant,IT Services,Ice Cream Shop,Indian Restaurant,Indian Sweet Shop,Indie Movie Theater,Irish Pub,Italian Restaurant,Jewelry Store,Juice Bar,Lake,Light Rail Station,Liquor Store,Lounge,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Motel,Motorcycle Shop,Movie Theater,Multicuisine Indian Restaurant,Multiplex,Neighborhood,New American Restaurant,Nightclub,North Indian Restaurant,Office,Outdoors & Recreation,Park,Parsi Restaurant,Performing Arts Venue,Pharmacy,Pizza Place,Platform,Playground,Pub,Racetrack,Rajasthani Restaurant,Recreation Center,Residential Building (Apartment / Condo),Restaurant,Salon / Barbershop,Sandwich Place,Scenic Lookout,Science Museum,Shoe Store,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,Social Club,South Indian Restaurant,Spa,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Supermarket,Taxi Stand,Temple,Thai Restaurant,Train Station,Vegetarian / Vegan Restaurant,Volleyball Court,Women's Store
0,"► Abids‎ (1 C, 13 F)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037975,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.012658,0.0,0.0,0.0,0.037975,0.012658,0.050633,0.012658,0.0,0.025316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025316,0.037975,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.050633,0.0,0.012658,0.0,0.012658,0.0,0.012658,0.0,0.012658,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.037975,0.012658,0.0,0.0,0.0,0.075949,0.126582,0.0,0.012658,0.0,0.0,0.025316,0.025316,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.012658,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.025316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037975,0.0,0.012658,0.0,0.012658,0.012658,0.0,0.012658,0.012658,0.025316,0.0,0.0,0.025316,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"► Alwal‎ (1 C, 1 F)",0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"► Ameerpet, Hyderabad‎ (3 C, 21 F)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.04,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.04,0.02,0.01,0.06,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.02,0.0,0.02,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.06,0.01,0.0,0.01,0.0,0.02,0.14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.06,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.05,0.0,0.0
3,"► Bandlaguda, Rangareddy‎ (1 C, 2 F)",0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"► Banjara Hills‎ (3 C, 25 F)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.04,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.02,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.08,0.01,0.03,0.01,0.0,0.05,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.02,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.03,0.0,0.0,0.0,0.0,0.03,0.09,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.02,0.0,0.03,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0
5,"► Basheerbagh‎ (1 C, 7 F)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.010417,0.010417,0.0,0.0,0.03125,0.0,0.041667,0.010417,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.020833,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052083,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.010417,0.0,0.0,0.010417,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.010417,0.0,0.052083,0.010417,0.0,0.020833,0.0,0.052083,0.104167,0.0,0.010417,0.0,0.0,0.020833,0.020833,0.010417,0.0,0.0,0.010417,0.0,0.0,0.010417,0.0,0.010417,0.0,0.0,0.03125,0.0,0.072917,0.010417,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.010417,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.020833,0.0,0.010417,0.020833,0.0,0.010417,0.010417,0.010417,0.0,0.0,0.020833,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0
6,"► Begumpet‎ (5 C, 9 F)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.019231,0.0,0.0,0.0,0.019231,0.0,0.038462,0.0,0.0,0.019231,0.0,0.0,0.019231,0.0,0.0,0.057692,0.0,0.038462,0.057692,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019231,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019231,0.0,0.0,0.019231,0.0,0.076923,0.0,0.0,0.019231,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019231,0.019231,0.076923,0.0,0.0
7,► Boduppal‎ (2 F),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"► Bolarum‎ (3 C, 1 F)",0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"► Cavalry Barracks, Hyderabad‎ (1 C)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.05,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.05,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.05,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0


In [28]:
len(kl_grouped[kl_grouped["Shopping Mall"] > 0])


18

Create a new DataFrame for Shopping Mall data only



In [29]:
kl_mall = kl_grouped[["Neighborhoods","Shopping Mall"]]


In [30]:
kl_mall.head()


Unnamed: 0,Neighborhoods,Shopping Mall
0,"► Abids‎ (1 C, 13 F)",0.012658
1,"► Alwal‎ (1 C, 1 F)",0.0
2,"► Ameerpet, Hyderabad‎ (3 C, 21 F)",0.02
3,"► Bandlaguda, Rangareddy‎ (1 C, 2 F)",0.0
4,"► Banjara Hills‎ (3 C, 25 F)",0.02


### 7. Cluster Neighborhoods

Run k-means to cluster the neighborhoods in Hyderabad into 5 clusters.



In [31]:
# set number of clusters
kclusters = 3

kl_clustering = kl_mall.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(kl_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 0, 2, 0, 2, 2, 0, 0, 0, 0], dtype=int32)

In [32]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
kl_merged = kl_mall.copy()

# add clustering labels
kl_merged["Cluster Labels"] = kmeans.labels_

In [33]:
kl_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
kl_merged.head()

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels
0,"► Abids‎ (1 C, 13 F)",0.012658,2
1,"► Alwal‎ (1 C, 1 F)",0.0,0
2,"► Ameerpet, Hyderabad‎ (3 C, 21 F)",0.02,2
3,"► Bandlaguda, Rangareddy‎ (1 C, 2 F)",0.0,0
4,"► Banjara Hills‎ (3 C, 25 F)",0.02,2


In [34]:
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
kl_merged = kl_merged.join(kl_df.set_index("Neighborhood"), on="Neighborhood")

print(kl_merged.shape)
kl_merged.head() # check the last columns!


(52, 5)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
0,"► Abids‎ (1 C, 13 F)",0.012658,2,17.3898,78.47658
1,"► Alwal‎ (1 C, 1 F)",0.0,0,17.53543,78.54427
2,"► Ameerpet, Hyderabad‎ (3 C, 21 F)",0.02,2,17.43482,78.44949
3,"► Bandlaguda, Rangareddy‎ (1 C, 2 F)",0.0,0,17.29982,78.46495
4,"► Banjara Hills‎ (3 C, 25 F)",0.02,2,17.41535,78.43435


In [35]:
# sort the results by Cluster Labels
print(kl_merged.shape)
kl_merged.sort_values(["Cluster Labels"], inplace=True)
kl_merged


(52, 5)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
51,"► Trimulgherry‎ (1 C, 3 F)",0.0,0,17.470723,78.504503
21,► Hydershakote‎ (14 F),0.0,0,17.36838,78.39999
44,► Sanathnagar‎ (8 F),0.0,0,17.45876,78.4431
23,"► Kachiguda‎ (1 C, 4 F)",0.0,0,17.38688,78.49553
43,► Pedda Amberpet‎ (1 F),0.0,0,17.32115,78.64237
50,"► Tarnaka‎ (1 C, 6 F)",0.0,0,17.408935,78.32674
27,► L. B. Nagar‎ (16 F),0.0,0,17.51265,78.44129
28,"► Madhapur‎ (1 C, 19 F)",0.0,0,17.459,78.3681
29,"► Malakpet‎ (3 C, 2 F)",0.0,0,17.37493,78.51567
30,"► Malkajgiri‎ (3 C, 6 F)",0.0,0,17.4393,78.5292


Finally, let's visualize the resulting clusters



In [36]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(kl_merged['Latitude'], kl_merged['Longitude'], kl_merged['Neighborhood'], kl_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [37]:
# save the map as HTML file
map_clusters.save('map_clusters.html')


### 8. Examine Clusters

#### Cluster 0

In [38]:
kl_merged.loc[kl_merged['Cluster Labels'] == 0]


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
51,"► Trimulgherry‎ (1 C, 3 F)",0.0,0,17.470723,78.504503
21,► Hydershakote‎ (14 F),0.0,0,17.36838,78.39999
44,► Sanathnagar‎ (8 F),0.0,0,17.45876,78.4431
23,"► Kachiguda‎ (1 C, 4 F)",0.0,0,17.38688,78.49553
43,► Pedda Amberpet‎ (1 F),0.0,0,17.32115,78.64237
50,"► Tarnaka‎ (1 C, 6 F)",0.0,0,17.408935,78.32674
27,► L. B. Nagar‎ (16 F),0.0,0,17.51265,78.44129
28,"► Madhapur‎ (1 C, 19 F)",0.0,0,17.459,78.3681
29,"► Malakpet‎ (3 C, 2 F)",0.0,0,17.37493,78.51567
30,"► Malkajgiri‎ (3 C, 6 F)",0.0,0,17.4393,78.5292


#### Cluster 1

In [39]:
kl_merged.loc[kl_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
26,► Kukatpally‎ (16 F),0.1,1,17.48735,78.42087
17,"► Golconda‎ (5 C, 4 F)",0.076923,1,17.38941,78.40406
12,"► Dilsukhnagar‎ (1 C, 2 F)",0.05,1,17.36857,78.53515
38,"► Nagole, Hyderabad‎ (4 F)",0.066667,1,17.372426,78.544543


#### Cluster 2

In [40]:
kl_merged.loc[kl_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
42,"► Old City (Hyderabad, India)‎ (8 C, 26 F)",0.01087,2,17.39487,78.47076
49,► Somajiguda‎ (5 F),0.02,2,17.42072,78.463
0,"► Abids‎ (1 C, 13 F)",0.012658,2,17.3898,78.47658
35,► Moazzam Jahi Market‎ (16 F),0.019608,2,17.38448,78.47442
32,► Masab Tank‎ (4 F),0.01,2,17.40093,78.45362
24,"► Khairtabad‎ (1 C, 2 F)",0.01,2,17.40592,78.45856
22,"► Jubilee Hills‎ (3 C, 8 F)",0.01,2,17.42865,78.39762
20,► Hyderguda‎ (2 F),0.011111,2,17.39923,78.48073
15,"► Gachibowli‎ (4 C, 17 F)",0.01,2,17.43181,78.38636
5,"► Basheerbagh‎ (1 C, 7 F)",0.010417,2,17.40211,78.4777
