<H2> Importing required Libraries <H2>

In [1]:
import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim
try:
 import geocoder
except:
 !pip install geocoder
import geocoder
import requests
from bs4 import BeautifulSoup
try:
 import folium
except:
 !pip install folium
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
import matplotlib as mpl
import matplotlib.pyplot as plt

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
Collecting geocoder
  Downloading geocoder-1.38.1-py2.py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 10.7 MB/s eta 0:00:01
Collecting ratelim
  Downloading ratelim-0.1.6-py2.py3-none-any.whl (4.0 kB)
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6
  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
Collecting folium
  Downloading folium-0.12.1-py2.py3-none-any.whl (94 kB)
[K     |████████████████████████████████| 94 kB 7.0 MB/s  eta 0:00:01
Collecting branca>=0.3.0
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.12.1


<H2> Web Scraping <H2>

In [2]:
# Send the GET request
data = requests.get("https://en.wikipedia.org/wiki/Category:Neighbourhoods_in_Hyderabad,_India").text
# Parse data from the html into a beautifulsoup object
soup = BeautifulSoup(data, 'html.parser')
# Create a list to store neighbourhood data
neighborhoodList = []
# Append the data into the list
for row in soup.find_all("div", class_="mw-category")[0].findAll("li"):
    neighborhoodList.append(row.text)
# Create a new DataFrame from the list
neig_df = pd.DataFrame({"Neighborhood": neighborhoodList})
neig_df.head()

Unnamed: 0,Neighborhood
0,A. C. Guards
1,A. S. Rao Nagar
2,Abhyudaya Nagar
3,Abids
4,Adibatla


<H2> Geographical Coordinates <H2>

In [3]:
# Defining a function to get coordinates
def get_latlng(neighborhood):
   # initialize your variable to None 
    lat_lng_coords = None
  # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Hyderabad, India'.format(neighborhood))
        lat_lng_coords = g.latlng
        return lat_lng_coords
# Call the function to get the coordinates, store in a new list using list comprehension
coords = [ get_latlng(neighborhood) for neighborhood in neig_df["Neighborhood"].tolist()]

<H2> Merge the Coordinates <H2>

In [4]:
# Create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
# Merge the coordinates into the original dataframe
neig_df['Latitude'] = df_coords['Latitude']
neig_df['Longitude'] = df_coords['Longitude']
print(neig_df.shape)
neig_df

(200, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,A. C. Guards,17.395015,78.459812
1,A. S. Rao Nagar,17.411200,78.508240
2,Abhyudaya Nagar,17.337650,78.564140
3,Abids,17.389800,78.476580
4,Adibatla,17.235790,78.541300
...,...,...,...
195,Serilingampally,17.482160,78.323000
196,Shah-Ali-Banda,17.357390,78.473200
197,Shahran Market,17.364890,78.476290
198,Shanker Mutt,17.399817,78.507919


<H2> Coordinates of Hyderabad <H2>

In [5]:
g=geocoder.arcgis('Hyderabad, India')
hyd_lat=g.latlng[0]
hyd_lng=g.latlng[1]
print("The Latitude and Longitude of Hyderabad is {} and {}".format(hyd_lat, hyd_lng))

The Latitude and Longitude of Hyderabad is 17.394870000000026 and 78.47076000000004


In [6]:
import folium

<H2> Mapping <H2>

In [7]:
# Create map of Hyderabad using latitude and longitude values
map_hyd = folium.Map(location=[hyd_lat, hyd_lng], zoom_start=10)
map_hyd

In [8]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [9]:
# Add markers to map
folium.Marker([hyd_lat, hyd_lng], popup = '<i> Hyderabad </i>', color = 'red',
              tooltip = 'Click to see').add_to(map_hyd)
# Markers for Localities
for lat, lng, neighborhood in zip(neig_df['Latitude'], neig_df['Longitude'], neig_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([lat, lng],radius=5,popup=label,color='blue',fill=True,fill_color='#3186cc',fill_opacity=0.7).add_to(map_hyd)

In [10]:
map_hyd

<H2>  Foursqaure API <H2>

In [11]:
import json, requests
from pandas.io.json import json_normalize

In [12]:
CLIENT_ID = '' # your Foursquare ID
CLIENT_SECRET = '' # your Foursquare Secret
VERSION = '20180323'
radius = 2000
LIMIT = 100
venues = []
for lat, long, neighborhood in zip(neig_df['Latitude'], neig_df['Longitude'], neig_df['Neighborhood']):
    # Create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(CLIENT_ID,CLIENT_SECRET,VERSION,lat,long,radius,LIMIT)
   # Make the GET request 
    results = requests.get(url).json()["response"]['groups'][0]['items']
    # Return only relevant information for each nearby venue
    for venue in results:
        venues.append((neighborhood,lat,long,venue['venue']['name'],
                       venue['venue']['location']['lat'],venue['venue']['location']['lng'],venue['venue']['categories'][0]['name']))


<H3> New DataFrame <H3>

In [13]:
venues_df = pd.DataFrame(venues)
# Defining the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
print(venues_df.shape)
venues_df.head(50)

(6215, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,A. C. Guards,17.395015,78.459812,Cafe Niloufer & Bakers,17.399715,78.462881,Café
1,A. C. Guards,17.395015,78.459812,Chicha's,17.403255,78.460152,Hyderabadi Restaurant
2,A. C. Guards,17.395015,78.459812,Subhan Bakery,17.392412,78.464712,Bakery
3,A. C. Guards,17.395015,78.459812,Taiba Bakers & Confectioners,17.40253,78.456823,Bakery
4,A. C. Guards,17.395015,78.459812,Nizam club,17.403221,78.468729,Lounge
5,A. C. Guards,17.395015,78.459812,Siraj's International Juice Center,17.395579,78.442701,Juice Bar
6,A. C. Guards,17.395015,78.459812,Birla Science Museum,17.403854,78.469457,Science Museum
7,A. C. Guards,17.395015,78.459812,Laxman Ki Bandi,17.378895,78.463973,South Indian Restaurant
8,A. C. Guards,17.395015,78.459812,Spice 6,17.409007,78.450559,Bistro
9,A. C. Guards,17.395015,78.459812,Shahi Dastarkhwan,17.40264,78.461399,Hyderabadi Restaurant


In [14]:
# Lets check how many venues were returned for each neighbourhood
venues_df.groupby(["Neighborhood"]).count()
# Lets check out how many unique categories can be curated from all the returned values
print('There are {} unique categories.'.format(len(venues_df['VenueCategory'].unique())))


There are 179 unique categories.


In [15]:
# Displaying the first 50 Venue Category names
venues_df['VenueCategory'].unique()[:50]

array(['Café', 'Hyderabadi Restaurant', 'Bakery', 'Lounge', 'Juice Bar',
       'Science Museum', 'South Indian Restaurant', 'Bistro',
       'Ice Cream Shop', 'Vegetarian / Vegan Restaurant',
       'Indian Restaurant', 'Stadium', 'Park', 'Hotel',
       'Middle Eastern Restaurant', 'Shoe Store', 'Diner', 'Hotel Bar',
       'Neighborhood', 'Dessert Shop', 'Performing Arts Venue',
       'Pizza Place', 'Mobile Phone Shop', 'Snack Place',
       'Fast Food Restaurant', 'Coffee Shop', 'Department Store',
       'Chinese Restaurant', 'Fried Chicken Joint', 'Shopping Mall',
       'Electronics Store', 'Clothing Store', 'Hookah Bar', 'Bookstore',
       'Movie Theater', 'Sandwich Place', 'Convenience Store',
       'Asian Restaurant', 'Train Station', 'Light Rail Station',
       'Restaurant', 'Food Truck', 'Chaat Place', 'Burger Joint',
       'Smoke Shop', 'Multiplex', 'Breakfast Spot', 'Bar', 'Food',
       'Gaming Cafe'], dtype=object)

<H2> Analyzing each Neighbourhood <H2>

In [16]:
# One hot encoding
neig_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")
# Adding neighborhood column back to dataframe
neig_onehot['Neighborhoods'] = venues_df['Neighborhood']
# Moving neighbourhood column to the first column
fixed_columns = [neig_onehot.columns[-1]] + list(neig_onehot.columns[:-1])
neig_onehot = neig_onehot[fixed_columns]
print(neig_onehot.shape)

(6215, 180)


In [17]:
neig_grouped=neig_onehot.groupby(["Neighborhoods"]).sum().reset_index()
print(neig_grouped.shape)
neig_grouped

(198, 180)


Unnamed: 0,Neighborhoods,ATM,Accessories Store,Afghan Restaurant,American Restaurant,Andhra Restaurant,Arcade,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Tea Room,Tech Startup,Temple,Thai Restaurant,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Wings Joint,Women's Store,Zoo
0,A. C. Guards,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
1,A. S. Rao Nagar,0,0,0,0,0,0,0,1,0,...,0,0,0,0,1,0,0,0,0,0
2,Abhyudaya Nagar,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Abids,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,Adikmet,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193,Serilingampally,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
194,Shah-Ali-Banda,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
195,Shahran Market,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,1,0
196,Shanker Mutt,0,0,0,0,0,0,0,1,0,...,0,0,0,0,1,0,0,0,0,0


In [18]:
len((neig_grouped[neig_grouped["Gaming Cafe"]> 0]))

19

In [19]:
# Creating a dataframe for Gaming Cafe data only
neig_cafe = neig_grouped[["Neighborhoods","Gaming Cafe"]]

<H3> Clustering the Neighbourhoods <H3>

In [20]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

<H2> Applying the K-Means clustering Algorithm <H2>

In [21]:
# Setting the number of clusters
kclusters = 3
neig_clustering = neig_cafe.drop(["Neighborhoods"], 1)

In [22]:
# set number of clusters
kclusters = 3
neig_clustering = neig_cafe.drop(["Neighborhoods"], 1)
# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(neig_clustering)
# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]



array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0], dtype=int32)

In [23]:
# Creating a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
neig_merged = neig_cafe.copy()
# Add the clustering labels
neig_merged["Cluster Labels"] = kmeans.labels_
neig_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
neig_merged.head(10)

Unnamed: 0,Neighborhood,Gaming Cafe,Cluster Labels
0,A. C. Guards,0,0
1,A. S. Rao Nagar,0,0
2,Abhyudaya Nagar,0,0
3,Abids,1,1
4,Adikmet,0,0
5,Afzal Gunj,0,0
6,Aghapura,0,0
7,"Aliabad, Hyderabad",0,0
8,Alijah Kotla,0,0
9,Allwyn Colony,0,0


In [24]:
# Adding latitude and longitude values to the existing dataframe
neig_merged['Latitude'] = neig_df['Latitude']
neig_merged['Longitude'] = neig_df['Longitude']
# Sorting the results by Cluster Labels
neig_merged.sort_values(["Cluster Labels"], inplace=True)
neig_merged

Unnamed: 0,Neighborhood,Gaming Cafe,Cluster Labels,Latitude,Longitude
0,A. C. Guards,0,0,17.395015,78.459812
123,Malkajgiri mandal,0,0,17.374930,78.515670
124,Mallapur,0,0,17.447370,78.535200
125,Mallepally,0,0,17.447370,78.535200
126,Manikonda,0,0,17.288640,78.497960
...,...,...,...,...,...
35,Boggulkunta,1,1,17.505990,78.304540
167,Pisal Banda,1,1,17.442320,78.496170
27,"Bank Street, Hyderabad",1,1,17.388601,78.476645
84,Jubilee Hills,1,1,17.421967,78.525592


<H2> Visualizing the Resulting Clusters ,H2>

In [25]:
# Create map of Hyderabad using latitude and longitude values
map_clusters = folium.Map(location=[hyd_lat, hyd_lng], zoom_start=10)
# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(neig_merged['Latitude'], neig_merged['Longitude'], neig_merged['Neighborhood'], neig_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

In [26]:
map_clusters

In [28]:
len(neig_merged.loc[neig_merged['Cluster Labels'] == 0])

179

In [29]:
len(neig_merged.loc[neig_merged['Cluster Labels'] == 1])

19

In [30]:
len(neig_merged.loc[neig_merged['Cluster Labels'] == 2])

0