In [9]:
import pandas as pd
import numpy as np
import requests as rs
from bs4 import BeautifulSoup

In [10]:
res = rs.get("https://en.wikipedia.org/wiki/Category:Neighbourhoods_in_Hyderabad,_India")
soup = BeautifulSoup(res.content, 'html.parser')
neighborhoods = []

for items in soup.find_all('li'):
    
    if str(items) == '<li><a href="/wiki/Somajiguda" title="Somajiguda">Somajiguda</a></li>':
        neighborhoods.append(items.string)
        break
    
    neighborhoods.append(items.string)


In [11]:
from geopy.geocoders import Nominatim 
import geopy

In [12]:
geolocator = Nominatim(timeout = 3)
lats = []
longs = []
curatedNeighborhoods = []
#zipCodes = []
i = 0

for items in neighborhoods:
    location=geolocator.geocode(items)
    if location != None:
        curatedNeighborhoods.append(items)
        lats.append(location.latitude)
        longs.append(location.longitude)
        #zipCodes.append(location.zipcode)

  """Entry point for launching an IPython kernel.


In [13]:
newHydDict = {"Neighborhood":curatedNeighborhoods,"Latitude":lats,"Longitude":longs}
HydDf = pd.DataFrame(data=newHydDict,columns=['Neighborhood','Latitude','Longitude'])

In [14]:
HydDf

Unnamed: 0,Neighborhood,Latitude,Longitude
0,A. S. Rao Nagar,17.479950,78.556834
1,A.C. Guards,17.402804,78.459487
2,Abhyudaya Nagar,18.990477,72.844057
3,Abids,17.389478,78.477182
4,Adikmet,17.409550,78.513094
...,...,...,...
176,Sikh Village,17.460098,78.487287
177,Silpa Avenue Colony,17.439550,78.367562
178,"Sindhi Colony, Secunderabad",17.441219,78.481048
179,Sitaphalmandi,17.429733,78.517426


In [15]:
import folium
import json,requests

hyd_lat = 17.3850
hyd_lng = 78.4867

map_hyd = folium.Map(location=[hyd_lat,hyd_lng], zoom_start=10)

for lat, lng, neighbourhood in zip(HydDf['Latitude'], HydDf['Longitude'], HydDf['Neighborhood']):
    label = '{}'.format(neighbourhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_hyd)  
    
map_hyd

In [None]:
CLIENT_ID = ''
CLIENT_SECRET = ''
VERSION = '20200408'


def getNearbyVenues(HydDf, radius=500, LIMIT = 100):

    venues_list=[]
    for name, lat, lng in zip(HydDf['Neighborhood'],HydDf['Latitude'],HydDf['Longitude']):
        print(name)

        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
     
        results = requests.get(url).json()["response"]['groups'][0]['items']
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
    
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Suburb', 
                  'Suburb Latitude', 
                  'Suburb Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    return(nearby_venues)

df = getNearbyVenues(HydDf)

In [None]:
# One hot encoding
hyd_onehot = pd.get_dummies(df[['Category']], prefix="", prefix_sep="")

# Add neighborhood column back to dataframe
hyd_onehot['Neighborhood'] = df['Neighborhood'] 

# Move neighborhood column to the first column
fixed_columns = [hyd_onehot.columns[-1]] + hyd_onehot.columns[:-1].values.tolist()
hyd_onehot = hyd_onehot[fixed_columns]

hyd_onehot.head()

In [None]:
dfTmp = hyd_onehot.groupby('Neighbourhood').mean().reset_index()
dfTmp.head()

In [None]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]


num_top_venues = 10
indicators = ['st', 'nd', 'rd']

# Create columns according to number of top venues
columns = ['']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# Create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Suburb'] = dfTmp['Suburb']

for ind in np.arange(dfTmp.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(dfTmp.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

In [None]:
dfClusters = dfTmp.drop('Suburb', 1)

In [None]:
from sklearn.metrics import silhouette_samples, silhouette_score

kclusters = 12
kM = dfClusters
kmeans = KMeans(n_clusters = kclusters, init = 'k-means++', random_state = 0).fit_predict(kM)

neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

hydM = df
hydM = hydM.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')
hydM.dropna(inplace = True)
hydM['Cluster Labels'] = hydM['Cluster Labels'].astype(int)
hydM.head()


In [None]:
map_clusters = folium.Map(location=[hyd_lat, hyd_lng], zoom_start=12)

# Set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(kolkata_merged['Latitude'], kolkata_merged['Longitude'], kolkata_merged['Neighbourhood'], kolkata_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' (Cluster ' + str(cluster + 1) + ')', parse_html=True)
    map_clusters.add_child(
        folium.features.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7))
       
map_clusters

In [None]:
hydM.loc[hydM['Cluster Labels'] == (val - 1), hydM.columns[[0] + np.arange(4, hydM.shape[1]).tolist()]]