# Capstone Project

## Import required libraries

In [134]:
from bs4 import BeautifulSoup
import requests 
import pandas as pd
import numpy as np

#for location data from address
from geopy.geocoders import Nominatim

import matplotlib.cm as cm
import matplotlib.colors as colors

#for mapping
import folium

#for clustering
from sklearn.cluster import KMeans

## Use BeautifulSoup and requests libraries to scrape websites

In [2]:
source=requests.get('https://simple.wikipedia.org/wiki/List_of_districts_in_Andhra_Pradesh').text
soup=BeautifulSoup(source,'lxml')

In [3]:
soup

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
<head>
<meta charset="utf-8"/>
<title>List of districts in Andhra Pradesh - Simple English Wikipedia, the free encyclopedia</title>
<script>document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"XreRfApAMNUAA9-rlccAAADU","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_districts_in_Andhra_Pradesh","wgTitle":"List of districts in Andhra Pradesh","wgCurRevisionId":6791553,"wgRevisionId":6791553,"wgArticleId":598587,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Commons category link is on Wikidata","Districts of Andhra Pradesh","India-related

### The required table about districts is in the first <tbody> tag

In [4]:
table=soup.find('tbody').text
table

'\nDistrict\n\nHeadquarters\n\nRevenue divisions\n\nMandals\n\nPopulation (2011)\n\nArea (km²)\n\nDensity (/km²)\n\n\nAnantapur\n\nAnantapur\n\n5\n\n63\n\n4,083,315\n\n19,130\n\n213\n\n\nChittoor\n\nChittoor\n\n3\n\n66\n\n4,170,468\n\n15,152\n\n275\n\n\nEast Godavari\n\nKakinada\n\n7\n\n64\n\n5,151,549\n\n10,807\n\n477\n\n\nGuntur\n\nGuntur\n\n4\n\n57\n\n4,889,230\n\n11,391\n\n429\n\n\nYSR Kadapa\n\nKadapa\n\n3\n\n50\n\n2,884,524\n\n15,359\n\n188\n\n\nKrishna\n\nMachilipatnam\n\n4\n\n50\n\n4,529,009\n\n8,727\n\n519\n\n\nKurnool\n\nKurnool\n\n3\n\n54\n\n4,046,601\n\n17,658\n\n229\n\n\nNellore\n\nNellore\n\n5\n\n46\n\n2,966,082\n\n13,076\n\n227\n\n\nPrakasam\n\nOngole\n\n3\n\n56\n\n3,392,764\n\n17,626\n\n193\n\n\nSrikakulam\n\nSrikakulam\n\n3\n\n37\n\n2,699,471\n\n5,837\n\n462\n\n\nVisakhapatnam\n\nVisakhapatnam\n\n4\n\n43\n\n4,288,113\n\n11,161\n\n340\n\n\nVizianagaram\n\nVizianagaram\n\n2\n\n34\n\n2,342,868\n\n6,539\n\n384\n\n\nWest Godavari\n\nEluru\n\n4\n\n46\n\n3,934,782\n\n7,742\n\

### The data in the string 'table' is separated by '\n\n'

In [5]:
table_data=table.split('\n\n')
table_data

['\nDistrict',
 'Headquarters',
 'Revenue divisions',
 'Mandals',
 'Population (2011)',
 'Area (km²)',
 'Density (/km²)',
 '\nAnantapur',
 'Anantapur',
 '5',
 '63',
 '4,083,315',
 '19,130',
 '213',
 '\nChittoor',
 'Chittoor',
 '3',
 '66',
 '4,170,468',
 '15,152',
 '275',
 '\nEast Godavari',
 'Kakinada',
 '7',
 '64',
 '5,151,549',
 '10,807',
 '477',
 '\nGuntur',
 'Guntur',
 '4',
 '57',
 '4,889,230',
 '11,391',
 '429',
 '\nYSR Kadapa',
 'Kadapa',
 '3',
 '50',
 '2,884,524',
 '15,359',
 '188',
 '\nKrishna',
 'Machilipatnam',
 '4',
 '50',
 '4,529,009',
 '8,727',
 '519',
 '\nKurnool',
 'Kurnool',
 '3',
 '54',
 '4,046,601',
 '17,658',
 '229',
 '\nNellore',
 'Nellore',
 '5',
 '46',
 '2,966,082',
 '13,076',
 '227',
 '\nPrakasam',
 'Ongole',
 '3',
 '56',
 '3,392,764',
 '17,626',
 '193',
 '\nSrikakulam',
 'Srikakulam',
 '3',
 '37',
 '2,699,471',
 '5,837',
 '462',
 '\nVisakhapatnam',
 'Visakhapatnam',
 '4',
 '43',
 '4,288,113',
 '11,161',
 '340',
 '\nVizianagaram',
 'Vizianagaram',
 '2',
 '34',
 '

### The first 7 elements of the list are the columns, so sepearate the values

In [6]:
column_val=table_data[0:7]
table_data=table_data[7:]
column_val

['\nDistrict',
 'Headquarters',
 'Revenue divisions',
 'Mandals',
 'Population (2011)',
 'Area (km²)',
 'Density (/km²)']

### The values in the list must be splitup according to column values

#### Calculate the total number of **districts**

In [7]:
total_num=len(table_data)
num_dist=total_num/7

#### Get data from entire list to respective lists for each column

In [8]:
District_data=[]
Headquarters_data=[]
Revenue_divisions_data=[]
Mandals_data=[]
Population_data=[]
Area_data=[]
Density_data=[]
a=0
b=1
c=2
d=3
e=4
f=5
g=6
while a<total_num:
  District_data.append(table_data[a])
  Headquarters_data.append(table_data[b])
  Revenue_divisions_data.append(table_data[c])
  Mandals_data.append(table_data[d])
  Population_data.append(table_data[e])
  Area_data.append(table_data[f])
  Density_data.append(table_data[g])
  a=a+7
  b=b+7
  c=c+7
  d=d+7
  e=e+7
  f=f+7
  g=g+7

In [9]:
District_data[:5]

['\nAnantapur', '\nChittoor', '\nEast Godavari', '\nGuntur', '\nYSR Kadapa']

#### The values have a extra '\n' before each name

In [10]:
d=[]
for a in District_data:
  spl=a.split('\n')
  d.append(spl[1])

d[:5]

['Anantapur', 'Chittoor', 'East Godavari', 'Guntur', 'YSR Kadapa']

In [11]:
District_data=d.copy()
District_data[:5]

['Anantapur', 'Chittoor', 'East Godavari', 'Guntur', 'YSR Kadapa']

#### Check other values as well.

In [12]:
Headquarters_data[:5]

['Anantapur', 'Chittoor', 'Kakinada', 'Guntur', 'Kadapa']

In [13]:
Revenue_divisions_data[:5]

['5', '3', '7', '4', '3']

In [14]:
Mandals_data[:5]

['63', '66', '64', '57', '50']

In [15]:
Population_data[:5]

['4,083,315', '4,170,468', '5,151,549', '4,889,230', '2,884,524']

In [16]:
Area_data[:5]

['19,130', '15,152', '10,807', '11,391', '15,359']

In [17]:
Density_data[:5]

['213', '275', '477', '429', '188']

## Create a DataFrame

In [18]:
column_val[0]='District'

In [19]:
District_info=pd.DataFrame(columns=column_val)
District_info

Unnamed: 0,District,Headquarters,Revenue divisions,Mandals,Population (2011),Area (km²),Density (/km²)


In [20]:
Density_data[12]=490

In [21]:
District_info['District']=District_data
District_info['Headquarters']=Headquarters_data
District_info['Revenue divisions']=Revenue_divisions_data
District_info['Mandals']=Mandals_data
District_info['Population (2011)']=Population_data
District_info['Area (km²)']=Area_data
District_info['Density (/km²)']=Density_data

In [22]:
District_info

Unnamed: 0,District,Headquarters,Revenue divisions,Mandals,Population (2011),Area (km²),Density (/km²)
0,Anantapur,Anantapur,5,63,4083315,19130,213
1,Chittoor,Chittoor,3,66,4170468,15152,275
2,East Godavari,Kakinada,7,64,5151549,10807,477
3,Guntur,Guntur,4,57,4889230,11391,429
4,YSR Kadapa,Kadapa,3,50,2884524,15359,188
5,Krishna,Machilipatnam,4,50,4529009,8727,519
6,Kurnool,Kurnool,3,54,4046601,17658,229
7,Nellore,Nellore,5,46,2966082,13076,227
8,Prakasam,Ongole,3,56,3392764,17626,193
9,Srikakulam,Srikakulam,3,37,2699471,5837,462


### Calculate the radius of each mandal in district(it is an approximation)

In [23]:
area=[19130,15152,10807,11391,15359,8727,17658,13076,17626,5837,11161,6539,7742]
num_mandals=District_info['Mandals'].astype('int').values
Radius_mandal=np.sqrt(area/(np.pi*num_mandals))

# Functions useful

In [24]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [25]:
def getNearbyVenues(names, latitudes, longitudes,radius):

    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius)
        print(name)
        # make the GET request
        results = requests.get(url).json()["response"]["groups"][0]["items"]
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

# Foursquare Credentials

In [123]:
CLIENT_ID = 'JH4VK1ADWRFMQIJKG1F1D4MJ3GK3LHOISB2TDPO0XT2AHRG3' 
CLIENT_SECRET = 'ZJZVWT0HNJEPSDOYBIDXGXDVALT541NHR4TL4032AVQO0Q4L' 
VERSION = '20180605'

# Data of each district

## Anantapur

### Get data related to Anantapur district by scraping a website

In [27]:
source1=requests.get('https://en.wikipedia.org/wiki/Anantapur_district').text
soup1=BeautifulSoup(source1,'lxml')
soup1

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
<head>
<meta charset="utf-8"/>
<title>Anantapur district - Wikipedia</title>
<script>document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"Xr2-OgpAIC4AAAtqeEEAAAAT","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"Anantapur_district","wgTitle":"Anantapur district","wgCurRevisionId":956416073,"wgRevisionId":956416073,"wgArticleId":3360133,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Wikipedia articles incorporating a citation from the 1911 Encyclopaedia Britannica with Wikisource reference","Wikipedia articles incorporating text from the 1911 Enc

In [28]:
table1=soup1.find('table',class_='wikitable')
table1_data=table1.text
table1_data

'\n\n\n\nNo\nAnantapur Division\nDharmavaram Division\nKadiri Division\nKalyandurg Division\nPenukonda Division\n\n\n1\nAnantapur\nBathalapalle\nAmadagur\nBeluguppa\nAgali\n\n\n2\nAtmakur\nChennekothapalle\nBukkapatnam\nBommanahal\nAmarapuram\n\n\n3\nBukkarayasamudram\nDharmavaram\nGandlapenta\nBrahmasamudram\nChilamathur\n\n\n4\nGarladinne\nKanaganapalle\nKadiri\nD.Hirehal\nGorantla\n\n\n5\nGooty\nMudigubba\nKothacheruvu\nGummagatta\nGudibanda\n\n\n6\nGuntakal\nRamagiri\nNallacheruvu\nKalyandurg\nHindupur\n\n\n7\nKudair\nRaptadu\nNallamada\nKambadur\nLepakshi\n\n\n8\nNarpala\nTadimarri\nNambulapulakunta\nKanekal\nMadakasira\n\n\n9\nPamidi\n\nObuladevaracheruvu\nKundurpi\nParigi\n\n\n10\nPeddapappur\n\nPuttaparthi\nRayadurg\nPenu Konda\n\n\n11\nPeddavadugur\n\nTalupula\nSettur\nRoddam\n\n\n12\nPutlur\n\nTanakal\n\nRolla\n\n\n13\nSinganamala\n\n\n\nSomandepalle\n\n\n14\nTadipatri\n\n\n15\nUravakonda\n\n\n16\nVajrakarur\n\n\n17\nVidapanakal\n\n\n18\nYadiki\n\n\n19\nYellanur\n'

In [29]:
table1_data=table1_data.split('\n')
table1_data

['',
 '',
 '',
 '',
 'No',
 'Anantapur Division',
 'Dharmavaram Division',
 'Kadiri Division',
 'Kalyandurg Division',
 'Penukonda Division',
 '',
 '',
 '1',
 'Anantapur',
 'Bathalapalle',
 'Amadagur',
 'Beluguppa',
 'Agali',
 '',
 '',
 '2',
 'Atmakur',
 'Chennekothapalle',
 'Bukkapatnam',
 'Bommanahal',
 'Amarapuram',
 '',
 '',
 '3',
 'Bukkarayasamudram',
 'Dharmavaram',
 'Gandlapenta',
 'Brahmasamudram',
 'Chilamathur',
 '',
 '',
 '4',
 'Garladinne',
 'Kanaganapalle',
 'Kadiri',
 'D.Hirehal',
 'Gorantla',
 '',
 '',
 '5',
 'Gooty',
 'Mudigubba',
 'Kothacheruvu',
 'Gummagatta',
 'Gudibanda',
 '',
 '',
 '6',
 'Guntakal',
 'Ramagiri',
 'Nallacheruvu',
 'Kalyandurg',
 'Hindupur',
 '',
 '',
 '7',
 'Kudair',
 'Raptadu',
 'Nallamada',
 'Kambadur',
 'Lepakshi',
 '',
 '',
 '8',
 'Narpala',
 'Tadimarri',
 'Nambulapulakunta',
 'Kanekal',
 'Madakasira',
 '',
 '',
 '9',
 'Pamidi',
 '',
 'Obuladevaracheruvu',
 'Kundurpi',
 'Parigi',
 '',
 '',
 '10',
 'Peddapappur',
 '',
 'Puttaparthi',
 'Rayadurg',

In [30]:
table1_data=table1_data[13:]
table1_data

['Anantapur',
 'Bathalapalle',
 'Amadagur',
 'Beluguppa',
 'Agali',
 '',
 '',
 '2',
 'Atmakur',
 'Chennekothapalle',
 'Bukkapatnam',
 'Bommanahal',
 'Amarapuram',
 '',
 '',
 '3',
 'Bukkarayasamudram',
 'Dharmavaram',
 'Gandlapenta',
 'Brahmasamudram',
 'Chilamathur',
 '',
 '',
 '4',
 'Garladinne',
 'Kanaganapalle',
 'Kadiri',
 'D.Hirehal',
 'Gorantla',
 '',
 '',
 '5',
 'Gooty',
 'Mudigubba',
 'Kothacheruvu',
 'Gummagatta',
 'Gudibanda',
 '',
 '',
 '6',
 'Guntakal',
 'Ramagiri',
 'Nallacheruvu',
 'Kalyandurg',
 'Hindupur',
 '',
 '',
 '7',
 'Kudair',
 'Raptadu',
 'Nallamada',
 'Kambadur',
 'Lepakshi',
 '',
 '',
 '8',
 'Narpala',
 'Tadimarri',
 'Nambulapulakunta',
 'Kanekal',
 'Madakasira',
 '',
 '',
 '9',
 'Pamidi',
 '',
 'Obuladevaracheruvu',
 'Kundurpi',
 'Parigi',
 '',
 '',
 '10',
 'Peddapappur',
 '',
 'Puttaparthi',
 'Rayadurg',
 'Penu Konda',
 '',
 '',
 '11',
 'Peddavadugur',
 '',
 'Talupula',
 'Settur',
 'Roddam',
 '',
 '',
 '12',
 'Putlur',
 '',
 'Tanakal',
 '',
 'Rolla',
 '',
 ''

### Check what are the values to be removed from the list

In [31]:
x=[]
for a in table1_data:
  if a=='':
    x.append(a)
  elif a[0]<'A' or a[0]>'Z':
    x.append(a)
x

['',
 '',
 '2',
 '',
 '',
 '3',
 '',
 '',
 '4',
 '',
 '',
 '5',
 '',
 '',
 '6',
 '',
 '',
 '7',
 '',
 '',
 '8',
 '',
 '',
 '9',
 '',
 '',
 '',
 '10',
 '',
 '',
 '',
 '11',
 '',
 '',
 '',
 '12',
 '',
 '',
 '',
 '',
 '13',
 '',
 '',
 '',
 '',
 '',
 '14',
 '',
 '',
 '15',
 '',
 '',
 '16',
 '',
 '',
 '17',
 '',
 '',
 '18',
 '',
 '',
 '19',
 '']

In [32]:
for a in x:
  table1_data.remove(a)
table1_data

['Anantapur',
 'Bathalapalle',
 'Amadagur',
 'Beluguppa',
 'Agali',
 'Atmakur',
 'Chennekothapalle',
 'Bukkapatnam',
 'Bommanahal',
 'Amarapuram',
 'Bukkarayasamudram',
 'Dharmavaram',
 'Gandlapenta',
 'Brahmasamudram',
 'Chilamathur',
 'Garladinne',
 'Kanaganapalle',
 'Kadiri',
 'D.Hirehal',
 'Gorantla',
 'Gooty',
 'Mudigubba',
 'Kothacheruvu',
 'Gummagatta',
 'Gudibanda',
 'Guntakal',
 'Ramagiri',
 'Nallacheruvu',
 'Kalyandurg',
 'Hindupur',
 'Kudair',
 'Raptadu',
 'Nallamada',
 'Kambadur',
 'Lepakshi',
 'Narpala',
 'Tadimarri',
 'Nambulapulakunta',
 'Kanekal',
 'Madakasira',
 'Pamidi',
 'Obuladevaracheruvu',
 'Kundurpi',
 'Parigi',
 'Peddapappur',
 'Puttaparthi',
 'Rayadurg',
 'Penu Konda',
 'Peddavadugur',
 'Talupula',
 'Settur',
 'Roddam',
 'Putlur',
 'Tanakal',
 'Rolla',
 'Singanamala',
 'Somandepalle',
 'Tadipatri',
 'Uravakonda',
 'Vajrakarur',
 'Vidapanakal',
 'Yadiki',
 'Yellanur']

In [33]:
num_Anantapur=len(table1_data)
num_Anantapur

63

In [34]:
Anantapur_info=pd.DataFrame(table1_data)

In [35]:
Anantapur_info.columns=['Neighborhood']
Anantapur_info.head()

Unnamed: 0,Neighborhood
0,Anantapur
1,Bathalapalle
2,Amadagur
3,Beluguppa
4,Agali


### Get location data for each mandal in the district using geocoder.

In [36]:
table1_lat=[]
table1_lon=[]
i=0
for a in table1_data:
  address=a+', Anantapur, Andhra Pradesh, India'
  geolocator=Nominatim(user_agent=a)
  location=geolocator.geocode(address)
  if location==None:
    print(a)
    print(i)
    table1_lat.append(np.nan)
    table1_lon.append(np.nan)
    
  else:
    table1_lat.append(location.latitude)
    table1_lon.append(location.longitude)
  i=i+1

Bukkarayasamudram
10
Penu Konda
47


In [37]:
Buk_location=(14.694444,77.638056)
Pen_location=(14.085,77.596)

In [38]:
table1_lat[10]=Buk_location[0]
table1_lon[10]=Buk_location[1]
table1_lat[47]=Pen_location[0]
table1_lon[47]=Pen_location[1]
print(table1_lat)

[14.6546235, 14.4884866, 13.944391849999999, 14.677835850000001, 13.810991900000001, 14.6108884, 14.28327885, 14.2596433, 14.9536583, 14.09331895, 14.694444, 14.42234715, 14.097911199999999, 14.560386, 13.83712805, 14.85011435, 14.46128205, 14.16738855, 14.9353968, 13.9953644, 15.13014175, 14.3507342, 14.18653105, 14.61377435, 13.96863725, 15.11965065, 14.3063313, 14.00848955, 14.54116815, 13.826383, 14.77880375, 14.551285, 14.1652814, 14.397977950000001, 13.79705595, 14.70998665, 14.5667885, 14.0544736, 14.81941825, 13.934316249999998, 14.969948800000001, 14.008355300000002, 14.326827000000002, 13.883809549999999, 14.9296529, 14.1064568, 14.720190299999999, 14.085, 15.00587405, 14.2264385, 14.43071755, 14.07493325, 14.75206005, 13.906017049999999, 13.84850035, 14.8509937, 13.972290950000001, 14.9069559, 14.8722142, 15.006189849999998, 15.04115775, 15.055851149999999, 14.652142000000001]


In [39]:
Anantapur_info['Latitude']=table1_lat
Anantapur_info['Longitude']=table1_lon

In [40]:
Anantapur_info.values

array([['Anantapur', 14.6546235, 77.55625984224562],
       ['Bathalapalle', 14.4884866, 77.79614817972983],
       ['Amadagur', 13.944391849999999, 78.05850773919713],
       ['Beluguppa', 14.677835850000001, 77.17747288907285],
       ['Agali', 13.810991900000001, 77.04345492034327],
       ['Atmakur', 14.6108884, 77.42844899208345],
       ['Chennekothapalle', 14.28327885, 77.70099159563924],
       ['Bukkapatnam', 14.2596433, 77.92033532533532],
       ['Bommanahal', 14.9536583, 77.0287370612143],
       ['Amarapuram', 14.09331895, 76.98601792198116],
       ['Bukkarayasamudram', 14.694444, 77.638056],
       ['Dharmavaram', 14.42234715, 77.72006886674166],
       ['Gandlapenta', 14.097911199999999, 78.278756345638],
       ['Brahmasamudram', 14.560386, 76.96450652938145],
       ['Chilamathur', 13.83712805, 77.70505672711025],
       ['Garladinne', 14.85011435, 77.543886523261],
       ['Kanaganapalle', 14.46128205, 77.45562875009742],
       ['Kadiri', 14.16738855, 78.14185565],


### Anantapur Analysis

In [41]:
Anantapur_venues = getNearbyVenues(names=Anantapur_info['Neighborhood'],
                                   latitudes=Anantapur_info['Latitude'],
                                   longitudes=Anantapur_info['Longitude'],
                                   radius=(Radius_mandal[0]*1000)
                                  )

# one hot encoding
Anantapur_onehot = pd.get_dummies(Anantapur_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Anantapur_onehot['Neighborhood'] = Anantapur_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Anantapur_onehot.columns[-1]] + list(Anantapur_onehot.columns[:-1])
Anantapur_onehot = Anantapur_onehot[fixed_columns]

Anantapur_grouped = Anantapur_onehot.groupby('Neighborhood').mean().reset_index()

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Anantapur_grouped['Neighborhood']

for ind in np.arange(Anantapur_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Anantapur_grouped.iloc[ind, :], num_top_venues)

# set number of clusters
kclusters = 3

Anantapur_grouped_clustering = Anantapur_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Anantapur_grouped_clustering)

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Anantapur_merged =Anantapur_info

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Anantapur_merged = Anantapur_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Anantapur_merged['Cluster Labels'].replace(np.nan,-1,inplace=True)
Anantapur_merged['Cluster Labels']=Anantapur_merged['Cluster Labels'].astype('int')

address = 'Anantapur, Andhra Pradesh, India'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11,tiles='Stamen Toner')

# set color scheme for the clusters
x = np.arange(kclusters+1)
ys = [i + x + (i*x)**2 for i in range(kclusters+1)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Anantapur_merged['Latitude'], Anantapur_merged['Longitude'], Anantapur_merged['Neighborhood'], Anantapur_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=8,
        popup=label,
        color=rainbow[-cluster+1],
        fill=True,
        fill_color=rainbow[-cluster+1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Anantapur
Bathalapalle
Amadagur
Beluguppa
Agali
Atmakur
Chennekothapalle
Bukkapatnam
Bommanahal
Amarapuram
Bukkarayasamudram
Dharmavaram
Gandlapenta
Brahmasamudram
Chilamathur
Garladinne
Kanaganapalle
Kadiri
D.Hirehal
Gorantla
Gooty
Mudigubba
Kothacheruvu
Gummagatta
Gudibanda
Guntakal
Ramagiri
Nallacheruvu
Kalyandurg
Hindupur
Kudair
Raptadu
Nallamada
Kambadur
Lepakshi
Narpala
Tadimarri
Nambulapulakunta
Kanekal
Madakasira
Pamidi
Obuladevaracheruvu
Kundurpi
Parigi
Peddapappur
Puttaparthi
Rayadurg
Penu Konda
Peddavadugur
Talupula
Settur
Roddam
Putlur
Tanakal
Rolla
Singanamala
Somandepalle
Tadipatri
Uravakonda
Vajrakarur
Vidapanakal
Yadiki
Yellanur


# The same process is followed for the remaining 12 districts

## Chittoor

In [42]:
source2=requests.get('https://en.wikipedia.org/wiki/Chittoor_district').text
soup2=BeautifulSoup(source2,'lxml')

In [43]:
table2=soup2.find('table',class_='wikitable')
table2_data=table2.text

In [44]:
table2_data=table2_data.split('\n')

In [45]:
table2_data=table2_data[8:]

In [46]:
x=[]
for a in table2_data:
  if a=='':
    x.append(a)
  elif a[0]<'A' or a[0]>'Z':
    x.append(a)

In [47]:
x.remove('palamaner')

In [48]:
for a in x:
  table2_data.remove(a)

In [49]:
table2_data[60]='Sodam'

In [50]:
num_Chittoor=len(table2_data)

In [51]:
table2_lat=[]
table2_lon=[]
i=0
for a in table2_data:
  #print(a)
  address=a+', Chittoor, Andhra Pradesh, India'
  geolocator=Nominatim(user_agent=a)
  location=geolocator.geocode(address)
  if location==None:
    print(a)
    print(i)
    table2_lat.append(np.nan)
    table2_lon.append(np.nan)
    
  else:
    table2_lat.append(location.latitude)
    table2_lon.append(location.longitude)
  i=i+1

Buchinaidu khandriga
2
KVB Puram
8
Pitchatur
17
Sathyavedu
26
Nimmanapalli
46
Peddathippasamudram
54


In [52]:
table2_lat[2]=13.8027
table2_lon[2]=79.7326
table2_lat[8]=13.3436
table2_lon[8]=79.4141
table2_lat[17]=13.2403 
table2_lon[17]=79.4428
table2_lat[26]=13.437 
table2_lon[26]=79.956
table2_lat[46]=13.58919
table2_lon[46]=78.67559
table2_lat[54]=13.7167
table2_lon[54]=78.2000

In [53]:
Chittoor_info=pd.DataFrame(columns=['Neighborhood','Latitude','Longitude'])

In [54]:
Chittoor_info['Neighborhood']=table2_data
Chittoor_info['Latitude']=table2_lat
Chittoor_info['Longitude']=table2_lon
Chittoor_info

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Bangarupalem,13.191100,78.914701
1,B.Kothakota,13.668352,78.325703
2,Buchinaidu khandriga,13.802700,79.732600
3,Chittoor,13.160105,79.155551
4,Baireddipalle,13.093759,78.607248
...,...,...,...
61,Santhipuram,12.862092,78.385771
62,Somala,13.449441,78.848750
63,Thamballapalle,13.814044,78.416038
64,Valmikipuram,13.657330,78.698961


### Chittoor Analysis

In [55]:
Chittoor_venues = getNearbyVenues(names=Chittoor_info['Neighborhood'],
                                   latitudes=Chittoor_info['Latitude'],
                                   longitudes=Chittoor_info['Longitude'],
                                   radius=(Radius_mandal[1]*1000)
                                  )

# one hot encoding
Chittoor_onehot = pd.get_dummies(Chittoor_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Chittoor_onehot['Neighborhood'] = Chittoor_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Chittoor_onehot.columns[-1]] + list(Chittoor_onehot.columns[:-1])
Chittoor_onehot = Chittoor_onehot[fixed_columns]

Chittoor_grouped = Chittoor_onehot.groupby('Neighborhood').mean().reset_index()

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Chittoor_grouped['Neighborhood']

for ind in np.arange(Chittoor_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Chittoor_grouped.iloc[ind, :], num_top_venues)

# set number of clusters
kclusters = 3

Chittoor_grouped_clustering = Chittoor_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Chittoor_grouped_clustering)

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Chittoor_merged =Chittoor_info

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Chittoor_merged = Chittoor_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Chittoor_merged['Cluster Labels'].replace(np.nan,-1,inplace=True)
Chittoor_merged['Cluster Labels']=Chittoor_merged['Cluster Labels'].astype('int')

address = 'Chittoor, Andhra Pradesh, India'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11,tiles='Stamen Toner')

# set color scheme for the clusters
x = np.arange(kclusters+1)
ys = [i + x + (i*x)**2 for i in range(kclusters+1)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Chittoor_merged['Latitude'], Chittoor_merged['Longitude'], Chittoor_merged['Neighborhood'], Chittoor_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=8,
        popup=label,
        color=rainbow[-cluster+1],
        fill=True,
        fill_color=rainbow[-cluster+1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Bangarupalem
B.Kothakota
Buchinaidu khandriga
Chittoor
Baireddipalle
Chandragiri
Gangadhara Nellore
Chinnagottigallu
KVB Puram
Gudipala
Chowdepalle
Nagalapuram
Irala
Yerravaripalem
Pakala
Karvetinagar
Gangavaram
Pitchatur
Nagari
Gudupalle
Pulicherla
Narayanavanam
Gurramkonda
Renigunta
Nindra
Kalakada
Sathyavedu
Palasamudram
Kalikiri
Srikalahasti
Penumuru
Kambhamvaripalle
Thottambedu
Puthalapattu
Kuppam
Tirupati (rural)
Puttur
Kurabalakota
Tirupati (urban)
Ramachandrapuram
Madanapalle
Varadaiahpalem
Srirangarajapuram
Mulakalacheruvu
Yerpedu
Thavanampalle
Nimmanapalli
Vadamalapeta
palamaner
Vedurukuppam
Peddamandyam
Vijayapuram
Peddapanjani
Yadamari
Peddathippasamudram
Piler
Punganur
Ramakuppam
Ramasamudram
Rompicherla
Sodam
Santhipuram
Somala
Thamballapalle
Valmikipuram
Venkatagirikota


## East Godavari

In [56]:
source3=requests.get('https://en.wikipedia.org/wiki/East_Godavari').text
soup3=BeautifulSoup(source3,'lxml')

In [57]:
table3=soup3.find('table',class_='wikitable')
table3_data=table3.text

In [58]:
table3_data=table3_data.split('\n')

In [59]:
table3_data=table3_data[12:]

In [60]:
x=[]
for a in table3_data:
  if a=='':
    x.append(a)
  elif a[0]<'A' or a[0]>'Z':
    x.append(a)

In [61]:
for a in x:
  table3_data.remove(a)

In [62]:
num_EastGodavari=len(table3_data)

In [63]:
table3_data[22]='Vararamachandrapuram'

In [64]:
table3_lat=[]
table3_lon=[]
i=0
for a in table3_data:
  address=a+', East Godavari, Andhra Pradesh, India'
  geolocator=Nominatim(user_agent=a)
  location=geolocator.geocode(address)
  if location==None:
    print(a)
    print(i)
    table3_lat.append(np.nan)
    table3_lon.append(np.nan)
    
  else:
    table3_lat.append(location.latitude)
    table3_lon.append(location.longitude)
  i=i+1

In [65]:
East_Godavari_info=pd.DataFrame(columns=["Neighborhood",'Latitude','Longitude'])

In [66]:
East_Godavari_info['Neighborhood']=table3_data
East_Godavari_info['Latitude']=table3_lat
East_Godavari_info["Longitude"]=table3_lon
East_Godavari_info

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Atreyapuram,17.006173,81.756453
1,Chinturu,17.746060,81.393538
2,Gollaprolu,17.192004,82.308674
3,Addateegala,17.490133,82.043929
4,Alamuru,16.818711,81.878052
...,...,...,...
60,Yeleswaram,17.305588,82.106817
61,Ravulapalem,16.754667,81.831230
62,Razole,16.462868,81.861579
63,Sakhinetipalle,16.377428,81.758984


### East Godavari Analysis




In [67]:
East_Godavari_venues = getNearbyVenues(names=East_Godavari_info['Neighborhood'],
                                   latitudes=East_Godavari_info['Latitude'],
                                   longitudes=East_Godavari_info['Longitude'],
                                   radius=(Radius_mandal[2]*1000)
                                  )

# one hot encoding
East_Godavari_onehot = pd.get_dummies(East_Godavari_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
East_Godavari_onehot['Neighborhood'] = East_Godavari_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [East_Godavari_onehot.columns[-1]] + list(East_Godavari_onehot.columns[:-1])
East_Godavari_onehot = East_Godavari_onehot[fixed_columns]

East_Godavari_grouped = East_Godavari_onehot.groupby('Neighborhood').mean().reset_index()

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = East_Godavari_grouped['Neighborhood']

for ind in np.arange(East_Godavari_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(East_Godavari_grouped.iloc[ind, :], num_top_venues)

# set number of clusters
kclusters = 3

East_Godavari_grouped_clustering = East_Godavari_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(East_Godavari_grouped_clustering)

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

East_Godavari_merged =East_Godavari_info

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
East_Godavari_merged = East_Godavari_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

East_Godavari_merged['Cluster Labels'].replace(np.nan,-1,inplace=True)
East_Godavari_merged['Cluster Labels']=East_Godavari_merged['Cluster Labels'].astype('int')

address = 'East Godavari, Andhra Pradesh, India'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11,tiles='Stamen Terrain')

# set color scheme for the clusters
x = np.arange(kclusters+1)
ys = [i + x + (i*x)**2 for i in range(kclusters+1)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(East_Godavari_merged['Latitude'], East_Godavari_merged['Longitude'], East_Godavari_merged['Neighborhood'], East_Godavari_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=8,
        popup=label,
        color=rainbow[-cluster+1],
        fill=True,
        fill_color=rainbow[-cluster+1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Atreyapuram
Chinturu
Gollaprolu
Addateegala
Alamuru
Anaparthy
Devipatnam
Ainavilli
Kunavaram
Kakinada (Rural)
Gandepalle
Gokavaram
Biccavolu
Gangavaram
Allavaram
Nellipaka
Kakinada (Urban)
Jaggampeta
Kadiam
K.Gangavaram
Maredumilli
Amalapuram
Vararamachandrapuram
Karapa
Kirlampudi
Korukonda
Kajuluru
Rajavommangi
Ambajipeta
Kothapalle
Kotananduru
Rajahmundry (rural)
Kapileswarapuram
Rampachodavaram
Katrenikona
Pithapuram
Peddapuram
Rajahmundry (urban)
Mandapeta
Y.Ramavaram
Kothapeta
Samalkota
Prathipadu
Rajanagaram
Pamarru
Malikipuram
Thallarevu
Routhulapudi
Seethanagaram
Pedapudi
Mamidikuduru
Sankhavaram
Ramachandrapuram
Mummidivaram
Thondangi
Rangampeta
P.Gannavaram
Tuni
Rayavaram
I.Polavaram
Yeleswaram
Ravulapalem
Razole
Sakhinetipalle
Uppalaguptam


## Guntur District

In [68]:
source4=requests.get('https://en.wikipedia.org/wiki/Guntur_district').text
soup4=BeautifulSoup(source4,'lxml')

In [69]:
table4=soup4.find('table',class_='wikitable')
table4_data=table4.text

In [70]:
table4_data=table4_data.split('\n')

In [71]:
table4_data=table4_data[12:]

In [72]:
x=[]
for a in table4_data:
  if a=='':
    x.append(a)
  elif a[0]<'A' or a[0]>'Z':
    x.append(a)

In [73]:
for a in x:
  table4_data.remove(a)

In [74]:
num_Guntur=len(table4_data)

In [75]:
table4_data.remove('Vinukonda|')
table4_data.append('Vinukonda')

In [76]:
table4_data[1]='Amruthalur'
table4_data[35]='Veldurthi'

In [77]:
table4_lat=[]
table4_lon=[]
i=0
for a in table4_data:
  #print(a)
  address=a+', Guntur, Andhra Pradesh, India'
  geolocator=Nominatim(user_agent=a)
  location=geolocator.geocode(address)
  if location==None:
    print(a)
    print(i)
    table4_lat.append(np.nan)
    table4_lon.append(np.nan)
    
  else:
    table4_lat.append(location.latitude)
    table4_lon.append(location.longitude)
  i=i+1

Ipur
14


In [78]:
table4_lat[14]=16.229429
table4_lon[14]=79.775488

In [79]:
Guntur_info=pd.DataFrame(columns=['Neighborhood','Latitude','Longitude'])
Guntur_info['Neighborhood']=table4_data
Guntur_info['Latitude']=table4_lat
Guntur_info['Longitude']=table4_lon
Guntur_info

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Amaravati,16.51691,80.500259
1,Amruthalur,16.120739,80.652325
2,Bollapalle,16.227951,79.653867
3,Dachepalle,16.612177,79.752748
4,Atchampet,16.681911,80.121719
5,Bapatla,15.905261,80.46805
6,Chilakaluripet,16.09495,80.165878
7,Durgi,16.33032,79.504432
8,Bellamkonda,16.63447,80.015302
9,Bhattiprolu,16.105838,80.788122


### Guntur Analysis

In [80]:
Guntur_venues = getNearbyVenues(names=Guntur_info['Neighborhood'],
                                   latitudes=Guntur_info['Latitude'],
                                   longitudes=Guntur_info['Longitude'],
                                   radius=(Radius_mandal[3]*1000)
                                  )

# one hot encoding
Guntur_onehot = pd.get_dummies(Guntur_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Guntur_onehot['Neighborhood'] = Guntur_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Guntur_onehot.columns[-1]] + list(Guntur_onehot.columns[:-1])
Guntur_onehot = Guntur_onehot[fixed_columns]

Guntur_grouped = Guntur_onehot.groupby('Neighborhood').mean().reset_index()

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Guntur_grouped['Neighborhood']

for ind in np.arange(Guntur_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Guntur_grouped.iloc[ind, :], num_top_venues)

# set number of clusters
kclusters = 3

Guntur_grouped_clustering = Guntur_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Guntur_grouped_clustering)

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Guntur_merged =Guntur_info

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Guntur_merged = Guntur_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Guntur_merged['Cluster Labels'].replace(np.nan,-1,inplace=True)
Guntur_merged['Cluster Labels']=Guntur_merged['Cluster Labels'].astype('int')

address = 'Guntur, Andhra Pradesh, India'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11,tiles='Stamen Terrain')

# set color scheme for the clusters
x = np.arange(kclusters+1)
ys = [i + x + (i*x)**2 for i in range(kclusters+1)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Guntur_merged['Latitude'], Guntur_merged['Longitude'], Guntur_merged['Neighborhood'], Guntur_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=8,
        popup=label,
        color=rainbow[-cluster+1],
        fill=True,
        fill_color=rainbow[-cluster+1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Amaravati
Amruthalur
Bollapalle
Dachepalle
Atchampet
Bapatla
Chilakaluripet
Durgi
Bellamkonda
Bhattiprolu
Edlapadu
Gurazala
Guntur East
Chebrolu
Ipur
Karempudi
Guntur West
Cherukupalle
Nadendla
Machavaram
Krosuru
Duggirala
Narasaraopet
Macherla
Mangalagiri
Kakumanu
Nekarikallu
Piduguralla
Medikonduru
Karlapalem
Nuzendla
Rentachintala
Muppalla
Kollipara
Rompicherla
Veldurthi
Pedakakani
Kolluru
Savalyapuram
Pedakurapadu
Nagaram
Pedanandipadu
Nizampatnam
Phirangipuram
Pittalavanipalem
Prathipadu
Ponnur
Rajupalem
Sattenapalle
Repalle
Tadepalle
Tenali
Tadikonda
Tsunduru
Thullur
Vemuru
Vatticherukuru
Vinukonda


## YSR District

In [81]:
source5=requests.get('https://en.wikipedia.org/wiki/Kadapa_district').text
soup5=BeautifulSoup(source5,'lxml')

In [82]:
table5=soup5.find('table',class_='wikitable')
table5_data=table5.text

In [83]:
table5_data=table5_data.split('\n')

In [84]:
x=[]
for a in table5_data:
  if a=='':
    x.append(a)
  elif a[0]<'A' or a[0]>'Z':
    x.append(a)

In [85]:
for a in x:
  table5_data.remove(a)

In [86]:
table5_data=table5_data[3:]

In [87]:
table5_lat=[]
table5_lon=[]
i=0
for a in table5_data:
  address=a+', YSR, Andhra Pradesh, India'
  geolocator=Nominatim(user_agent=a)
  location=geolocator.geocode(address)
  if location==None:
    print(a)
    print(i)
    table5_lat.append(np.nan)
    table5_lon.append(np.nan)
    
  else:
    table5_lat.append(location.latitude)
    table5_lon.append(location.longitude)
  i=i+1

B Matham
4
Veeraballi
45
Sri Avadhuth Kasinayana
46


In [88]:
table5_lat[4]=14.464740
table5_lon[4]=78.780070
table5_lat[45]=14.920 
table5_lon[45]=78.5125
table5_lat[46]=15.001380
table5_lon[46]=78.917760

In [89]:
YSR_info=pd.DataFrame(columns=['Neighborhood','Latitude','Longitude'])
YSR_info['Neighborhood']=table5_data
YSR_info["Latitude"]=table5_lat
YSR_info['Longitude']=table5_lon
YSR_info

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Chakrayapet,14.235412,78.514016
1,Atlur,14.564145,79.065191
2,Chapadu,14.727503,78.657769
3,Chennur,14.556419,78.816235
4,B Matham,14.46474,78.78007
5,Duvvur,14.867483,78.666661
6,Chinnamandem,13.944612,78.656324
7,B.Kodur,14.855122,79.004669
8,Jammalamadugu,14.826042,78.399039
9,Chinthakommadinne,14.400359,78.811256


### YSR Analysis

In [90]:
YSR_venues = getNearbyVenues(names=YSR_info['Neighborhood'],
                                   latitudes=YSR_info['Latitude'],
                                   longitudes=YSR_info['Longitude'],
                                   radius=(Radius_mandal[4]*1000)
                                  )

# one hot encoding
YSR_onehot = pd.get_dummies(YSR_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
YSR_onehot['Neighborhood'] = YSR_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [YSR_onehot.columns[-1]] + list(YSR_onehot.columns[:-1])
YSR_onehot = YSR_onehot[fixed_columns]

YSR_grouped = YSR_onehot.groupby('Neighborhood').mean().reset_index()

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = YSR_grouped['Neighborhood']

for ind in np.arange(YSR_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(YSR_grouped.iloc[ind, :], num_top_venues)

# set number of clusters
kclusters = 3

YSR_grouped_clustering = YSR_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(YSR_grouped_clustering)

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

YSR_merged =YSR_info

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
YSR_merged = YSR_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

YSR_merged['Cluster Labels'].replace(np.nan,-1,inplace=True)
YSR_merged['Cluster Labels']=YSR_merged['Cluster Labels'].astype('int')

address = 'YSR, Andhra Pradesh, India'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11,tiles='Stamen Terrain')

# set color scheme for the clusters
x = np.arange(kclusters+1)
ys = [i + x + (i*x)**2 for i in range(kclusters+1)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(YSR_merged['Latitude'], YSR_merged['Longitude'], YSR_merged['Neighborhood'], YSR_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=8,
        popup=label,
        color=rainbow[-cluster+1],
        fill=True,
        fill_color=rainbow[-cluster+1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Chakrayapet
Atlur
Chapadu
Chennur
B Matham
Duvvur
Chinnamandem
B.Kodur
Jammalamadugu
Chinthakommadinne
Badvel
Kondapuram
Kadapa
Chitvel
Lingala
Galiveedu
Gopavaram
Muddanur
Kamalapuram
Kalasapadu
Mydukur
Khajipet
Kodur
Mylavaram
Lakkireddipalle
Nandalur
Peddamudium
Pendlimarri
Obulavaripalle
Proddatur
Ramapuram
Penagalur
Pulivendula
Rayachoti
Porumamilla
Rajupalem
Sambepalle
Pullampeta
Simhadripuram
T Sundupalle
Rajampet
Thondur
Vallur
Siddavatam
Vempalle
Veeraballi
Sri Avadhuth Kasinayana
Vemula
Veerapunayunipalle
Vontimitta
Yerraguntla


## Krishna

In [91]:
source6=requests.get('https://www.census2011.co.in/data/district/133-krishna-andhra-pradesh.html').text
soup6=BeautifulSoup(source6,'lxml')

In [92]:
table6=soup6.find('table').text
table6_data=table6

In [93]:
table6_data=table6_data.split('\n')

In [94]:
x=[]
for a in table6_data:
  if a=='':
    x.append(a)
  elif a[0]<'A' or a[0]>'Z':
    x.append(a)

In [95]:
for a in x:
  table6_data.remove(a)

for a in table6_data:
  if a=='Krishna':
    table6_data.remove('Krishna')

In [96]:
table6_data=table6_data[2:]

In [97]:
table6_data[11]='Kaikaluru'
table6_data[24]='Vissannapeta'

In [99]:
table6_lat=[]
table6_lon=[]
i=0
for a in table6_data:
  address=a+', Krishna, Andhra Pradesh, India'
  geolocator=Nominatim(user_agent=a)
  location=geolocator.geocode(address)
  if location==None:
    print(a)
    print(i)
    table6_lat.append(np.nan)
    table6_lon.append(np.nan)
    
  else:
    table6_lat.append(location.latitude)
    table6_lon.append(location.longitude)
  i=i+1

In [100]:
Krishna_info=pd.DataFrame(columns=['Neighborhood','Latitude','Longitude'])
Krishna_info['Neighborhood']=table6_data
Krishna_info['Latitude']=table6_lat
Krishna_info['Longitude']=table6_lon
Krishna_info

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Vijayawada Urban,16.525702,80.62519
1,Machilipatnam,16.181939,81.13513
2,Penamaluru,16.480134,80.697081
3,Gudivada,16.432998,80.993715
4,Vijayawada Rural,16.601394,80.631254
5,Nuzvid,16.790701,80.84761
6,Jaggayyapeta,16.882983,80.075253
7,Ibrahimpatnam,16.655476,80.495402
8,Nandigama,16.787738,80.28883
9,Gannavaram,16.55613,80.796151


### Krishna Analysis

In [101]:
Krishna_venues = getNearbyVenues(names=Krishna_info['Neighborhood'],
                                   latitudes=Krishna_info['Latitude'],
                                   longitudes=Krishna_info['Longitude'],
                                   radius=(Radius_mandal[5]*1000)
                                  )

# one hot encoding
Krishna_onehot = pd.get_dummies(Krishna_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Krishna_onehot['Neighborhood'] = Krishna_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Krishna_onehot.columns[-1]] + list(Krishna_onehot.columns[:-1])
Krishna_onehot = Krishna_onehot[fixed_columns]

Krishna_grouped = Krishna_onehot.groupby('Neighborhood').mean().reset_index()

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Krishna_grouped['Neighborhood']

for ind in np.arange(Krishna_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Krishna_grouped.iloc[ind, :], num_top_venues)

# set number of clusters
kclusters = 3

Krishna_grouped_clustering = Krishna_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Krishna_grouped_clustering)

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Krishna_merged =Krishna_info

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Krishna_merged = Krishna_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Krishna_merged['Cluster Labels'].replace(np.nan,-1,inplace=True)
Krishna_merged['Cluster Labels']=Krishna_merged['Cluster Labels'].astype('int')

address = 'Krishna, Andhra Pradesh, India'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11,tiles='Stamen Terrain')

# set color scheme for the clusters
x = np.arange(kclusters+1)
ys = [i + x + (i*x)**2 for i in range(kclusters+1)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Krishna_merged['Latitude'], Krishna_merged['Longitude'], Krishna_merged['Neighborhood'], Krishna_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=8,
        popup=label,
        color=rainbow[-cluster+1],
        fill=True,
        fill_color=rainbow[-cluster+1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Vijayawada Urban
Machilipatnam
Penamaluru
Gudivada
Vijayawada Rural
Nuzvid
Jaggayyapeta
Ibrahimpatnam
Nandigama
Gannavaram
Bapulapadu
Kaikaluru
Tiruvuru
Vuyyuru
Gampalagudem
Kanchikacherla
Kalidindi
Kankipadu
Mylavaram
Pedana
Mudinepalle
Agiripalle
Vatsavai
Chandarlapadu
Vissannapeta
G.Konduru
Musunuru
Pamarru
Unguturu
Pamidimukkala
Challapalle
Chatrai
Movva
Penuganchiprolu
Gudlavalleru
Guduru
Veerullapadu
Kruthivennu
Mandavalli
A.Konduru
Nagayalanka
Bantumilli
Reddigudem
Koduru
Avanigadda
Ghantasala
Thotlavalluru
Nandivada
Mopidevi
Pedaparupudi


## Kurnool

In [102]:
source7=requests.get('https://en.wikipedia.org/wiki/Kurnool_district').text
soup7=BeautifulSoup(source7,'lxml')

In [103]:
table7=soup7.find('table',class_='wikitable').text
table7_data=table7

In [104]:
table7_data=table7_data.split('\n')

In [105]:
x=[]
for a in table7_data:
  if a=='':
    x.append(a)
  elif a[0]<'A' or a[0]>'Z':
    x.append(a)

In [106]:
for a in x:
  table7_data.remove(a)

In [107]:
table7_data=table7_data[3:]

In [108]:
table7_data[33]='Midhuthuru'

In [115]:
table7_lat=[]
table7_lon=[]
i=0
for a in table7_data:
  address=a+', Kurnool, Andhra Pradesh, India'
  geolocator=Nominatim(user_agent=a)
  location=geolocator.geocode(address)
  if location==None:
    print(a)
    print(i)
    table7_lat.append(np.nan)
    table7_lon.append(np.nan)
    
  else:
    table7_lat.append(location.latitude)
    table7_lon.append(location.longitude)
  i=i+1

Gudur
12
Midhuthuru
33


In [116]:
table7_lat[12]=15.7744
table7_lon[12]=77.8053
table7_lat[33]=15.7667
table7_lon[33]=78.3000

In [119]:
Kurnool_info=pd.DataFrame(columns=['Neighborhood','Latitude','Longitude'])
Kurnool_info['Neighborhood']=table7_data
Kurnool_info['Latitude']=table7_lat
Kurnool_info['Longitude']=table7_lon
Kurnool_info

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Atmakur,15.915339,78.708406
1,Allagadda,15.165145,78.507324
2,Adoni,15.655919,77.269774
3,Bethamcherla,15.467936,78.10738
4,Banaganapalle,15.352004,78.193587
5,Alur,15.428596,77.261334
6,C.Belagal,15.813879,77.694974
7,Bandi Atmakur,15.534882,78.646484
8,Aspari,15.525631,77.397998
9,Dhone,15.375743,77.839715


### Kurnool Analysis

In [124]:
Kurnool_venues = getNearbyVenues(names=Kurnool_info['Neighborhood'],
                                   latitudes=Kurnool_info['Latitude'],
                                   longitudes=Kurnool_info['Longitude'],
                                   radius=(Radius_mandal[6]*1000)
                                  )

# one hot encoding
Kurnool_onehot = pd.get_dummies(Kurnool_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Kurnool_onehot['Neighborhood'] = Kurnool_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Kurnool_onehot.columns[-1]] + list(Kurnool_onehot.columns[:-1])
Kurnool_onehot = Kurnool_onehot[fixed_columns]

Kurnool_grouped = Kurnool_onehot.groupby('Neighborhood').mean().reset_index()

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Kurnool_grouped['Neighborhood']

for ind in np.arange(Kurnool_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Kurnool_grouped.iloc[ind, :], num_top_venues)

# set number of clusters
kclusters = 3

Kurnool_grouped_clustering = Kurnool_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Kurnool_grouped_clustering)

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Kurnool_merged =Kurnool_info

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Kurnool_merged = Kurnool_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Kurnool_merged['Cluster Labels'].replace(np.nan,-1,inplace=True)
Kurnool_merged['Cluster Labels']=Kurnool_merged['Cluster Labels'].astype('int')

address = 'Kurnool, Andhra Pradesh, India'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11,tiles='Stamen Terrain')

# set color scheme for the clusters
x = np.arange(kclusters+1)
ys = [i + x + (i*x)**2 for i in range(kclusters+1)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Kurnool_merged['Latitude'], Kurnool_merged['Longitude'], Kurnool_merged['Neighborhood'], Kurnool_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=8,
        popup=label,
        color=rainbow[-cluster+1],
        fill=True,
        fill_color=rainbow[-cluster+1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Atmakur
Allagadda
Adoni
Bethamcherla
Banaganapalle
Alur
C.Belagal
Bandi Atmakur
Aspari
Dhone
Chagalamarri
Chippagiri
Gudur
Dornipadu
Devanakonda
Jupadu Bungalow
Gadivemula
Gonegandla
Kallur
Gospadu
Halaharvi
Kodumuru
Koilkuntla
Holagunda
Kothapalle
Kolimigundla
Kosigi
Krishnagiri
Mahanandi
Kowthalam
Kurnool
Nandyal
Maddikera
Midhuthuru
Owk
Mantralayam
Nandikotkur
Panyam
Nandavaram
Orvakal
Rudravaram
Pattikonda
Pagidyala
Sanjamala
Pedda kadabur
Pamulapadu
Sirvel
Tuggali
Peapally
Uyyalawada
Yemmiganur
Srisailam
Veldurthi
Velgode


## Nellore

In [125]:
source8=requests.get('https://en.wikipedia.org/wiki/Nellore_district').text
soup8=BeautifulSoup(source8,'lxml')

In [126]:
table8=soup8.find('table',class_='wikitable').text
table8_data=table8

In [127]:
table8_data=table8_data.split('\n')

In [128]:
table8_data.remove('sydapuram')
table8_data.append('Sydapuram')

In [129]:
x=[]
for a in table8_data:
  if a=='':
    x.append(a)
  elif a[0]<'A' or a[0]>'Z':
    x.append(a)

In [130]:
for a in x:
  table8_data.remove(a)

In [131]:
table8_data=table8_data[5:]

In [132]:
table8_data[26]='Tada'
table8_data[2]='Indukurpet'

In [135]:
table8_lat=[]
table8_lon=[]
i=0
for a in table8_data:
  #print(a)
  address=a+', Nellore, Andhra Pradesh, India'
  geolocator=Nominatim(user_agent=a)
  location=geolocator.geocode(address)
  if location==None:
    print(a)
    print(i)
    table8_lat.append(np.nan)
    table8_lon.append(np.nan)
    
  else:
    table8_lat.append(location.latitude)
    table8_lon.append(location.longitude)
  i=i+1

In [136]:
Nellore_info=pd.DataFrame(columns=['Neighborhood','Latitude','Longitude'])
Nellore_info['Neighborhood']=table8_data
Nellore_info['Latitude']=table8_lat
Nellore_info['Longitude']=table8_lon
Nellore_info

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Atmakur,14.670292,79.573962
1,Doravarisatram,13.802121,79.997888
2,Indukurpet,14.511563,80.114164
3,Venkatagiri,14.010068,79.485073
4,Allur,14.683381,80.053594
5,Ananthasagaram,14.577228,79.391319
6,Naidupeta,13.909088,79.896535
7,Muthukur,14.28489,80.093787
8,Chillakur,14.148457,80.000531
9,Bogole,14.783607,80.011803


### Nellore Analysis

In [137]:
Nellore_venues = getNearbyVenues(names=Nellore_info['Neighborhood'],
                                   latitudes=Nellore_info['Latitude'],
                                   longitudes=Nellore_info['Longitude'],
                                   radius=(Radius_mandal[7]*1000)
                                  )

# one hot encoding
Nellore_onehot = pd.get_dummies(Nellore_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Nellore_onehot['Neighborhood'] = Nellore_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Nellore_onehot.columns[-1]] + list(Nellore_onehot.columns[:-1])
Nellore_onehot = Nellore_onehot[fixed_columns]

Nellore_grouped = Nellore_onehot.groupby('Neighborhood').mean().reset_index()

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Nellore_grouped['Neighborhood']

for ind in np.arange(Nellore_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Nellore_grouped.iloc[ind, :], num_top_venues)

# set number of clusters
kclusters = 3

Nellore_grouped_clustering = Nellore_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Nellore_grouped_clustering)

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Nellore_merged =Nellore_info

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Nellore_merged = Nellore_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Nellore_merged['Cluster Labels'].replace(np.nan,-1,inplace=True)
Nellore_merged['Cluster Labels']=Nellore_merged['Cluster Labels'].astype('int')

address = 'Nellore, Andhra Pradesh, India'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11,tiles='Stamen Terrain')

# set color scheme for the clusters
x = np.arange(kclusters+1)
ys = [i + x + (i*x)**2 for i in range(kclusters+1)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Nellore_merged['Latitude'], Nellore_merged['Longitude'], Nellore_merged['Neighborhood'], Nellore_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=8,
        popup=label,
        color=rainbow[-cluster+1],
        fill=True,
        fill_color=rainbow[-cluster+1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Atmakur
Doravarisatram
Indukurpet
Venkatagiri
Allur
Ananthasagaram
Naidupeta
Muthukur
Chillakur
Bogole
Anumasamudrampeta
Ojili
Nellore
Chittamur
Buchireddypalem
Chejerla
Pellakur
Podalakur
Dakkili
Dagadarthi
Duttalur
Sullurupeta
Rapur
Gudur
Jaladanki
Kaluvoya
Tada
Thotapalligudur
Kota
Kaligiri
Marripadu
Venkatachalam
Manubolu
Kavali
Sangam
Kovur
Kodavalur
Seetharamapuram
Vakadu
Kondapuram
Udayagiri
Balayapalle
Vidavalur
Vinjamur
Varikuntapadu
Sydapuram


## Prakasam

In [138]:
source9=requests.get('https://en.wikipedia.org/wiki/Prakasam_district').text
soup9=BeautifulSoup(source9,'lxml')

In [139]:
table9=soup9.find('table',class_='wikitable').text
table9_data=table9

In [140]:
table9_data=table9_data.split('\n')

In [141]:
x=[]
for a in table9_data:
  if a=='':
    x.append(a)
  elif a[0]<'A' or a[0]>'Z':
    x.append(a)

In [142]:
for a in x:
  table9_data.remove(a)

In [143]:
table9_data=table9_data[3:]

In [144]:
table9_data[13]="Giddalur"
table9_data[35]="Marturu"
table9_data[41]='Parchuru'
table9_data[48]='Talluru'
table9_data[51]='Yaddanapudi'
table9_data[55]='Zarugumalli'

In [145]:
table9_lat=[]
table9_lon=[]
i=0
for a in table9_data:
  #print(a)
  address=a+', Prakasam, Andhra Pradesh, India'
  geolocator=Nominatim(user_agent=a)
  location=geolocator.geocode(address)
  if location==None:
    print(a)
    print(i)
    table9_lat.append(np.nan)
    table9_lon.append(np.nan)
    
  else:
    table9_lat.append(location.latitude)
    table9_lon.append(location.longitude)
  i=i+1

Bestavaripeta
4
Janakavaram Panguluru
20


In [146]:
table9_lat[4]=15.5500 
table9_lon[4]=79.1167
table9_lat[20]=15.4909 
table9_lon[20]=80.0530

In [147]:
Prakasam_info=pd.DataFrame(columns=['Neighborhood','Latitude','Longitude'])
Prakasam_info['Neighborhood']=table9_data
Prakasam_info['Latitude']=table9_lat
Prakasam_info['Longitude']=table9_lon
Prakasam_info

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Chandrasekharapuram,15.182341,79.279329
1,Ardhaveedu,15.672027,78.933187
2,Addanki,15.811544,79.969307
3,Darsi,15.734746,79.706215
4,Bestavaripeta,15.55,79.1167
5,Ballikurava,16.001777,80.007622
6,Donakonda,15.81601,79.42399
7,Cumbum,15.564009,79.052059
8,Chimakurthi,15.586984,79.848684
9,Gudluru,15.061722,79.928015


### Prakasam Analysis

In [148]:
Prakasam_venues = getNearbyVenues(names=Prakasam_info['Neighborhood'],
                                   latitudes=Prakasam_info['Latitude'],
                                   longitudes=Prakasam_info['Longitude'],
                                   radius=(Radius_mandal[8]*1000)
                                  )

# one hot encoding
Prakasam_onehot = pd.get_dummies(Prakasam_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Prakasam_onehot['Neighborhood'] = Prakasam_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Prakasam_onehot.columns[-1]] + list(Prakasam_onehot.columns[:-1])
Prakasam_onehot = Prakasam_onehot[fixed_columns]

Prakasam_grouped = Prakasam_onehot.groupby('Neighborhood').mean().reset_index()

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Prakasam_grouped['Neighborhood']

for ind in np.arange(Prakasam_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Prakasam_grouped.iloc[ind, :], num_top_venues)

# set number of clusters
kclusters = 3

Prakasam_grouped_clustering = Prakasam_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Prakasam_grouped_clustering)

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Prakasam_merged =Prakasam_info

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Prakasam_merged = Prakasam_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Prakasam_merged['Cluster Labels'].replace(np.nan,-1,inplace=True)
Prakasam_merged['Cluster Labels']=Prakasam_merged['Cluster Labels'].astype('int')

address = 'Prakasam, Andhra Pradesh, India'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11,tiles='Stamen Terrain')

# set color scheme for the clusters
x = np.arange(kclusters+1)
ys = [i + x + (i*x)**2 for i in range(kclusters+1)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Prakasam_merged['Latitude'], Prakasam_merged['Longitude'], Prakasam_merged['Neighborhood'], Prakasam_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=8,
        popup=label,
        color=rainbow[-cluster+1],
        fill=True,
        fill_color=rainbow[-cluster+1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Chandrasekharapuram
Ardhaveedu
Addanki
Darsi
Bestavaripeta
Ballikurava
Donakonda
Cumbum
Chimakurthi
Gudluru
Dornala
Chinaganjam
Hanumanthunipadu
Giddalur
Chirala
Kandukur
Komarolu
Inkollu
Kanigiri
Markapur
Janakavaram Panguluru
Konakanamitla
Peda Araveedu
Karamchedu
Kondapi
Pullalacheruvu
Korisapadu
Kurichedu
Racherla
Kothapatnam
Lingasamudram
Tripuranthakam
Maddipadu
Marripudi
Yerragondapalem
Marturu
Mundlamuru
Naguluppalapadu
Pamur
Ongole
Pedacherlopalle
Parchuru
Podili
Santhamaguluru
Ponnaluru
Santhanuthalapadu
Singarayakonda
Tangutur
Talluru
Vetapalem
Tarlupadu
Yaddanapudi
Ulavapadu
Veligandla
Voletivaripalem
Zarugumalli


## Srikakulam

In [149]:
source10=requests.get('https://en.wikipedia.org/wiki/Srikakulam_district').text
soup10=BeautifulSoup(source10,'lxml')

In [150]:
table10=soup10.find('table',class_='wikitable').text
table10_data=table10

In [151]:
table10_data=table10_data.split('\n')

In [152]:
x=[]
for a in table10_data:
  if a=='':
    x.append(a)
  elif a[0]<'A' or a[0]>'Z':
    x.append(a)

In [153]:
for a in x:
  table10_data.remove(a)

In [154]:
table10_data=table10_data[3:]

In [155]:
table10_data[10]='Ganguvarisigadam'
table10_data[35]='Vajrapukotturu'

In [156]:
table10_lat=[]
table10_lon=[]
i=0
for a in table10_data:
  address=a+', Srikakulam, Andhra Pradesh, India'
  geolocator=Nominatim(user_agent=a)
  location=geolocator.geocode(address)
  if location==None:
    print(a)
    print(i)
    table10_lat.append(np.nan)
    table10_lon.append(np.nan)
    
  else:
    table10_lat.append(location.latitude)
    table10_lon.append(location.longitude)
  i=i+1

Lakshminarasupeta
16


In [157]:
table10_lat[16]=18.3659
table10_lon[16]=83.5744

In [158]:
Srikakulam_info=pd.DataFrame(columns=['Neighborhood','Latitude','Longitude'])
Srikakulam_info['Neighborhood']=table10_data
Srikakulam_info['Latitude']=table10_lat
Srikakulam_info['Longitude']=table10_lon
Srikakulam_info

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Bhamini,18.902287,83.808948
1,Amadalavalasa,18.400974,83.874341
2,Ichchapuram,19.104003,84.666806
3,Hiramandalam,18.679138,83.952412
4,Burja,18.523074,83.836609
5,Jalumuru,18.534996,84.041523
6,Kotturu,18.545188,84.236634
7,Etcherla,18.24266,83.864973
8,Kanchili,18.999336,84.565202
9,Meliaputti,18.741258,84.189899


### Srikakulam Analysis

In [159]:
Srikakulam_venues = getNearbyVenues(names=Srikakulam_info['Neighborhood'],
                                   latitudes=Srikakulam_info['Latitude'],
                                   longitudes=Srikakulam_info['Longitude'],
                                   radius=(Radius_mandal[9]*1000)
                                  )

# one hot encoding
Srikakulam_onehot = pd.get_dummies(Srikakulam_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Srikakulam_onehot['Neighborhood'] = Srikakulam_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Srikakulam_onehot.columns[-1]] + list(Srikakulam_onehot.columns[:-1])
Srikakulam_onehot = Srikakulam_onehot[fixed_columns]

Srikakulam_grouped = Srikakulam_onehot.groupby('Neighborhood').mean().reset_index()

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Srikakulam_grouped['Neighborhood']

for ind in np.arange(Srikakulam_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Srikakulam_grouped.iloc[ind, :], num_top_venues)

# set number of clusters
kclusters = 3

Srikakulam_grouped_clustering = Srikakulam_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Srikakulam_grouped_clustering)

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Srikakulam_merged =Srikakulam_info

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Srikakulam_merged = Srikakulam_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Srikakulam_merged['Cluster Labels'].replace(np.nan,-1,inplace=True)
Srikakulam_merged['Cluster Labels']=Srikakulam_merged['Cluster Labels'].astype('int')

address = 'East Godavari, Andhra Pradesh, India'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11,tiles='Stamen Terrain')

# set color scheme for the clusters
x = np.arange(kclusters+1)
ys = [i + x + (i*x)**2 for i in range(kclusters+1)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Srikakulam_merged['Latitude'], Srikakulam_merged['Longitude'], Srikakulam_merged['Neighborhood'], Srikakulam_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=8,
        popup=label,
        color=rainbow[-cluster+1],
        fill=True,
        fill_color=rainbow[-cluster+1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Bhamini
Amadalavalasa
Ichchapuram
Hiramandalam
Burja
Jalumuru
Kotturu
Etcherla
Kanchili
Meliaputti
Ganguvarisigadam
Kaviti
Palakonda
Gara
Kotabommali
Pathapatnam
Lakshminarasupeta
Mandasa
Rajam
Laveru
Nandigam
Regidi Amadalavalasa
Narasannapeta
Palasa
Saravakota
Ponduru
Santhabommali
Santhakavati
Polaki
Sompeta
Seethampeta
Ranastalam
Tekkali
Vangara
Sarubujjili
Vajrapukotturu
Veeraghattam
Srikakulam


## Vizianagaram 

In [160]:
source11=requests.get('https://www.census2011.co.in/data/district/129-vizianagaram-andhra-pradesh.html').text
soup11=BeautifulSoup(source11,'lxml')

In [161]:
table11=soup11.find('table').text
table11_data=table11

In [162]:
table11_data=table11_data.split('\n')

In [163]:
x=[]
for a in table11_data:
  if a=='':
    x.append(a)
  elif a[0]<'A' or a[0]>'Z':
    x.append(a)
for a in x:
  table11_data.remove(a)

for a in table11_data:
  if a=='Vizianagaram':
    table11_data.remove('Vizianagaram')

In [164]:
table11_data=table11_data[2:]

In [165]:
table11_lat=[]
table11_lon=[]
i=0
for a in table11_data:
  address=a+', Vizianagaram, Andhra Pradesh, India'
  geolocator=Nominatim(user_agent=a)
  location=geolocator.geocode(address)
  if location==None:
    print(a)
    print(i)
    table11_lat.append(np.nan)
    table11_lon.append(np.nan)
    
  else:
    table11_lat.append(location.latitude)
    table11_lon.append(location.longitude)
  i=i+1

In [166]:
Vizianagaram_info=pd.DataFrame(columns=['Neighborhood','Latitude','Longitude'])
Vizianagaram_info['Neighborhood']=table11_data
Vizianagaram_info['Latitude']=table11_lat
Vizianagaram_info['Longitude']=table11_lon
Vizianagaram_info

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Bobbili,18.57482,83.352259
1,Parvathipuram,18.764944,83.344588
2,Salur,18.5177,83.2081
3,Nellimarla,18.183978,83.572568
4,Srungavarapukota,18.123054,83.183226
5,Pusapatirega,18.085465,83.588338
6,Kothavalasa,17.907325,83.185251
7,Garividi,18.282024,83.532923
8,Gantyada,18.151194,83.2568
9,Gurla,18.248773,83.465297


### Vizianagaram Analysis

In [167]:
Vizianagaram_venues = getNearbyVenues(names=Vizianagaram_info['Neighborhood'],
                                   latitudes=Vizianagaram_info['Latitude'],
                                   longitudes=Vizianagaram_info['Longitude'],
                                   radius=(Radius_mandal[11]*1000)
                                  )

# one hot encoding
Vizianagaram_onehot = pd.get_dummies(Vizianagaram_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Vizianagaram_onehot['Neighborhood'] = Vizianagaram_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Vizianagaram_onehot.columns[-1]] + list(Vizianagaram_onehot.columns[:-1])
Vizianagaram_onehot = Vizianagaram_onehot[fixed_columns]

Vizianagaram_grouped = Vizianagaram_onehot.groupby('Neighborhood').mean().reset_index()

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Vizianagaram_grouped['Neighborhood']

for ind in np.arange(Vizianagaram_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Vizianagaram_grouped.iloc[ind, :], num_top_venues)

# set number of clusters
kclusters = 3

Vizianagaram_grouped_clustering = Vizianagaram_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Vizianagaram_grouped_clustering)

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Vizianagaram_merged =Vizianagaram_info

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Vizianagaram_merged = Vizianagaram_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Vizianagaram_merged['Cluster Labels'].replace(np.nan,-1,inplace=True)
Vizianagaram_merged['Cluster Labels']=Vizianagaram_merged['Cluster Labels'].astype('int')

address = 'Vizianagaram, Andhra Pradesh, India'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11,tiles='Stamen Terrain')

# set color scheme for the clusters
x = np.arange(kclusters+1)
ys = [i + x + (i*x)**2 for i in range(kclusters+1)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Vizianagaram_merged['Latitude'], Vizianagaram_merged['Longitude'], Vizianagaram_merged['Neighborhood'], Vizianagaram_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=8,
        popup=label,
        color=rainbow[-cluster+1],
        fill=True,
        fill_color=rainbow[-cluster+1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Bobbili
Parvathipuram
Salur
Nellimarla
Srungavarapukota
Pusapatirega
Kothavalasa
Garividi
Gantyada
Gurla
Cheepurupalle
Balijipeta
Therlam
Seethanagaram
Jami
Gajapathinagaram
Merakamudidam
Bhogapuram
Dattirajeru
Denkada
Lakkavarapukota
Jiyyammavalasa
Komarada
Bondapalle
Vepada
Makkuva
Ramabhadrapuram
Gummalakshmipuram
Badangi
Kurupam
Pachipenta
Mentada
Garugubilli
Vizianagaram


## Visakhapatnam

In [168]:
source12=requests.get('https://en.wikipedia.org/wiki/Visakhapatnam_district').text
soup12=BeautifulSoup(source12,'lxml')

In [169]:
table12=soup12.find('table',class_="wikitable").text
table12_data=table12

In [170]:
table12_data=table12_data.split('\n')

In [171]:
x=[]
for a in table12_data:
  if a=='':
    x.append(a)
  elif a[0]<'A' or a[0]>'Z':
    x.append(a)
for a in x:
  table12_data.remove(a)

In [172]:
table12_data=table12_data[4:]

In [173]:
table12_data[1]='Atchutapuram'
table12_data[16]='Devarapalle'
table12_data[43]='Maharani Peta'

In [174]:
table12_lat=[]
table12_lon=[]
i=0
for a in table12_data:
  #print(a)
  address=a+', Visakhapatnam, Andhra Pradesh, India'
  geolocator=Nominatim(user_agent=a)
  location=geolocator.geocode(address)
  if location==None:
    print(a)
    print(i)
    table12_lat.append(np.nan)
    table12_lon.append(np.nan)
    
  else:
    table12_lat.append(location.latitude)
    table12_lon.append(location.longitude)
  i=i+1

In [175]:
Visakhapatnam_info=pd.DataFrame(columns=['Neighborhood','Latitude','Longitude'])
Visakhapatnam_info['Neighborhood']=table12_data
Visakhapatnam_info['Latitude']=table12_lat
Visakhapatnam_info['Longitude']=table12_lon
Visakhapatnam_info

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Anakapalle,17.68897,83.003476
1,Atchutapuram,17.561147,83.009065
2,Ananthagiri,18.173234,82.998002
3,Anandapuram,17.878,83.304
4,Butchayyapeta,17.776233,82.86517
5,Golugonda,17.674896,82.470064
6,Araku Valley,18.293009,82.903429
7,Bheemunipatnam,17.891381,83.451218
8,Cheedikada,17.937441,82.891547
9,Kotauratla,17.543961,82.690095


### Visakhapatnam Analysis

In [176]:
Visakhapatnam_venues = getNearbyVenues(names=Visakhapatnam_info['Neighborhood'],
                                   latitudes=Visakhapatnam_info['Latitude'],
                                   longitudes=Visakhapatnam_info['Longitude'],
                                   radius=(Radius_mandal[10]*1000)
                                  )

# one hot encoding
Visakhapatnam_onehot = pd.get_dummies(Visakhapatnam_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Visakhapatnam_onehot['Neighborhood'] = Visakhapatnam_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Visakhapatnam_onehot.columns[-1]] + list(Visakhapatnam_onehot.columns[:-1])
Visakhapatnam_onehot = Visakhapatnam_onehot[fixed_columns]

Visakhapatnam_grouped = Visakhapatnam_onehot.groupby('Neighborhood').mean().reset_index()

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Visakhapatnam_grouped['Neighborhood']

for ind in np.arange(Visakhapatnam_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Visakhapatnam_grouped.iloc[ind, :], num_top_venues)

# set number of clusters
kclusters = 3

Visakhapatnam_grouped_clustering = Visakhapatnam_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Visakhapatnam_grouped_clustering)

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Visakhapatnam_merged =Visakhapatnam_info

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Visakhapatnam_merged = Visakhapatnam_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Visakhapatnam_merged['Cluster Labels'].replace(np.nan,-1,inplace=True)
Visakhapatnam_merged['Cluster Labels']=Visakhapatnam_merged['Cluster Labels'].astype('int')

address = 'Visakhapatnam, Andhra Pradesh, India'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11,tiles='Stamen Terrain')

# set color scheme for the clusters
x = np.arange(kclusters+1)
ys = [i + x + (i*x)**2 for i in range(kclusters+1)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Visakhapatnam_merged['Latitude'], Visakhapatnam_merged['Longitude'], Visakhapatnam_merged['Neighborhood'], Visakhapatnam_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=8,
        popup=label,
        color=rainbow[-cluster+1],
        fill=True,
        fill_color=rainbow[-cluster+1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Anakapalle
Atchutapuram
Ananthagiri
Anandapuram
Butchayyapeta
Golugonda
Araku Valley
Bheemunipatnam
Cheedikada
Kotauratla
Chintapalle
Gajuwaka
Chodavaram
Makavarapalem
Dumbriguda
Padmanabham
Devarapalle
Nakkapalle
G.Madugula
Paravada
K.Kotapadu
Nathavaram
Gudem Kotha Veedhi
Pedagantyada
Kasimkota
Narsipatnam
Hukumpeta
Pendurthi
Madugula
Payakaraopeta
Koyyuru
Sabbavaram
Munagapaka
Rambilli
Munchingi Puttu
Visakhapatnam Rural
Ravikamatham
S.Rayavaram
Paderu
Seethammadhara
Rolugunta
Elamanchili
Peda Bayalu
Maharani Peta
Gopalapatnam
Mulagada


## West Godavari

In [177]:
source13=requests.get('https://www.census2011.co.in/data/district/132-west-godavari-andhra-pradesh.html').text
soup13=BeautifulSoup(source13,'lxml')

In [178]:
table13=soup13.find('table').text
table13_data=table13

In [179]:
table13_data=table13_data.split('\n')

In [180]:
x=[]
for a in table13_data:
  if a=='':
    x.append(a)
  elif a[0]<'A' or a[0]>'Z':
    x.append(a)
for a in x:
  table13_data.remove(a)

for a in table13_data:
  if a=='West Godavari':
    table13_data.remove('West Godavari')

In [181]:
table13_data=table13_data[2:]

In [182]:
table13_lat=[]
table13_lon=[]
i=0
for a in table13_data:
  address=a+', West Godavari, Andhra Pradesh, India'
  geolocator=Nominatim(user_agent=a)
  location=geolocator.geocode(address)
  if location==None:
    print(a)
    print(i)
    table13_lat.append(np.nan)
    table13_lon.append(np.nan)
    
  else:
    table13_lat.append(location.latitude)
    table13_lon.append(location.longitude)
  i=i+1

In [183]:
WestGodavari_info=pd.DataFrame(columns=['Neighborhood','Latitude','Longitude'])
WestGodavari_info['Neighborhood']=table13_data
WestGodavari_info['Latitude']=table13_lat
WestGodavari_info['Longitude']=table13_lon
WestGodavari_info

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Eluru,16.676135,81.170868
1,Bhimavaram,16.542769,81.527344
2,Tadepalligudem,16.876587,81.545145
3,Tanuku,16.75414,81.685839
4,Narasapuram,16.438627,81.701501
5,Palacole,16.531372,81.685531
6,Nidadavole,16.906563,81.654235
7,Jangareddigudem,17.097316,81.314309
8,Kovvur,17.017968,81.710121
9,Chintalapudi,17.097628,80.964127


### West Godavari Analysis

In [184]:
WestGodavari_venues = getNearbyVenues(names=WestGodavari_info['Neighborhood'],
                                   latitudes=WestGodavari_info['Latitude'],
                                   longitudes=WestGodavari_info['Longitude'],
                                   radius=(Radius_mandal[12]*1000)
                                  )

# one hot encoding
WestGodavari_onehot = pd.get_dummies(WestGodavari_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
WestGodavari_onehot['Neighborhood'] = WestGodavari_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [WestGodavari_onehot.columns[-1]] + list(WestGodavari_onehot.columns[:-1])
WestGodavari_onehot = WestGodavari_onehot[fixed_columns]

WestGodavari_grouped = WestGodavari_onehot.groupby('Neighborhood').mean().reset_index()

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = WestGodavari_grouped['Neighborhood']

for ind in np.arange(WestGodavari_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(WestGodavari_grouped.iloc[ind, :], num_top_venues)

# set number of clusters
kclusters = 3

WestGodavari_grouped_clustering = WestGodavari_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(WestGodavari_grouped_clustering)

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

WestGodavari_merged =WestGodavari_info

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
WestGodavari_merged = WestGodavari_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

WestGodavari_merged['Cluster Labels'].replace(np.nan,-1,inplace=True)
WestGodavari_merged['Cluster Labels']=WestGodavari_merged['Cluster Labels'].astype('int')

address = 'West Godavari, Andhra Pradesh, India'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11,tiles='Stamen Terrain')

# set color scheme for the clusters
x = np.arange(kclusters+1)
ys = [i + x + (i*x)**2 for i in range(kclusters+1)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(WestGodavari_merged['Latitude'], WestGodavari_merged['Longitude'], WestGodavari_merged['Neighborhood'], WestGodavari_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=8,
        popup=label,
        color=rainbow[-cluster+1],
        fill=True,
        fill_color=rainbow[-cluster+1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Eluru
Bhimavaram
Tadepalligudem
Tanuku
Narasapuram
Palacole
Nidadavole
Jangareddigudem
Kovvur
Chintalapudi
Pedavegi
Unguturu
Nallajerla
Devarapalle
Undrajavaram
Koyyalagudem
Akividu
Mogalthur
Yelamanchili
Pentapadu
Peravali
Penugonda
Pedapadu
Dwarakatirumala
Attili
Kalla
Denduluru
Palacoderu
Undi
Iragavaram
Poduru
Bhimadole
Ganapavaram
Veeravasaram
Chagallu
Gopalapuram
Achanta
Penumantra
Lingapalem
Kamavarapukota
T.Narasapuram
Buttayagudem
Tallapudi
Nidamarru
Polavaram
Jeelugu Milli


# Analysis of the entire state(District wise)

In [188]:
Anantapur_final=pd.DataFrame(columns=Anantapur_grouped.columns[1:])
Anantapur_final['District']=['Anantapur']
for x in Anantapur_grouped.columns[1:]:
  Anantapur_final[x]=Anantapur_grouped[x].mean()

Anantapur_final
    

Unnamed: 0,ATM,Andhra Restaurant,Bed & Breakfast,Boat or Ferry,Border Crossing,Breakfast Spot,Bus Station,Cafeteria,Café,City,...,Rock Climbing Spot,Sandwich Place,Tea Room,Temple,Theme Restaurant,Toll Plaza,Trail,Train Station,Vegetarian / Vegan Restaurant,District
0,0.055991,0.026114,0.013441,0.017204,0.005376,0.016129,0.064516,0.03871,0.004608,0.008065,...,0.006452,0.016129,0.026114,0.008065,0.004608,0.005376,0.016129,0.139324,0.032258,Anantapur


In [190]:
Chittoor_final=pd.DataFrame(columns=Chittoor_grouped.columns[1:])
Chittoor_final['District']=['Chittoor']
for x in Chittoor_grouped.columns[1:]:
  Chittoor_final[x]=Chittoor_grouped[x].mean()

East_Godavari_final=pd.DataFrame(columns=East_Godavari_grouped.columns[1:])
East_Godavari_final['District']=['East_Godavari']
for x in East_Godavari_grouped.columns[1:]:
  East_Godavari_final[x]=East_Godavari_grouped[x].mean()

Guntur_final=pd.DataFrame(columns=Guntur_grouped.columns[1:])
Guntur_final['District']=['Guntur']
for x in Guntur_grouped.columns[1:]:
  Guntur_final[x]=Guntur_grouped[x].mean()

YSR_final=pd.DataFrame(columns=YSR_grouped.columns[1:])
YSR_final['District']=['YSR']
for x in YSR_grouped.columns[1:]:
  YSR_final[x]=YSR_grouped[x].mean()

Krishna_final=pd.DataFrame(columns=Krishna_grouped.columns[1:])
Krishna_final['District']=['Krishna']
for x in Krishna_grouped.columns[1:]:
  Krishna_final[x]=Krishna_grouped[x].mean()

Kurnool_final=pd.DataFrame(columns=Kurnool_grouped.columns[1:])
Kurnool_final['District']=['Kurnool']
for x in Kurnool_grouped.columns[1:]:
  Kurnool_final[x]=Kurnool_grouped[x].mean()

Nellore_final=pd.DataFrame(columns=Nellore_grouped.columns[1:])
Nellore_final['District']=['Nellore']
for x in Nellore_grouped.columns[1:]:
  Nellore_final[x]=Nellore_grouped[x].mean()

Prakasam_final=pd.DataFrame(columns=Prakasam_grouped.columns[1:])
Prakasam_final['District']=['Prakasam']
for x in Prakasam_grouped.columns[1:]:
  Prakasam_final[x]=Prakasam_grouped[x].mean()

Srikakulam_final=pd.DataFrame(columns=Srikakulam_grouped.columns[1:])
Srikakulam_final['District']=['Srikakulam']
for x in Srikakulam_grouped.columns[1:]:
  Srikakulam_final[x]=Srikakulam_grouped[x].mean()

Vizianagaram_final=pd.DataFrame(columns=Vizianagaram_grouped.columns[1:])
Vizianagaram_final['District']=['Vizianagaram']
for x in Vizianagaram_grouped.columns[1:]:
  Vizianagaram_final[x]=Vizianagaram_grouped[x].mean()

Visakhapatnam_final=pd.DataFrame(columns=Visakhapatnam_grouped.columns[1:])
Visakhapatnam_final['District']=['Visakhapatnam']
for x in Visakhapatnam_grouped.columns[1:]:
  Visakhapatnam_final[x]=Visakhapatnam_grouped[x].mean()

WestGodavari_final=pd.DataFrame(columns=WestGodavari_grouped.columns[1:])
WestGodavari_final['District']=['WestGodavari']
for x in WestGodavari_grouped.columns[1:]:
  WestGodavari_final[x]=WestGodavari_grouped[x].mean()

frames=[Anantapur_final,Chittoor_final,East_Godavari_final,Guntur_final,YSR_final,Krishna_final,Kurnool_final,Nellore_final,Prakasam_final,Srikakulam_final,Visakhapatnam_final,Vizianagaram_final,WestGodavari_final]

In [196]:
Final=pd.concat(frames)
x=District_info['District'].values
Final['District']=x
Final_Lat=[]
Final_Lon=[]
for a in x:
    address=a
    geolocator = Nominatim(user_agent=a)
    location = geolocator.geocode(address)
    Final_Lat.append(location.latitude)
    Final_Lon.append(location.longitude)

Final.set_index('District',inplace=True)
Final.replace(np.nan,0,inplace=True)
Final['Latitude']=Final_Lat
Final['Longitude']=Final_Lon

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,ATM,Adult Boutique,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Andhra Restaurant,Aquarium,...,Tunnel,Turkish Restaurant,Vegetarian / Vegan Restaurant,Warehouse Store,Watch Shop,Water Park,Waterfall,Waterfront,Latitude,Longitude
District,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Anantapur,0.055991,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026114,0.0,...,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,14.654623,77.55626
Chittoor,0.05506,0.0,0.024107,0.0,0.0,0.00625,0.008036,0.0,0.008036,0.017857,...,0.0,0.0,0.01994,0.0,0.0,0.0,0.003571,0.0,13.160105,79.155551
East Godavari,0.091667,0.003788,0.010732,0.0,0.0,0.011364,0.0,0.0,0.008523,0.0,...,0.0,0.0,0.010428,0.0,0.0,0.0,0.011364,0.0,17.233496,81.722599
Guntur,0.052083,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030655,0.0,...,0.0,0.0,0.000833,0.0,0.0,0.007143,0.0,0.016667,16.291519,80.454159
YSR Kadapa,0.020202,0.0,0.021212,0.0,0.0,0.0,0.0,0.0,0.015152,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005051,14.475294,78.821686
Krishna,0.149123,0.0,0.017544,0.002193,0.002193,0.004386,0.006579,0.026316,0.0,0.0,...,0.0,0.0,0.001754,0.008772,0.0,0.0,0.0,0.008772,16.669152,80.719002
Kurnool,0.072917,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.830925,78.042537
Nellore,0.133951,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009259,0.0,...,0.0,0.0,0.007407,0.0,0.005265,0.0,0.0,0.0,14.449372,79.987376
Prakasam,0.071939,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.004762,0.0,0.0,0.0,0.0,0.0,15.5,79.5
Srikakulam,0.206522,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.320022,83.916077


In [200]:
Final_cluster=Final.drop(['Latitude','Longitude','ATM'],axis=1)
clust=KMeans(n_clusters=3, random_state=0).fit(Final_cluster)
clust.labels_

array([1, 2, 1, 1, 0, 1, 1, 1, 0, 2, 1, 1, 1])

In [202]:
District_info['Cluster Labels']=clust.labels_

In [204]:
address = 'Andhra Pradesh, India'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=9,tiles='Stamen Terrain')

# set color scheme for the clusters
x = np.arange(kclusters+1)
ys = [i + x + (i*x)**2 for i in range(kclusters+1)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Final['Latitude'], Final['Longitude'], Final.index, District_info['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=8,
        popup=label,
        color=rainbow[-cluster+1],
        fill=True,
        fill_color=rainbow[-cluster+1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Thanks for going through the notebook
#### Author: Sreemukh.M