# IBM Applied Data Science Capstone Course by Coursera


### Importing Modules and libraries

In [171]:
import os
import time
from io import StringIO

import folium # map rendering library
import geocoder # to get coordinates
import lxml.html as lh
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
from IPython.display import display_html

### Importing the dataset and storing it in a DataFrame

In [82]:
# Download the page that lists the localities of every Delhi constituency.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops the notebook on an HTTP error instead of silently
# parsing an error page further down.
response = requests.get('https://ceodelhi.gov.in/Content/EntireDelhiLocalities.aspx', timeout=30)
response.raise_for_status()
# NOTE: despite its name, this variable holds the page's HTML text, not a URL.
website_url = response.text

In [83]:
# Parse the downloaded HTML text with the lxml parser.
soup = BeautifulSoup(markup=website_url, features='lxml')

In [111]:
# Collect every <table> element whose width attribute is "607".
table_attrs = {'width': "607"}
table = soup.find_all('table', attrs=table_attrs)

In [118]:
# Serialise the matched table elements back to an HTML string and render it
# inline so the scraped content can be inspected.
# (display_html is imported in the notebook's top import cell.)
tab = str(table)
display_html(tab, raw=True)

0,1,2
,,
AC_NO,AC NAME,IMPORTANT LOCALITIES
1,NERELA,SECTOR-A6 NARELA
1,NERELA,VIJAY NAGAR NARELA
1,NERELA,(METRO VIHAR PHASE-I)HOLAMBI KALAN
1,NERELA,(METRO VIHAR PHASE-II)HOLAMBI KALAN
1,NERELA,DESU COLONY NARELA
1,NERELA,GAUTAM COLONY NARELA
1,NERELA,INDRA COLONY
1,NERELA,MATRO VIHAR HOLAMBI KHURD


In [131]:
# Parse the scraped HTML into DataFrames. The markup is wrapped in StringIO
# because newer pandas releases deprecate passing literal HTML strings to
# read_html; a file-like object is accepted by old and new versions alike.
Delhis = pd.read_html(StringIO(tab))
# Keep the first parsed table and give its three columns readable names.
Delhi = Delhis[0]
Delhi.columns = ['Constituency Number', 'Constituency Name', 'Area']

In [132]:
# Preview the first five rows of the freshly parsed table.
Delhi.head(n=5)

Unnamed: 0,Constituency Number,Constituency Name,Area
0,,,
1,AC_NO,AC NAME,IMPORTANT LOCALITIES
2,1,NERELA,SECTOR-A6 NARELA
3,1,NERELA,VIJAY NAGAR NARELA
4,1,NERELA,(METRO VIHAR PHASE-I)HOLAMBI KALAN


### Data Cleaning and Preprocessing

#### Deleting NaN rows

In [133]:
# Discard every row that contains at least one missing value.
Delhi = Delhi.dropna(axis=0, how='any')

#### Deleting the header row

In [134]:
# Remove the row labelled 1, which the preview shows holds the scraped
# header text (AC_NO, AC NAME, IMPORTANT LOCALITIES) rather than data.
Delhi = Delhi.drop(index=[1])

In [135]:
# reset_index returns a new frame, so assign it back; otherwise the
# renumbered index is only displayed and Delhi keeps its gapped labels.
Delhi = Delhi.reset_index(drop=True)
Delhi

Unnamed: 0,Constituency Number,Constituency Name,Area
0,1,NERELA,SECTOR-A6 NARELA
1,1,NERELA,VIJAY NAGAR NARELA
2,1,NERELA,(METRO VIHAR PHASE-I)HOLAMBI KALAN
3,1,NERELA,(METRO VIHAR PHASE-II)HOLAMBI KALAN
4,1,NERELA,DESU COLONY NARELA
...,...,...,...
2284,70,KARAWAL NAGAR,TUKMIRPUR & VILL
2285,70,KARAWAL NAGAR,TUKMIRPUR EXTN
2286,70,KARAWAL NAGAR,VILL KHAJOORI KHAS
2287,70,KARAWAL NAGAR,WEST KAMAL VIHAR


### Grouping the Constituencies

In [152]:
# One row per constituency: join the 'Area' strings of each
# (number, name) group with ', ', keeping groups in first-seen order.
group_keys = ['Constituency Number', 'Constituency Name']
delhi_area = (
    Delhi
    .groupby(by=group_keys, sort=False)
    .agg(', '.join)
)

In [154]:
# Turn the group keys from index levels back into ordinary columns
# (modifies delhi_area in place).
delhi_area.reset_index(drop=False, inplace=True)

In [157]:
# Preview the first five grouped constituencies.
delhi_area.head(n=5)

Unnamed: 0,Constituency Number,Constituency Name,Area
0,1,NERELA,"SECTOR-A6 NARELA, VIJAY NAGAR NARELA, (METRO V..."
1,2,BURARI,"BURARI, IBRAHIMPUR, JAHANGIRPURI RESTTLEMENT C..."
2,3,TIMARPUR,"B D ESTATE, B R HOSPITAL, BHAI PARMANAND COLON..."
3,4,ADARSH NAGAR,"ADARSH NAGAR, AZADPUR, DDA FLAT JAHANGIR PURI,..."
4,5,BADLI,"BADLI VILLAGE, BHALSWA DAIRY, CHANDAN PARK, IN..."


In [158]:
# Show (rows, columns) of the grouped frame as a sanity check on the grouping.
delhi_area.shape

(70, 3)

### Getting the geographical coordinates

In [181]:
# define a function to get coordinates
def get_latlng(constiuency, max_attempts=5, retry_delay=1.0):
    """Look up the latitude/longitude of a Delhi constituency via ArcGIS.

    The query sent to the geocoder is '<constiuency>, Delhi, India'.

    Parameters
    ----------
    constiuency : str
        Name of the constituency to geocode.
    max_attempts : int, optional
        Maximum number of geocoder calls before giving up. The original
        implementation retried forever, which hangs the notebook when the
        service has no answer for a name.
    retry_delay : float, optional
        Seconds to wait between failed attempts; 0 disables the pause.

    Returns
    -------
    The ``latlng`` value reported by the geocoder result.

    Raises
    ------
    RuntimeError
        If no coordinates were obtained after ``max_attempts`` calls.
    """
    query = '{}, Delhi, India'.format(constiuency)
    for attempt in range(1, max_attempts + 1):
        lat_lng_coords = geocoder.arcgis(query).latlng
        if lat_lng_coords is not None:
            return lat_lng_coords
        # Pause before retrying, but not after the final failed attempt.
        if retry_delay > 0 and attempt < max_attempts:
            time.sleep(retry_delay)
    raise RuntimeError(
        'No coordinates found for {!r} after {} attempts'.format(query, max_attempts))

In [182]:
# Geocode each constituency name, in row order.
constituency_names = delhi_area["Constituency Name"].tolist()
coords = list(map(get_latlng, constituency_names))

In [183]:
# Tabulate the looked-up coordinate pairs as two named columns.
coord_columns = ['Latitude', 'Longitude']
df_coords = pd.DataFrame(data=coords, columns=coord_columns)

###  Merge the coordinates into the original dataframe

In [184]:
# Copy both coordinate columns onto the constituency frame; pandas aligns
# the assigned Series on the row index.
for coord_column in ('Latitude', 'Longitude'):
    delhi_area[coord_column] = df_coords[coord_column]

In [185]:
# Display the constituency frame, now including the Latitude/Longitude columns.
delhi_area

Unnamed: 0,Constituency Number,Constituency Name,Area,Latitude,Longitude
0,1,NERELA,"SECTOR-A6 NARELA, VIJAY NAGAR NARELA, (METRO V...",28.839790,77.076960
1,2,BURARI,"BURARI, IBRAHIMPUR, JAHANGIRPURI RESTTLEMENT C...",28.756600,77.206950
2,3,TIMARPUR,"B D ESTATE, B R HOSPITAL, BHAI PARMANAND COLON...",28.700780,77.221160
3,4,ADARSH NAGAR,"ADARSH NAGAR, AZADPUR, DDA FLAT JAHANGIR PURI,...",28.720350,77.172640
4,5,BADLI,"BADLI VILLAGE, BHALSWA DAIRY, CHANDAN PARK, IN...",28.731650,77.134060
...,...,...,...,...,...
65,66,GHONDA,"4TH PUSTA KARTAR NAGAR J-BLOCK, A-BLOCK BHAJAN...",28.690197,77.269925
66,67,BABARPUR,"BABAR PUR VILLAGE (EAST BABAR PUR), BALBIR NAG...",28.507380,77.303460
67,68,GOKALPUR,"BHAGIRATHI VIHAR, EAST GOKALPUR, GANGA VIHAR, ...",28.703600,77.282940
68,69,MUSTAFABAD,"AMAR VIHAR, AMBIKA VIHAR, BABU NAGAR, BHAGAT V...",28.714800,77.274990


### Getting the coordinates of Delhi, India

In [186]:
# Look up the coordinates of the city itself; they are used as the map centre.
address = 'Delhi, India'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
# geocode() yields None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Delhi, India {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Delhi, India 28.6517178, 77.2219388.


### Building the map of Delhi, India

In [187]:
# Base map centred on the Delhi coordinates obtained above.
map_dl = folium.Map(location=[latitude, longitude], zoom_start=11)

# Appearance shared by every constituency marker.
marker_style = dict(
    radius=6,
    color='red',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
)

# add one circle marker per constituency, with its name as the popup text
for lat, lng, neighborhood in zip(delhi_area['Latitude'], delhi_area['Longitude'], delhi_area['Constituency Name']):
    label = folium.Popup('{}'.format(neighborhood), parse_html=True)
    marker = folium.CircleMarker([lat, lng], popup=label, **marker_style)
    marker.add_to(map_dl)

map_dl

###  Saving the Map of Delhi

In [188]:
# Write the rendered map to an HTML file in the working directory.
map_dl.save(outfile='map_dl.html')

In [189]:
# define Foursquare Credentials and Version
# The ID and secret are read from environment variables so they never live in
# the notebook source or its saved outputs. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that was previously hard-coded in this cell should
# be treated as compromised and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

if not CLIENT_ID or not CLIENT_SECRET:
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests are expected to fail.')

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Never echo the secret itself; show only a mask of the same length.
print('CLIENT_SECRET:' + '*' * len(CLIENT_SECRET))

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [193]:
# Collect the venues Foursquare reports around each constituency's coordinates.
radius = 1000  # search radius passed to the API's `radius` parameter
LIMIT = 100    # maximum number of venues requested per constituency

venues = []

# endpoint of the Foursquare "explore" API
url = "https://api.foursquare.com/v2/venues/explore"

for lat, long, neighborhood in zip(delhi_area['Latitude'], delhi_area['Longitude'], delhi_area['Constituency Name']):

    # Pass the query through `params` so requests handles URL encoding and the
    # credentials are not concatenated into a hand-built URL string.
    response = requests.get(
        url,
        params={
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, long),
            'radius': radius,
            'limit': LIMIT,
        },
        timeout=30)
    # Fail with the HTTP error rather than a KeyError on a malformed payload.
    response.raise_for_status()

    # An empty or missing 'groups' list simply means no venues for this point.
    groups = response.json().get('response', {}).get('groups', [])
    results = groups[0]['items'] if groups else []

    # return only relevant information for each nearby venue
    for venue in results:
        details = venue['venue']
        # A venue may come back without any category; record None in that
        # case instead of raising IndexError on categories[0].
        categories = details.get('categories') or []
        venues.append((
            neighborhood,
            lat,
            long,
            details['name'],
            details['location']['lat'],
            details['location']['lng'],
            categories[0]['name'] if categories else None))

In [194]:
# Build the venue table with its column names supplied up front. Passing
# `columns=` to the constructor also works when `venues` is empty, whereas
# assigning seven names to an empty frame afterwards raises a length mismatch.
venues_df = pd.DataFrame(
    venues,
    columns=['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory'])

print(venues_df.shape)
venues_df.head()

(831, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,NERELA,28.83979,77.07696,Axis Bank ATM,28.839769,77.076927,ATM
1,NERELA,28.83979,77.07696,Axis Bank ATM,28.839769,77.076927,ATM
2,NERELA,28.83979,77.07696,Satyam Multiplex,28.846124,77.08334,Multiplex
3,NERELA,28.83979,77.07696,vicky traders,28.84618,77.083427,Furniture / Home Store
4,BURARI,28.7566,77.20695,Axis Bank ATM,28.756599,77.206947,ATM


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,NERELA,28.83979,77.07696,Axis Bank ATM,28.839769,77.076927,ATM
1,NERELA,28.83979,77.07696,Axis Bank ATM,28.839769,77.076927,ATM
2,NERELA,28.83979,77.07696,Satyam Multiplex,28.846124,77.083340,Multiplex
3,NERELA,28.83979,77.07696,vicky traders,28.846180,77.083427,Furniture / Home Store
4,BURARI,28.75660,77.20695,Axis Bank ATM,28.756599,77.206947,ATM
...,...,...,...,...,...,...,...
495,JANGPURA,28.58340,77.24719,Chi Kitchen & Bar,28.581381,77.242000,Chinese Restaurant
496,JANGPURA,28.58340,77.24719,bloomrooms @ Link Rd,28.581481,77.239549,Hotel
497,JANGPURA,28.58340,77.24719,McDonald's,28.581301,77.241830,Fast Food Restaurant
498,JANGPURA,28.58340,77.24719,Karims,28.591396,77.243657,Mughlai Restaurant
