In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

import numpy as np # library to handle data in a vectorized manner

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
#=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.18.1               |             py_0          51 KB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          84 KB

The following NEW packages will be INSTALLED:

    geographiclib: 1.49-py_0   conda-forge
    geopy:         1.18.1-py_0 conda-forge


Downloading and Extracting Packages
geopy-1.18.1         | 51 KB     | ##################################### | 100% 
geographiclib-1.49   | 32 KB     | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: done
Solving environme

In [2]:
# Download the Wikipedia page that lists Mumbai's neighbourhoods.
# NOTE: despite its name, `website_url` holds the page's HTML text; the name is
# kept because the parsing cell reads it.
page_response = requests.get(
    "https://en.wikipedia.org/wiki/List_of_neighbourhoods_in_Mumbai",
    timeout=30,  # requests has no default timeout; a stalled connection would hang the kernel
)
page_response.raise_for_status()  # stop here on 4xx/5xx instead of parsing an error page
website_url = page_response.text

In [3]:
# Parse the downloaded HTML with the lxml parser.
soup = BeautifulSoup(website_url,'lxml')

# Preview only the beginning of the document: printing the whole prettified
# page floods the notebook output without adding information.
print(soup.prettify()[:2000])

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of neighbourhoods in Mumbai - Wikipedia
  </title>
  <script>
   document.documentElement.className = document.documentElement.className.replace( /(^|\s)client-nojs(\s|$)/, "$1client-js$2" );
  </script>
  <script>
   (window.RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"List_of_neighbourhoods_in_Mumbai","wgTitle":"List of neighbourhoods in Mumbai","wgCurRevisionId":874356815,"wgRevisionId":874356815,"wgArticleId":37060396,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["CS1 errors: dates","CS1 maint: Unfit url","Neighbourhoods in Mumbai","Lists of neighbourhoods in Indian cities","Mumbai-related lists"],"wgBreakFrames":false,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTa

In [4]:
# Collect every span element carrying the 'mw-headline' class and preview the
# first ten of them.
My_table = soup.find_all('span', class_='mw-headline')
My_table[:10]

[<span class="mw-headline" id="Western_Suburbs">Western Suburbs</span>,
 <span class="mw-headline" id="Andheri"><a href="/wiki/Andheri" title="Andheri">Andheri</a></span>,
 <span class="mw-headline" id="Bandra"><a href="/wiki/Bandra" title="Bandra">Bandra</a></span>,
 <span class="mw-headline" id="Borivali"><a href="/wiki/Borivali" title="Borivali">Borivali</a></span>,
 <span class="mw-headline" id="Dahisar"><a href="/wiki/Dahisar" title="Dahisar">Dahisar</a></span>,
 <span class="mw-headline" id="Goregaon"><a href="/wiki/Goregaon" title="Goregaon">Goregaon</a></span>,
 <span class="mw-headline" id="Jogeshwari"><a href="/wiki/Jogeshwari" title="Jogeshwari">Jogeshwari</a></span>,
 <span class="mw-headline" id="Juhu"><a href="/wiki/Juhu" title="Juhu">Juhu</a></span>,
 <span class="mw-headline" id="Kandivali_west"><a class="new" href="/w/index.php?title=Kandivali_west&amp;action=edit&amp;redlink=1" title="Kandivali west (page does not exist)">Kandivali west</a></span>,
 <span class="mw-he

In [5]:
# Headline ids that are region groupings rather than neighbourhoods.
region_headings = {'Western_Suburbs', 'Eastern_Suburbs', 'Harbour_Suburbs', 'South_Mumbai'}

# Only the first 39 headlines are kept, as in the original extraction.
# NOTE(review): presumably the later headlines are trailing page sections - confirm
# against the current revision of the page.
headline_ids = [n.get('id') for n in My_table][:39]

# Drop the region headings and turn the underscores of the ids into spaces.
# Building a new list (instead of patching by position over a fixed range)
# keeps the cleanup correct whatever the number of remaining entries is.
neighborhoods = [
    headline_id.replace('_', ' ')
    for headline_id in headline_ids
    if headline_id and headline_id not in region_headings
]
print(neighborhoods)

['Andheri', 'Bandra', 'Borivali', 'Dahisar', 'Goregaon', 'Jogeshwari', 'Juhu', 'Kandivali west', 'Khar', 'Malad', 'Santacruz', 'Vile Parle', 'Bhandup', 'Ghatkopar', 'Kanjurmarg', 'Kurla', 'Mulund', 'Powai', 'Vidyavihar', 'Vikhroli', 'Chembur', 'Govandi', 'Mankhurd', 'Trombay', 'Antop Hill', 'Byculla', 'Colaba', 'Dadar', 'Fort', 'Girgaon', 'Kalbadevi', 'Kamathipura', 'Matunga', 'Parel', 'Tardeo']


In [6]:
# One-column frame holding the cleaned neighbourhood names.
df = pd.DataFrame({'Neighborhoods': neighborhoods})
df

Unnamed: 0,Neighborhoods
0,Andheri
1,Bandra
2,Borivali
3,Dahisar
4,Goregaon
5,Jogeshwari
6,Juhu
7,Kandivali west
8,Khar
9,Malad


In [7]:
def get_coords_local(neigh, output_as='center'):
    """Look up the coordinates of a Mumbai neighbourhood with the Nominatim search API.

    Parameters
    ----------
    neigh : str
        Neighbourhood name; ',Mumbai,India' is appended to build the query.
    output_as : str, optional
        Not used by the lookup; accepted so that existing calls keep working.

    Returns
    -------
    list of float
        ``[latitude, longitude]`` taken from the first search result.

    Raises
    ------
    IndexError
        If the service returns no result for the query.
    requests.HTTPError
        If the service answers with an error status code.
    """
    response = requests.get(
        'https://nominatim.openstreetmap.org/search',
        # Passing the query through `params` lets requests URL-encode it, so
        # names containing spaces or reserved characters are sent correctly.
        params={'q': neigh + ',Mumbai,India', 'format': 'json', 'polygon': 0},
        # The public Nominatim service asks clients to identify themselves.
        headers={'User-Agent': 'mumbai-gym-location-notebook'},
        timeout=30,
    )
    response.raise_for_status()

    results = response.json()
    if not results:
        # Same exception type as indexing the empty list, but with a message
        # that names the query that failed.
        raise IndexError('Nominatim returned no result for {!r}'.format(neigh))

    first_hit = results[0]
    return [float(first_hit[key]) for key in ('lat', 'lon')]

In [8]:
import time

df2 = df.copy()

# Geocode every neighbourhood and collect the coordinates column-wise.
latitudeCln = []
longitudeCln = []
for neighborhood in df2['Neighborhoods']:  # read the column by name, not by position
    print(neighborhood)
    lat, lng = get_coords_local(neigh=neighborhood, output_as='center')
    latitudeCln.append(lat)
    longitudeCln.append(lng)
    # Stay within the public Nominatim limit of one request per second.
    time.sleep(1)

df2['Latitude'] = latitudeCln
df2['Longitude'] = longitudeCln

df2.shape

Andheri
Bandra
Borivali
Dahisar
Goregaon
Jogeshwari
Juhu
Kandivali west
Khar
Malad
Santacruz
Vile Parle
Bhandup
Ghatkopar
Kanjurmarg
Kurla
Mulund
Powai
Vidyavihar
Vikhroli
Chembur
Govandi
Mankhurd
Trombay
Antop Hill
Byculla
Colaba
Dadar
Fort
Girgaon
Kalbadevi
Kamathipura
Matunga
Parel
Tardeo


(35, 3)

In [9]:
# Show the neighbourhoods together with the Latitude/Longitude columns added above.
df2

Unnamed: 0,Neighborhoods,Latitude,Longitude
0,Andheri,19.120371,72.848043
1,Bandra,19.054979,72.84022
2,Borivali,19.228738,72.856877
3,Dahisar,19.257178,72.857536
4,Goregaon,19.164973,72.849543
5,Jogeshwari,19.135734,72.848923
6,Juhu,19.107021,72.827528
7,Kandivali west,19.20838,72.842227
8,Khar,19.072457,72.833707
9,Malad,19.184677,72.835807


In [10]:
address = 'Mumbai, India'

# geopy expects an explicit user_agent for Nominatim: older releases emit a
# warning when it is missing and newer ones refuse to build the geocoder.
geolocator = Nominatim(user_agent='mumbai-gym-location-notebook')
location = geolocator.geocode(address, timeout=60, exactly_one=True)
if location is None:
    # geocode() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError('Could not geocode {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The decimal coordinates of Mumbai are {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The decimal coordinates of Mumbai are 18.9387711, 72.8353355.


In [11]:
# Work on an independent copy so that df2 stays available unchanged if it is
# needed again.
df3 = df2.copy(deep=True)
df3

Unnamed: 0,Neighborhoods,Latitude,Longitude
0,Andheri,19.120371,72.848043
1,Bandra,19.054979,72.84022
2,Borivali,19.228738,72.856877
3,Dahisar,19.257178,72.857536
4,Goregaon,19.164973,72.849543
5,Jogeshwari,19.135734,72.848923
6,Juhu,19.107021,72.827528
7,Kandivali west,19.20838,72.842227
8,Khar,19.072457,72.833707
9,Malad,19.184677,72.835807


In [12]:
# Base map centred on the geocoded coordinates of Mumbai.
map_mumbai = folium.Map(location=[latitude, longitude], zoom_start=12)

# Draw one circle marker per neighbourhood; its popup shows the neighbourhood name.
for row in df3.itertuples(index=False):
    popup = folium.Popup('{}'.format(row.Neighborhoods), parse_html=True)
    marker = folium.CircleMarker(
        [row.Latitude, row.Longitude],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    )
    marker.add_to(map_mumbai)

map_mumbai

In [13]:
# function to repeat the exploring process to all the neighborhoods in Mumbai
import urllib
def getNearbyVenues(names, latitudes, longitudes, radius=5000, categoryIds=''):
    """Run a Foursquare venue search around each location and tabulate the hits.

    The notebook-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT are read
    when a request is made.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences: one name and one coordinate pair per location.
    radius : number, optional
        Search radius passed to the API as ``radius``.
    categoryIds : str, optional
        When not empty, passed to the API as ``categoryId``.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue that has at least one named category, with
        the columns 'Neighborhood', 'Neighborhood Latitude',
        'Neighborhood Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude'
        and 'Venue Category'. The frame is empty (but keeps these columns)
        when there is nothing to report.

    Raises
    ------
    RuntimeError
        If a response does not contain the expected ``response.venues`` list.
    requests.RequestException
        If a request itself fails.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    rows = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests assemble and encode the query string.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        if categoryIds != '':
            params['categoryId'] = categoryIds

        # make the GET request
        response = requests.get('https://api.foursquare.com/v2/venues/search',
                                params=params, timeout=30)
        payload = response.json()
        try:
            results = payload['response']['venues']
        except (KeyError, TypeError):
            # Report which location failed and what the API said, rather than
            # continuing with a table that silently misses a neighbourhood.
            meta = payload.get('meta') if isinstance(payload, dict) else payload
            raise RuntimeError(
                'Foursquare venue search failed for {!r}: {}'.format(name, meta))

        # keep only the relevant information for each nearby venue
        for v in results:
            categories = v.get('categories') or []
            if not categories or 'name' not in categories[0]:
                # Venues without a named category are skipped.
                continue
            rows.append((
                name,
                lat,
                lng,
                v['name'],
                v['location']['lat'],
                v['location']['lng'],
                categories[0]['name'],
            ))

    # Passing `columns` here keeps the schema stable even when `rows` is empty.
    nearby_venues = pd.DataFrame(rows, columns=columns)
    return(nearby_venues)

In [14]:
import os

LIMIT = 500 # limit of number of venues requested per Foursquare API call
radius = 5000 # define radius (the venue-search cells pass radius=1000 explicitly)

# Credentials are read from the environment so they never end up in the saved
# notebook. Any key pair that was committed in an earlier revision of this cell
# should be considered leaked and regenerated in the Foursquare developer console.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
VERSION = '20181020'

if not CLIENT_ID or not CLIENT_SECRET:
    print('Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before running the Foursquare cells.')

In [15]:
# Restrict the search to category id 4bf58dd8d48988d175941735 so that only gyms
# are returned, within 1000 of each neighbourhood's coordinates.
mumbai_venues_gym = getNearbyVenues(
    names=df3['Neighborhoods'],
    latitudes=df3['Latitude'],
    longitudes=df3['Longitude'],
    radius=1000,
    categoryIds='4bf58dd8d48988d175941735',
)
mumbai_venues_gym.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Andheri,19.120371,72.848043,Hotel Imperial Palace Mumbai,19.118523,72.850853,Yoga Studio
1,Andheri,19.120371,72.848043,SweatZone,19.129021,72.84881,Gym
2,Andheri,19.120371,72.848043,Synergym,19.118623,72.842381,Gym
3,Andheri,19.120371,72.848043,Step n Dance Fitness Hub,19.113742,72.855497,Gym / Fitness Center
4,Andheri,19.120371,72.848043,Fitness Hub,19.123392,72.850568,Gym / Fitness Center


In [16]:
# (rows, columns) of the gym table: one row per gym venue returned.
mumbai_venues_gym.shape

(480, 7)

In [17]:
# function to add markers for given venues to map
def addToMap(df, color, existingMap):
    """Draw one circle marker per row of ``df`` on ``existingMap``.

    ``df`` must provide the columns 'Venue Latitude', 'Venue Longitude',
    'Neighborhood', 'Venue' and 'Venue Category'. Each marker is outlined and
    filled with ``color`` and its popup reads
    "venue (category) - neighbourhood". The map is modified in place and
    nothing is returned.
    """
    venue_columns = ['Venue Latitude', 'Venue Longitude', 'Neighborhood', 'Venue', 'Venue Category']
    marker_style = dict(radius=5, color=color, fill=True, fill_color=color, fill_opacity=0.7)

    for venue_lat, venue_lng, neighborhood, venue_name, category in zip(*(df[c] for c in venue_columns)):
        popup_text = '{} ({}) - {}'.format(venue_name, category, neighborhood)
        popup = folium.Popup(popup_text, parse_html=True)
        marker = folium.CircleMarker([venue_lat, venue_lng], popup=popup, **marker_style)
        marker.add_to(existingMap)

In [18]:
# Fresh map centred on Mumbai's coordinates, with every gym venue drawn in red.
map_mumbai_gym = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(mumbai_venues_gym, 'red', map_mumbai_gym)
map_mumbai_gym

In [19]:
# Same search as for the gyms, with the category id that returned 'High School'
# venues in the recorded run.
mumbai_venues_highschools = getNearbyVenues(
    names=df3['Neighborhoods'],
    latitudes=df3['Latitude'],
    longitudes=df3['Longitude'],
    radius=1000,
    categoryIds='4bf58dd8d48988d13d941735',
)
mumbai_venues_highschools.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Andheri,19.120371,72.848043,Bhatia Commerce Classes,19.122598,72.846387,High School
1,Andheri,19.120371,72.848043,Vissanji Academy,19.119943,72.852647,High School
2,Andheri,19.120371,72.848043,Swss high school,19.11771,72.841095,High School
3,Andheri,19.120371,72.848043,Gundavli Municipal School,19.116824,72.853582,High School
4,Andheri,19.120371,72.848043,Little Flower High School,19.112666,72.85402,High School


In [20]:
# (rows, columns) of the high-school table.
mumbai_venues_highschools.shape

(158, 7)

In [21]:
# Fresh map centred on Mumbai's coordinates, with every high-school venue drawn in green.
map_mumbai_highschools = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(mumbai_venues_highschools, 'green', map_mumbai_highschools)
map_mumbai_highschools

In [22]:
# Same search again, with the category id that returned 'University' venues in
# the recorded run.
mumbai_venues_uni = getNearbyVenues(
    names=df3['Neighborhoods'],
    latitudes=df3['Latitude'],
    longitudes=df3['Longitude'],
    radius=1000,
    categoryIds='4bf58dd8d48988d1ae941735',
)
mumbai_venues_uni.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Andheri,19.120371,72.848043,MVLU College,19.122342,72.850916,University
1,Andheri,19.120371,72.848043,MeritTrac Services Pvt Ltd.,19.126361,72.855047,University
2,Andheri,19.120371,72.848043,Tata Compound municipal School,19.110757,72.8415,University
3,Bandra,19.054979,72.84022,Maharashtra State Board Of Technical Education,19.061999,72.846641,University
4,Borivali,19.228738,72.856877,maac borivali,19.226078,72.855081,University


In [23]:
# (rows, columns) of the university table.
mumbai_venues_uni.shape

(54, 7)

In [24]:
# Fresh map centred on Mumbai's coordinates, with every university venue drawn in gold.
map_mumbai_uni = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(mumbai_venues_uni, 'gold', map_mumbai_uni)
map_mumbai_uni

In [25]:
# Venues used as a proxy for workplaces.
# NOTE(review): this id presumably selects Foursquare's professional/office
# top-level category - confirm; the recorded run returned mixed categories
# such as 'Police Station', 'Hospital' and 'Tech Startup'.
mumbai_venues_office = getNearbyVenues(
    names=df3['Neighborhoods'],
    latitudes=df3['Latitude'],
    longitudes=df3['Longitude'],
    radius=1000,
    categoryIds='4d4b7105d754a06375d81259',
)
mumbai_venues_office.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Andheri,19.120371,72.848043,andheri police station,19.120525,72.848279,Police Station
1,Andheri,19.120371,72.848043,Nadco,19.118748,72.84551,Department Store
2,Andheri,19.120371,72.848043,criticare hospital,19.118263,72.850639,Hospital
3,Andheri,19.120371,72.848043,"surane road, Trade Avenue Bldg, Aajtak Tv Toda...",19.119793,72.847524,Tech Startup
4,Andheri,19.120371,72.848043,MTC Headquarters,19.118611,72.848239,Conference Room


In [26]:
# (rows, columns) of the office table.
mumbai_venues_office.shape

(1597, 7)

In [27]:
# Office map. Unlike the other maps no zoom_start is passed, so folium's default
# zoom applies, and only the first 1000 rows of the office table are plotted.
map_mumbai_office = folium.Map(location=[latitude, longitude])
addToMap(mumbai_venues_office.head(1000), 'fuchsia', map_mumbai_office)
map_mumbai_office

In [28]:
def addColumn(startDf, columnTitle, dataDf):
    """Add a per-neighbourhood venue count to ``startDf`` in place.

    Parameters
    ----------
    startDf : pandas.DataFrame
        Frame with a 'Neighborhood' column; it receives the new column.
    columnTitle : str
        Name of the column to create (or overwrite).
    dataDf : pandas.DataFrame
        Venue table with 'Neighborhood' and 'Venue' columns.

    Returns
    -------
    None
        ``startDf`` is modified in place. Neighbourhoods that do not occur in
        ``dataDf`` get a count of 0; the column is stored as float.
    """
    # Number of non-null venues per neighbourhood.
    venue_counts = dataDf.groupby('Neighborhood')['Venue'].count()

    # Look every neighbourhood up in one vectorised step. Names missing from
    # the counts become NaN and are then set to 0, so no blanket exception
    # handler is needed to cover the "no venues" case.
    startDf[columnTitle] = (
        startDf['Neighborhood'].map(venue_counts).fillna(0).astype(float)
    )

In [29]:
# Start from the geocoded neighbourhoods, using the singular column name that
# addColumn and the venue tables use.
df_data = df3.rename(columns={'Neighborhoods': 'Neighborhood'})

# Append one venue-count column per venue table, in this order.
for column_title, venue_table in [
    ('Gym', mumbai_venues_gym),
    ('High Schools', mumbai_venues_highschools),
    ('Universities', mumbai_venues_uni),
    ('Offices', mumbai_venues_office),
]:
    addColumn(df_data, column_title, venue_table)

df_data

Unnamed: 0,Neighborhood,Latitude,Longitude,Gym,High Schools,Universities,Offices
0,Andheri,19.120371,72.848043,18.0,5.0,3.0,50.0
1,Bandra,19.054979,72.84022,19.0,4.0,1.0,50.0
2,Borivali,19.228738,72.856877,20.0,8.0,4.0,47.0
3,Dahisar,19.257178,72.857536,17.0,3.0,1.0,48.0
4,Goregaon,19.164973,72.849543,15.0,5.0,3.0,49.0
5,Jogeshwari,19.135734,72.848923,9.0,2.0,0.0,46.0
6,Juhu,19.107021,72.827528,18.0,3.0,2.0,49.0
7,Kandivali west,19.20838,72.842227,21.0,5.0,1.0,42.0
8,Khar,19.072457,72.833707,37.0,10.0,5.0,50.0
9,Malad,19.184677,72.835807,25.0,4.0,0.0,50.0


In [30]:
# Scoring weights applied to the venue counts of each neighbourhood.

# Negative: Nikhil wants to open a gym, so existing gyms are competition to avoid.
weight_gym = -1

# Positive: high school students are good customers.
weight_schools = 1

# Positive and higher: university students are better customers.
weight_uni = 1.5

# Positive and highest: employees are the best customers.
weight_offices = 2

In [31]:
# Start the result table with the neighbourhood names only; the copy keeps it independent of df_data.
df_weighted = df_data[['Neighborhood']].copy()

In [32]:
# Score = weighted sum of the four venue counts of each neighbourhood.
weighted_counts = (
    df_data['Gym'] * weight_gym
    + df_data['High Schools'] * weight_schools
    + df_data['Universities'] * weight_uni
    + df_data['Offices'] * weight_offices
)
df_weighted['Score'] = weighted_counts

# Best candidates first.
df_weighted = df_weighted.sort_values(by='Score', ascending=False)
df_weighted

Unnamed: 0,Neighborhood,Score
28,Fort,103.0
27,Dadar,102.5
25,Byculla,99.0
30,Kalbadevi,98.5
19,Vikhroli,98.0
14,Kanjurmarg,93.5
4,Goregaon,92.5
13,Ghatkopar,92.5
24,Antop Hill,91.5
0,Andheri,91.5


In [33]:
# NOTE(review): the map stays centred on the geocoded coordinates of Mumbai, as
# before; if the winning neighbourhood is far from that point its markers may
# be outside the initial view.
map_mum_result = folium.Map(location=[latitude, longitude], zoom_start=15)

# Take the winner from the computed ranking instead of hard-coding its name, so
# the map always matches the scores.
best_neighborhood = df_weighted.loc[df_weighted['Score'].idxmax(), 'Neighborhood']

mum_win = df3[df3['Neighborhoods'] == best_neighborhood]

# Blue marker for the winning neighbourhood itself.
for lat, lng, local in zip(mum_win['Latitude'], mum_win['Longitude'], mum_win['Neighborhoods']):
    label = '{}'.format(local)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.7).add_to(map_mum_result)

# Venues of the winning neighbourhood, using the same colours as the per-category maps.
for venue_table, marker_color in [
    (mumbai_venues_gym, 'red'),
    (mumbai_venues_highschools, 'green'),
    (mumbai_venues_uni, 'gold'),
    (mumbai_venues_office, 'fuchsia'),
]:
    addToMap(venue_table[venue_table['Neighborhood'] == best_neighborhood], marker_color, map_mum_result)

map_mum_result

In [None]:
### The Fort neighborhood is the best option for Nikhil to open his gym.