# Capstone Project – The Battle of Neighborhoods | Finding a Better Place in Mumbai

### 1. Installing and Importing Python Libraries and Dependencies

In [1]:
!pip install geocoder
!pip install folium



Importing Libraries

In [1]:
import pandas as pd
import requests
import numpy as np
import geocoder
import folium
import requests 
import matplotlib.cm as cm
import matplotlib.colors as colors
import json
import xml
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

from pandas.io.json import json_normalize 
from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim 
from bs4 import BeautifulSoup

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

print("All Required Libraries Imported!")

All Required Libraries Imported!


### 2.  Data Extraction and Cleaning

Using BeautifulSoup Scraping List of Postal Codes of Given Wikipedia Page. 
Link: https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_neighbourhoods_in_Mumbai"
extracting_data = requests.get(url).text
wiki_data = BeautifulSoup(extracting_data, 'lxml')

In [15]:
#data = {}
table = wiki_data.find("table", attrs={"class": "wikitable"})
    # Get headers of table i.e., Rank, Country, GDP.
t_headers = []
for th in table.find_all("th"):
    # remove any newlines and extra spaces from left and right
    t_headers.append(th.text.replace('\n', ' ').strip())
# Get all the rows of table
table_data = []
for tr in table.tbody.find_all("tr"): # find all tr's from table's tbody
    t_row = {}
    # Each table row is stored in the form of
    # t_row = {'Rank': '', 'Country/Territory': '', 'GDP(US$million)': ''}

    # find all td's(3) in tr and zip it with t_header
    for td, th in zip(tr.find_all("td"), t_headers): 
        t_row[th] = td.text.replace('\n', '').strip()
    table_data.append(t_row)

    # Put the data for the table with his heading.
    #data[heading] = table_data

In [17]:
table_data=pd.DataFrame(table_data)

In [21]:
table_data.dropna(inplace=True)
table_data.reset_index(drop=True,inplace=True)
table_data

Unnamed: 0,Area,Latitude,Location,Longitude
0,Amboli,19.1293,"Andheri,Western Suburbs",72.8434
1,"Chakala, Andheri",19.111388,Western Suburbs,72.860833
2,D.N. Nagar,19.124085,"Andheri,Western Suburbs",72.831373
3,Four Bungalows,19.124714,"Andheri,Western Suburbs",72.82721
4,Lokhandwala,19.130815,"Andheri,Western Suburbs",72.82927
5,Marol,19.119219,"Andheri,Western Suburbs",72.882743
6,Sahar,19.098889,"Andheri,Western Suburbs",72.867222
7,Seven Bungalows,19.129052,"Andheri,Western Suburbs",72.817018
8,Versova,19.12,"Andheri,Western Suburbs",72.82
9,Mira Road,19.284167,"Mira-Bhayandar,Western Suburbs",72.871111


In [23]:
df = table_data
df.head()

Unnamed: 0,Area,Latitude,Location,Longitude
0,Amboli,19.1293,"Andheri,Western Suburbs",72.8434
1,"Chakala, Andheri",19.111388,Western Suburbs,72.860833
2,D.N. Nagar,19.124085,"Andheri,Western Suburbs",72.831373
3,Four Bungalows,19.124714,"Andheri,Western Suburbs",72.82721
4,Lokhandwala,19.130815,"Andheri,Western Suburbs",72.82927


In [24]:
df.describe()

Unnamed: 0,Area,Latitude,Location,Longitude
count,93,93.0,93,93.0
unique,93,85.0,31,82.0
top,Nehru Nagar,18.95,South Mumbai,72.83
freq,1,3.0,30,4.0


In [25]:
# def get_latilong(postal_code):
#     lati_long_coords = None
#     while(lati_long_coords is None):
#         g = geocoder.arcgis('{}, Mumbai, Maharashtra'.format(postal_code))
#         lati_long_coords = g.latlng
#     return lati_long_coords
    
# get_latilong('400078')

[19.18791016800003, 72.92371000000003]

In [14]:
# # Retrieving Postal Code Co-ordinates
# postal_codes = df_2['Postalcode']    
# coords = [ get_latilong(postal_code) for postal_code in postal_codes.tolist() ]

In [15]:
# # Adding Columns Latitude & Longitude
# df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
# df_2['Latitude'] = df_coords['Latitude']
# df_2['Longitude'] = df_coords['Longitude']

In [16]:
# df_2[df_2.Postalcode == 'M5G']

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
57,M5G,Downtown Toronto,Central Bay Street,43.656091,-79.38493


In [17]:
# df_2.head(10)

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.811525,-79.195517
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.785665,-79.158725
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.765815,-79.175193
3,M1G,Scarborough,Woburn,43.768369,-79.21759
4,M1H,Scarborough,Cedarbrae,43.769688,-79.23944
5,M1J,Scarborough,Scarborough Village,43.743125,-79.23175
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.726276,-79.263625
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.713054,-79.285055
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.724235,-79.227925
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.69677,-79.259967


In [26]:
address = 'Mumbai,Maharashtra'

geolocator = Nominatim(user_agent='coursera-project')
location = geolocator.geocode(address)
latitude_x = location.latitude
longitude_y = location.longitude
print('The Geograpical Co-ordinate of Mumbai,Maharashtra are {}, {}.'.format(latitude_x, longitude_y))

The Geograpical Co-ordinate of Mumbai,Maharashtra are 19.0759899, 72.8773928.


### 3. Map of Mumbai

In [47]:
map_Mumbai = folium.Map(location=[latitude_x, longitude_y], zoom_start=10)


for lat, lng, nei in zip(df['Latitude'], df['Longitude'], df['Area']):
#     label = '{}'.format(nei)
#     label = folium.Popup(label, parse_html=True)
    folium.vector_layers.CircleMarker(
        [lat, lng],
        radius=5,
        popup=str(nei),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Mumbai)  
    
    
map_Mumbai

In [48]:
# @hiddel_cell
CLIENT_ID = '2OBMGTZX4BHWQA0DJBDV11UUMYMZQVQ3RV4ZXCMZPDWPRFPK' # my Foursquare ID
CLIENT_SECRET = 'H21CM3Z2J5Z03ZMOYQ1K2O55N1DBIJYD3NGKVB0013NBQGXF' # my Foursquare Secret
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: '+CLIENT_ID)
print('CLIENT_SECRET: '+CLIENT_SECRET)

Your credentails:
CLIENT_ID: 2OBMGTZX4BHWQA0DJBDV11UUMYMZQVQ3RV4ZXCMZPDWPRFPK
CLIENT_SECRET: H21CM3Z2J5Z03ZMOYQ1K2O55N1DBIJYD3NGKVB0013NBQGXF


In [64]:
radius = 1000 
LIMIT = 100
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude_x, 
   longitude_y, 
    radius, 
   LIMIT)
results = requests.get(url).json()

In [65]:
venues=results['response']['groups'][0]['items']
nearby_venues = json_normalize(venues)
nearby_venues.columns

Index(['reasons.count', 'reasons.items', 'referralId', 'venue.categories',
       'venue.id', 'venue.location.address', 'venue.location.cc',
       'venue.location.city', 'venue.location.country',
       'venue.location.crossStreet', 'venue.location.distance',
       'venue.location.formattedAddress', 'venue.location.labeledLatLngs',
       'venue.location.lat', 'venue.location.lng', 'venue.location.postalCode',
       'venue.location.state', 'venue.name', 'venue.photos.count',
       'venue.photos.groups'],
      dtype='object')

In [66]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

### 4. Nearby Venues/Locations

In [67]:
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]
nearby_venues.head()

Unnamed: 0,venue.name,venue.categories,venue.location.lat,venue.location.lng
0,Delhi Zaika,"[{'id': '4bf58dd8d48988d10f941735', 'name': 'I...",19.077054,72.87826
1,Pizza Hut,"[{'id': '4bf58dd8d48988d1ca941735', 'name': 'P...",19.075984,72.877656
2,The Bar Stock Exchange,"[{'id': '4bf58dd8d48988d116941735', 'name': 'B...",19.071166,72.876359
3,Costa Coffee,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",19.073535,72.872516
4,kurla market,"[{'id': '4bf58dd8d48988d1f7941735', 'name': 'F...",19.079207,72.880212


### 5. Categories of Nearby Venues/Locations

In [68]:
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head(5)

Unnamed: 0,name,categories,lat,lng
0,Delhi Zaika,Indian Restaurant,19.077054,72.87826
1,Pizza Hut,Pizza Place,19.075984,72.877656
2,The Bar Stock Exchange,Bar,19.071166,72.876359
3,Costa Coffee,Coffee Shop,19.073535,72.872516
4,kurla market,Flea Market,19.079207,72.880212


In [69]:
# Top 10 Categories
a=pd.Series(nearby_venues.categories)
a.value_counts()

Flea Market                       2
Indian Restaurant                 2
Bar                               2
Café                              1
Coffee Shop                       1
Auto Workshop                     1
BBQ Joint                         1
Pizza Place                       1
Fast Food Restaurant              1
Lake                              1
Bus Station                       1
Multicuisine Indian Restaurant    1
Grocery Store                     1
Name: categories, dtype: int64

In [70]:
nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Delhi Zaika,Indian Restaurant,19.077054,72.87826
1,Pizza Hut,Pizza Place,19.075984,72.877656
2,The Bar Stock Exchange,Bar,19.071166,72.876359
3,Costa Coffee,Coffee Shop,19.073535,72.872516
4,kurla market,Flea Market,19.079207,72.880212
5,Aman Hotel ( Baba Seekh Paratha Corner),BBQ Joint,19.07512,72.872262
6,Di Bella Coffee,Café,19.070946,72.875999
7,Bail Bazar,Flea Market,19.080737,72.881089
8,Truetrammtrunk,Bar,19.072919,72.871205
9,Mahesh Stores,Grocery Store,19.080775,72.871885


In [71]:
def getNearbyVenues(names, latitudes, longitudes, radius=700):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # making GET request
        venue_results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in venue_results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [72]:
# Nearby Venues
Mumbai_venues = getNearbyVenues(names=df['Area'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

Amboli
Chakala, Andheri
D.N. Nagar
Four Bungalows
Lokhandwala
Marol
Sahar
Seven Bungalows
Versova
Mira Road
Bhayandar
Uttan
Bandstand Promenade
Kherwadi
Pali Hill
I.C. Colony
Gorai
Dahisar
Aarey Milk Colony
Bangur Nagar
Jogeshwari West
Juhu
Charkop
Poisar
Mahavir Nagar
Thakur village
Pali Naka
Khar Danda
Dindoshi
Sunder Nagar
Kalina
Naigaon
Nalasopara
Virar
Irla
Vile Parle
Bhandup
Amrut Nagar
Asalfa
Pant Nagar
Kanjurmarg
Nehru Nagar
Nahur
Chandivali
Hiranandani Gardens
Indian Institute of Technology Bombay campus
Vidyavihar
Vikhroli
Chembur
Deonar
Mankhurd
Mahul
Agripada
Altamount Road
Bhuleshwar
Breach Candy
Carmichael Road
Cavel
Churchgate
Cotton Green
Cuffe Parade
Cumbala Hill
Currey Road
Dhobitalao
Dongri
Kala Ghoda
Kemps Corner
Lower Parel
Mahalaxmi
Mahim
Malabar Hill
Marine Drive
Marine Lines
Mumbai Central
Nariman Point
Prabhadevi
Sion
Walkeshwar
Worli
C.G.S. colony
Dagdi Chawl
Navy Nagar
Hindu colony
Ballard Estate
Chira Bazaar
Fanas Wadi
Chor Bazaar
Matunga
Parel
Gowalia Tank


In [73]:
print('There are {} Uniques Categories.'.format(len(Mumbai_venues['Venue Category'].unique())))
Mumbai_venues.groupby('Neighborhood').count().head()

There are 203 Uniques Categories.


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agripada,10,10,10,10,10,10
Altamount Road,46,46,46,46,46,46
Amboli,19,19,19,19,19,19
Amrut Nagar,48,48,48,48,48,48
Asalfa,4,4,4,4,4,4


### One Hot Encoding of Features

In [74]:
# one hot encoding
Mumbai_onehot = pd.get_dummies(Mumbai_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Mumbai_onehot['Neighborhood'] = Mumbai_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Mumbai_onehot.columns[-1]] + list(Mumbai_onehot.columns[:-1])
Mumbai_onehot = Mumbai_onehot[fixed_columns]
Mumbai_grouped = Mumbai_onehot.groupby('Neighborhood').mean().reset_index()
Mumbai_onehot.head(5)

Unnamed: 0,Zoo,ATM,Accessories Store,Afghan Restaurant,Airport Lounge,American Restaurant,Amphitheater,Antique Shop,Arcade,Art Gallery,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Beach,Beer Bar,Beer Garden,Bengali Restaurant,Bistro,Boat or Ferry,Bookstore,Boutique,Breakfast Spot,Brewery,Burger Joint,Bus Station,Business Service,Cafeteria,Café,Cantonese Restaurant,Chaat Place,Cheese Shop,Chinese Restaurant,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Auditorium,Comedy Club,Comfort Food Restaurant,Concert Hall,Convenience Store,Convention Center,Cosmetics Shop,Creperie,Cricket Ground,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dhaba,Dim Sum Restaurant,Diner,Dog Run,Donut Shop,Dumpling Restaurant,Electronics Store,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Field,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Garden Center,Gastropub,General Entertainment,German Restaurant,Gift Shop,Gluten-free Restaurant,Goan Restaurant,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Halal Restaurant,Harbor / Marina,Health & Beauty Service,Health Food Store,History Museum,Hookah Bar,Hotel,Hotel Bar,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indie Movie Theater,Indoor Play Area,Intersection,Irani Cafe,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Karaoke Bar,Lake,Light Rail Station,Lighthouse,Liquor Store,Lounge,Maharashtrian Restaurant,Market,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Movie Theater,Moving Target,Multicuisine Indian Restaurant,Multiplex,Music Store,Music Venue,Neighborhood,Nightclub,Noodle House,North Indian Restaurant,Office,Other Great Outdoors,Paper / Office Supplies Store,Park,Parsi Restaurant,Performing Arts Venue,Pharmacy,Photography Studio,Pizza Place,Platform,Playground,Plaza,Pool,Pub,Punjabi Restaurant,Racetrack,Rental Car Location,Residential Building (Apartment / Condo),Resort,Rest Area,Restaurant,Road,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shawarma Place,Shoe Store,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,South Indian Restaurant,Southern / Soul Food Restaurant,Souvenir Shop,Spa,Spanish Restaurant,Sports Bar,Sports Club,Stadium,Steakhouse,Sushi Restaurant,Tea Room,Tennis Court,Tex-Mex Restaurant,Thai Restaurant,Theater,Theme Park,Toy / Game Store,Train,Train Station,Vegetarian / Vegan Restaurant,Video Store,Waterfront,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Amboli,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Amboli,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Amboli,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Amboli,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Amboli,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [75]:
num_top_venues = 5
for hood in Mumbai_grouped['Neighborhood']:
    print("---- "+hood+" ----")
    temp =Mumbai_grouped[Mumbai_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

---- Agripada ----
               venue  freq
0  Indian Restaurant   0.3
1        Coffee Shop   0.2
2             Bakery   0.1
3          Racetrack   0.1
4         Restaurant   0.1


---- Altamount Road ----
               venue  freq
0               Café  0.11
1        Coffee Shop  0.07
2     Sandwich Place  0.07
3             Bakery  0.07
4  Indian Restaurant  0.04


---- Amboli ----
               venue  freq
0        Coffee Shop  0.16
1  Indian Restaurant  0.11
2   Asian Restaurant  0.11
3                Bar  0.11
4        Pizza Place  0.11


---- Amrut Nagar ----
                venue  freq
0   Indian Restaurant  0.10
1                Café  0.08
2      Clothing Store  0.08
3         Pizza Place  0.04
4  Chinese Restaurant  0.04


---- Asalfa ----
               venue  freq
0  Indian Restaurant  0.25
1        Bus Station  0.25
2  Convenience Store  0.25
3         Donut Shop  0.25
4                Zoo  0.00


---- Ballard Estate ----
               venue  freq
0    Harbor / Marina  

               venue  freq
0        Snack Place  0.19
1  Indian Restaurant  0.19
2     Ice Cream Shop  0.10
3               Café  0.10
4          Juice Bar  0.05


---- Jogeshwari West ----
                  venue  freq
0  Fast Food Restaurant  0.26
1     Indian Restaurant  0.11
2        Ice Cream Shop  0.05
3                Bakery  0.05
4      Department Store  0.05


---- Juhu ----
               venue  freq
0                Bar  0.09
1  Indian Restaurant  0.09
2              Hotel  0.09
3                Spa  0.07
4               Café  0.07


---- Kala Ghoda ----
                venue  freq
0   Indian Restaurant  0.15
1         Coffee Shop  0.09
2                Café  0.09
3        Dessert Shop  0.04
4  Seafood Restaurant  0.04


---- Kalina ----
                  venue  freq
0     Indian Restaurant  0.09
1            Steakhouse  0.06
2                Market  0.06
3            Food Truck  0.06
4  Fast Food Restaurant  0.06


---- Kanjurmarg ----
                     venue  freq
0    

               venue  freq
0                Bar  0.09
1  Indian Restaurant  0.09
2              Hotel  0.09
3                Spa  0.07
4               Café  0.07


---- Virar ----
                       venue  freq
0                 Restaurant  0.25
1  Indian Chinese Restaurant  0.25
2       Fast Food Restaurant  0.25
3                 Theme Park  0.25
4                  Nightclub  0.00


---- Walkeshwar ----
               venue  freq
0  Indian Restaurant  0.25
1  Convenience Store  0.25
2        Coffee Shop  0.25
3         Lighthouse  0.25
4                Zoo  0.00


---- Worli ----
               venue  freq
0  Indian Restaurant  0.14
1   Asian Restaurant  0.09
2        Bus Station  0.09
3         Food Court  0.09
4              Diner  0.05




In [76]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### Most Common venues near neighborhood

In [77]:
import numpy as np
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Mumbai_grouped['Neighborhood']

for ind in np.arange(Mumbai_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Mumbai_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agripada,Indian Restaurant,Coffee Shop,Japanese Restaurant,Racetrack,Restaurant,Bank,Bakery,Electronics Store,Flea Market,Fish Market
1,Altamount Road,Café,Bakery,Sandwich Place,Coffee Shop,Chinese Restaurant,Indian Restaurant,Snack Place,Park,Pizza Place,Electronics Store
2,Amboli,Coffee Shop,Pizza Place,Asian Restaurant,Indian Restaurant,Bar,Italian Restaurant,Fast Food Restaurant,Electronics Store,Chinese Restaurant,Sandwich Place
3,Amrut Nagar,Indian Restaurant,Café,Clothing Store,Diner,Pizza Place,Chinese Restaurant,Lounge,Fast Food Restaurant,Gourmet Shop,Food & Drink Shop
4,Asalfa,Convenience Store,Indian Restaurant,Bus Station,Donut Shop,Yoga Studio,Event Space,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop


### K-Means Clustering Approach

In [78]:
# Using K-Means to cluster neighborhood into 3 clusters
Mumbai_grouped_clustering = Mumbai_grouped.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters=3, random_state=0).fit(Mumbai_grouped_clustering)
kmeans.labels_

array([0, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 2,
       2, 1, 2, 0, 2, 0, 2, 0, 0, 2, 0, 2, 1, 0, 2, 1, 0, 2, 2, 0, 0, 2,
       2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2,
       2, 2, 2, 2, 0, 2, 2, 0, 0, 2, 0, 2, 2, 0, 1, 2, 2, 2, 2, 2, 0, 0])

In [79]:
df

Unnamed: 0,Area,Latitude,Location,Longitude
0,Amboli,19.1293,"Andheri,Western Suburbs",72.8434
1,"Chakala, Andheri",19.111388,Western Suburbs,72.860833
2,D.N. Nagar,19.124085,"Andheri,Western Suburbs",72.831373
3,Four Bungalows,19.124714,"Andheri,Western Suburbs",72.82721
4,Lokhandwala,19.130815,"Andheri,Western Suburbs",72.82927
5,Marol,19.119219,"Andheri,Western Suburbs",72.882743
6,Sahar,19.098889,"Andheri,Western Suburbs",72.867222
7,Seven Bungalows,19.129052,"Andheri,Western Suburbs",72.817018
8,Versova,19.12,"Andheri,Western Suburbs",72.82
9,Mira Road,19.284167,"Mira-Bhayandar,Western Suburbs",72.871111


In [80]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Mumbai_merged =df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Mumbai_merged = pd.merge(Mumbai_merged,neighborhoods_venues_sorted.set_index('Neighborhood'), left_on='Area',right_on='Neighborhood')

Mumbai_merged.head()# check the last columns!

Unnamed: 0,Area,Latitude,Location,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Amboli,19.1293,"Andheri,Western Suburbs",72.8434,2,Coffee Shop,Pizza Place,Asian Restaurant,Indian Restaurant,Bar,Italian Restaurant,Fast Food Restaurant,Electronics Store,Chinese Restaurant,Sandwich Place
1,"Chakala, Andheri",19.111388,Western Suburbs,72.860833,2,Hotel,Café,Restaurant,Pizza Place,Fast Food Restaurant,Indian Restaurant,Gift Shop,Cocktail Bar,Salon / Barbershop,Bar
2,D.N. Nagar,19.124085,"Andheri,Western Suburbs",72.831373,2,Bar,Gym / Fitness Center,Indian Restaurant,Sandwich Place,Lounge,Snack Place,Residential Building (Apartment / Condo),Chinese Restaurant,Sports Club,Arts & Entertainment
3,Four Bungalows,19.124714,"Andheri,Western Suburbs",72.82721,2,Pizza Place,Bar,Indian Restaurant,Snack Place,Restaurant,Electronics Store,Clothing Store,Chinese Restaurant,Japanese Restaurant,Gym
4,Lokhandwala,19.130815,"Andheri,Western Suburbs",72.82927,2,Indian Restaurant,Lounge,Coffee Shop,Bar,Pizza Place,Clothing Store,Juice Bar,Pub,Chinese Restaurant,Mediterranean Restaurant


### Map of Clusters

In [81]:
kclusters = 10

In [83]:
# create map
map_clusters = folium.Map(location=[latitude_x, longitude_y], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]
print(rainbow)
# add markers to the map

markers_colors = []
for lat, lon, nei , cluster in zip(Mumbai_merged['Latitude'], 
                                   Mumbai_merged['Longitude'], 
                                   Mumbai_merged['Area'], 
                                   Mumbai_merged['Cluster Labels']):
    label = folium.Popup(str(nei) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

['#8000ff', '#4856fb', '#10a2f0', '#2adddd', '#62fbc4', '#9cfba4', '#d4dd80', '#ffa256', '#ff562c', '#ff0000']


In [39]:
df1=Mumbai_merged.loc[df1=Mumbai_merged.loc[df1=Mumbai_merged.loc[Scarborough_merged['Cluster Labels'] == 0,Mumbai_merged.columns[[2] + list(range(5, Mumbai_merged.shape[1]))]]
['Cluster Labels'] == 0,Mumbai_merged.columns[[2] + list(range(5, Mumbai_merged.shape[1]))]]
['Cluster Labels'] == 0,Mumbai_merged.columns[[2] + list(range(5, Mumbai_merged.shape[1]))]]


df2=Mumbai_merged.loc[df1=Mumbai_merged.loc[df1=Mumbai_merged.loc[Scarborough_merged['Cluster Labels'] == 0,Mumbai_merged.columns[[2] + list(range(5, Mumbai_merged.shape[1]))]]
['Cluster Labels'] == 0,Mumbai_merged.columns[[2] + list(range(5, Mumbai_merged.shape[1]))]]
['Cluster Labels'] == 1,Mumbai_merged.columns[[2] + list(range(5, df1=Mumbai_merged.loc[Scarborough_merged['Cluster Labels'] == 0,Mumbai_merged.columns[[2] + list(range(5, Mumbai_merged.shape[1]))]]
.shape[1]))]]


df3=Mumbai_merged.loc[df1=Mumbai_merged.loc[df1=Mumbai_merged.loc[Scarborough_merged['Cluster Labels'] == 0,Mumbai_merged.columns[[2] + list(range(5, Mumbai_merged.shape[1]))]]
['Cluster Labels'] == 0,Mumbai_merged.columns[[2] + list(range(5, Mumbai_merged.shape[1]))]]
['Cluster Labels'] == 2,Mumbai_merged.columns[[2] + list(range(5, df1=Mumbai_merged.loc[df1=Mumbai_merged.loc[Scarborough_merged['Cluster Labels'] == 0,Mumbai_merged.columns[[2] + list(range(5, Mumbai_merged.shape[1]))]]
['Cluster Labels'] == 0,Mumbai_merged.columns[[2] + list(range(5, Mumbai_merged.shape[1]))]]
.shape[1]))]]

In [87]:
cluster0=Mumbai_merged[Mumbai_merged['Cluster Labels']==0]['Area'].unique().tolist()
cluster1=Mumbai_merged[Mumbai_merged['Cluster Labels']==1]['Area'].unique().tolist()
cluster2=Mumbai_merged[Mumbai_merged['Cluster Labels']==2]['Area'].unique().tolist()

['Amboli',
 'Chakala, Andheri',
 'D.N. Nagar',
 'Four Bungalows',
 'Lokhandwala',
 'Seven Bungalows',
 'Versova',
 'Mira Road',
 'Bhayandar',
 'Bandstand Promenade',
 'Kherwadi',
 'Pali Hill',
 'I.C. Colony',
 'Dahisar',
 'Bangur Nagar',
 'Jogeshwari West',
 'Juhu',
 'Charkop',
 'Poisar',
 'Mahavir Nagar',
 'Thakur village',
 'Pali Naka',
 'Dindoshi',
 'Sunder Nagar',
 'Kalina',
 'Naigaon',
 'Nalasopara',
 'Virar',
 'Vile Parle',
 'Bhandup',
 'Amrut Nagar',
 'Kanjurmarg',
 'Chandivali',
 'Hiranandani Gardens',
 'Vidyavihar',
 'Vikhroli',
 'Mankhurd',
 'Altamount Road',
 'Breach Candy',
 'Carmichael Road',
 'Churchgate',
 'Cotton Green',
 'Cumbala Hill',
 'Kemps Corner',
 'Lower Parel',
 'Mahim',
 'Marine Drive',
 'Marine Lines',
 'Mumbai Central',
 'Nariman Point',
 'C.G.S. colony',
 'Navy Nagar',
 'Parel',
 'Dharavi']

In [89]:
print('cluster0 contains following neighbourhoods: ')
print(cluster0)
print('cluster1 contains following neighbourhoods: ')
print(cluster1)
print('cluster2 contains following neighbourhoods: ')
print(cluster2)

cluster0 contains following neighbourhoods: 
['Marol', 'Sahar', 'Khar Danda', 'Irla', 'Asalfa', 'Pant Nagar', 'Nahur', 'Indian Institute of Technology Bombay\xa0campus', 'Chembur', 'Deonar', 'Agripada', 'Bhuleshwar', 'Cavel', 'Currey Road', 'Dhobitalao', 'Kala Ghoda', 'Malabar Hill', 'Prabhadevi', 'Sion', 'Walkeshwar', 'Worli', 'Dagdi Chawl', 'Ballard Estate', 'Chira Bazaar', 'Fanas Wadi', 'Chor Bazaar', 'Matunga', 'Gowalia Tank', 'Dava Bazaar', 'Thane']
cluster1 contains following neighbourhoods: 
['Uttan', 'Gorai', 'Cuffe Parade', 'Dongri']
cluster2 contains following neighbourhoods: 
['Amboli', 'Chakala, Andheri', 'D.N. Nagar', 'Four Bungalows', 'Lokhandwala', 'Seven Bungalows', 'Versova', 'Mira Road', 'Bhayandar', 'Bandstand Promenade', 'Kherwadi', 'Pali Hill', 'I.C. Colony', 'Dahisar', 'Bangur Nagar', 'Jogeshwari West', 'Juhu', 'Charkop', 'Poisar', 'Mahavir Nagar', 'Thakur village', 'Pali Naka', 'Dindoshi', 'Sunder Nagar', 'Kalina', 'Naigaon', 'Nalasopara', 'Virar', 'Vile Parle', 

Conclusion: In this project, using k-means cluster algorithm I separated the neighborhood into three different clusters which have very-similar neighborhoods around them.