## Finding neighborhoods to open new Shopping Malls in Mumbai, India

In [281]:
import numpy as np
import pandas as pd
import json
from geopy.geocoders import Nominatim 
import geocoder
from bs4 import BeautifulSoup
from pandas.io.json import json_normalize 
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
from foursquare import Foursquare
import folium 

## Scraping Data from Wikipedia

In [5]:
data = requests.get('https://en.wikipedia.org/wiki/Category:Neighbourhoods_in_Mumbai').text
soup = BeautifulSoup(data, 'html.parser')

neighborhoodList = []

for row in soup.find_all("div", class_="mw-category")[0].findAll("li"):
    neighborhoodList.append(row.text)

In [23]:
df = pd.DataFrame({"Neighborhood": neighborhoodList})
df = df[1:]
print(df.shape)
df.head()

(135, 1)


Unnamed: 0,Neighborhood
1,Aarey Milk Colony
2,Agripada
3,Altamount Road
4,"Amboli, Mumbai"
5,Amrut Nagar


## Getting latitude and longitude

In [27]:
def get_latlng(neighborhood):
    lat_lng_coords = None
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Mumbai, India'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords
coords = [ get_latlng(neighborhood) for neighborhood in df["Neighborhood"].tolist() ]

In [31]:
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
df['Latitude'] = df_coords['Latitude']
df['Longitude'] = df_coords['Longitude']
df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
1,Aarey Milk Colony,18.97628,72.82615
2,Agripada,18.964338,72.807837
3,Altamount Road,18.94017,72.83483
4,"Amboli, Mumbai",19.14516,72.84674
5,Amrut Nagar,19.02614,72.86645


In [33]:
address = 'Mumbai, India'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Mumbai {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Mumbai 18.9387711, 72.8353355.


In [58]:
map_ = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat, lng, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_)  
    
map_

## Using FourSquare API to get venue details

In [238]:
CLIENT_ID = ''
CLIENT_SECRET = ' '
VERSION = '20200320'

In [240]:
neigh_list = list(df['Neighborhood'])

In [244]:
radius = 2000
LIMIT = 30
venues = []
result_venue=[]
count=-1
for lat, long, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    count+=1
    ll = str(lat)+','+str(long)
    client = Foursquare(client_id=CLIENT_ID, client_secret=CLIENT_SECRET,version = VERSION)
    result_venue.append(client.venues.explore(params={'ll':ll,'radius' : radius,'limit':LIMIT}))
    print(count) # to check number of neighborhoods processed 
                 # due to internet issues

0
1
2
3
4
5
6
7
8
9
10
11
12


Error connecting with foursquare API: HTTPSConnectionPool(host='api.foursquare.com', port=443): Max retries exceeded with url: /v2/venues/explore?ll=18.95130000000006,72.82930000000005&radius=2000&limit=30&v=20200320&client_id=PC0W23LXG00OY3H5QOV4S3WZIZG0MQEZUDYZKLB5N00AYX21&client_secret=HU31OWG0NR4TXZF514AM5N2B15HEAJJUYZJFSPHFUM30YDW2%20 (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x000001CDAF766588>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'))
Error connecting with foursquare API: HTTPSConnectionPool(host='api.foursquare.com', port=443): Max retries exceeded with url: /v2/venues/explore?ll=18.95130000000006,72.82930000000005&radius=2000&limit=30&v=20200320&client_id=PC0W23LXG00OY3H5QOV4S3WZIZG0MQEZUDYZKLB5N00AYX21&client_secret=HU31OWG0NR4TXZF514AM5

13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32


Error connecting with foursquare API: HTTPSConnectionPool(host='api.foursquare.com', port=443): Max retries exceeded with url: /v2/venues/explore?ll=19.0978250291419,72.84422864446877&radius=2000&limit=30&v=20200320&client_id=PC0W23LXG00OY3H5QOV4S3WZIZG0MQEZUDYZKLB5N00AYX21&client_secret=HU31OWG0NR4TXZF514AM5N2B15HEAJJUYZJFSPHFUM30YDW2%20 (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x000001CDAB8B9198>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'))


33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61


Error connecting with foursquare API: HTTPSConnectionPool(host='api.foursquare.com', port=443): Max retries exceeded with url: /v2/venues/explore?ll=18.96172000000007,72.82625000000007&radius=2000&limit=30&v=20200320&client_id=PC0W23LXG00OY3H5QOV4S3WZIZG0MQEZUDYZKLB5N00AYX21&client_secret=HU31OWG0NR4TXZF514AM5N2B15HEAJJUYZJFSPHFUM30YDW2%20 (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x000001CDB4D3B550>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'))
Error connecting with foursquare API: HTTPSConnectionPool(host='api.foursquare.com', port=443): Max retries exceeded with url: /v2/venues/explore?ll=18.96172000000007,72.82625000000007&radius=2000&limit=30&v=20200320&client_id=PC0W23LXG00OY3H5QOV4S3WZIZG0MQEZUDYZKLB5N00AYX21&client_secret=HU31OWG0NR4TXZF514AM5

62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104


Error connecting with foursquare API: HTTPSConnectionPool(host='api.foursquare.com', port=443): Max retries exceeded with url: /v2/venues/explore?ll=19.123100000000022,72.90942000000007&radius=2000&limit=30&v=20200320&client_id=PC0W23LXG00OY3H5QOV4S3WZIZG0MQEZUDYZKLB5N00AYX21&client_secret=HU31OWG0NR4TXZF514AM5N2B15HEAJJUYZJFSPHFUM30YDW2%20 (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x000001CDB4D7D438>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'))


105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126


Error connecting with foursquare API: HTTPSConnectionPool(host='api.foursquare.com', port=443): Max retries exceeded with url: /v2/venues/explore?ll=18.950120000000027,72.79980000000006&radius=2000&limit=30&v=20200320&client_id=PC0W23LXG00OY3H5QOV4S3WZIZG0MQEZUDYZKLB5N00AYX21&client_secret=HU31OWG0NR4TXZF514AM5N2B15HEAJJUYZJFSPHFUM30YDW2%20 (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x000001CDB0149518>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'))
Error connecting with foursquare API: HTTPSConnectionPool(host='api.foursquare.com', port=443): Max retries exceeded with url: /v2/venues/explore?ll=18.950120000000027,72.79980000000006&radius=2000&limit=30&v=20200320&client_id=PC0W23LXG00OY3H5QOV4S3WZIZG0MQEZUDYZKLB5N00AYX21&client_secret=HU31OWG0NR4TXZF514A

127
128
129
130
131


In [245]:
temp = result_venue

In [246]:
i=-1
for result in result_venue:
    res = result['groups'][0]['items']
    i+=1
   
    for venue in res:
        venues.append((
            neigh_list[i],
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [247]:
venues_df = pd.DataFrame(venues)
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(3787, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Aarey Milk Colony,18.95028,72.83157,Tote On The Turf,18.980266,72.820294,Nightclub
1,Aarey Milk Colony,18.95028,72.83157,Mahalaxmi Race Course (Royal Western India Tur...,18.980535,72.818588,Club House
2,Aarey Milk Colony,18.95028,72.83157,Celejor,18.975844,72.823679,Bakery
3,Aarey Milk Colony,18.95028,72.83157,Willingdon Sports Club,18.976925,72.815256,Golf Course
4,Aarey Milk Colony,18.95028,72.83157,Neel,18.980407,72.820403,Indian Restaurant


In [250]:
onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

onehot['Neighborhoods'] = venues_df['Neighborhood'] 

fixed_columns = [onehot.columns[-1]] + list(onehot.columns[:-1])
onehot = onehot[fixed_columns]

print(kl_onehot.shape)
onehot.head()

(3787, 169)


Unnamed: 0,Neighborhoods,Airport Service,American Restaurant,Arcade,Art Gallery,Asian Restaurant,Athletics & Sports,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bar,Basketball Court,Beach,Bed & Breakfast,Beer Garden,Bengali Restaurant,Big Box Store,Bistro,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Buffet,Burger Joint,Burrito Place,Bus Station,Café,Chaat Place,Cheese Shop,Chinese Restaurant,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Academic Building,College Gym,Comedy Club,Comfort Food Restaurant,Concert Hall,Convenience Store,Cosmetics Shop,Coworking Space,Cricket Ground,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Donut Shop,Electronics Store,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Garden,Gastropub,General College & University,General Entertainment,German Restaurant,Gift Shop,Goan Restaurant,Golf Course,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Historic Site,History Museum,Hookah Bar,Hot Dog Joint,Hotel,Hotel Bar,Hotel Pool,Ice Cream Shop,Indian Restaurant,Irani Cafe,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Lake,Lounge,Maharashtrian Restaurant,Market,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Monument / Landmark,Movie Theater,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Music Store,Music Venue,New American Restaurant,Nightclub,Noodle House,North Indian Restaurant,Office,Other Great Outdoors,Outdoors & Recreation,Paper / Office Supplies Store,Park,Parsi Restaurant,Performing Arts Venue,Pharmacy,Pier,Pizza Place,Platform,Playground,Plaza,Pool,Pub,Punjabi Restaurant,Racetrack,Recording Studio,Recreation Center,Residential Building (Apartment / Condo),Resort,Restaurant,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,South Indian Restaurant,Spa,Sports Bar,Sports Club,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Track,Trail,Train Station,Vegetarian / Vegan Restaurant,Water Park,Women's Store
0,Aarey Milk Colony,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Aarey Milk Colony,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Aarey Milk Colony,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Aarey Milk Colony,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Aarey Milk Colony,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [251]:
grouped = onehot.groupby(["Neighborhoods"]).mean().reset_index()
grouped.head()

Unnamed: 0,Neighborhoods,Airport Service,American Restaurant,Arcade,Art Gallery,Asian Restaurant,Athletics & Sports,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bar,Basketball Court,Beach,Bed & Breakfast,Beer Garden,Bengali Restaurant,Big Box Store,Bistro,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Buffet,Burger Joint,Burrito Place,Bus Station,Café,Chaat Place,Cheese Shop,Chinese Restaurant,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Academic Building,College Gym,Comedy Club,Comfort Food Restaurant,Concert Hall,Convenience Store,Cosmetics Shop,Coworking Space,Cricket Ground,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Donut Shop,Electronics Store,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Garden,Gastropub,General College & University,General Entertainment,German Restaurant,Gift Shop,Goan Restaurant,Golf Course,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Historic Site,History Museum,Hookah Bar,Hot Dog Joint,Hotel,Hotel Bar,Hotel Pool,Ice Cream Shop,Indian Restaurant,Irani Cafe,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Lake,Lounge,Maharashtrian Restaurant,Market,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Monument / Landmark,Movie Theater,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Music Store,Music Venue,New American Restaurant,Nightclub,Noodle House,North Indian Restaurant,Office,Other Great Outdoors,Outdoors & Recreation,Paper / Office Supplies Store,Park,Parsi Restaurant,Performing Arts Venue,Pharmacy,Pier,Pizza Place,Platform,Playground,Plaza,Pool,Pub,Punjabi Restaurant,Racetrack,Recording Studio,Recreation Center,Residential Building (Apartment / Condo),Resort,Restaurant,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,South Indian Restaurant,Spa,Sports Bar,Sports Club,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Track,Trail,Train Station,Vegetarian / Vegan Restaurant,Water Park,Women's Store
0,Aarey Milk Colony,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.033333,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.033333,0.0,0.033333,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.033333,0.0,0.0,0.033333,0.266667,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Agripada,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,0.1,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.033333,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Altamount Road,0.0,0.0,0.0,0.033333,0.033333,0.0,0.0,0.0,0.0,0.066667,0.033333,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0
3,"Amboli, Mumbai",0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.033333,0.033333,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.1,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.1,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.033333,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Amrut Nagar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.3,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0


## Creating a DataFrame for malls data only

In [253]:
df_mall = grouped[["Neighborhoods","Shopping Mall"]]
df_mall.head()

Unnamed: 0,Neighborhoods,Shopping Mall
0,Aarey Milk Colony,0.0
1,Agripada,0.0
2,Altamount Road,0.0
3,"Amboli, Mumbai",0.0
4,Amrut Nagar,0.0


## Clustering

In [256]:
# set number of clusters
kclusters = 3

clustering = df_mall.drop(["Neighborhoods"], 1)
# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 2, 0, 0, 0])

In [257]:
merged = df_mall.copy()

# add clustering labels
merged["Cluster Labels"] = kmeans.labels_

In [258]:
merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
merged.head()

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels
0,Aarey Milk Colony,0.0,0
1,Agripada,0.0,0
2,Altamount Road,0.0,0
3,"Amboli, Mumbai",0.0,0
4,Amrut Nagar,0.0,0


In [263]:
merged.sort_values(["Cluster Labels"], inplace=True)
merged

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
0,Aarey Milk Colony,0.0,0,18.97628,72.82615
93,Mira Road,0.0,0,18.96862,72.83302
92,Mendham's Point,0.0,0,19.280032,72.867932
91,Mazagaon,0.0,0,18.94017,72.83483
90,"Matunga Road, Mumbai",0.0,0,18.96836,72.84174
89,Marol,0.0,0,19.025219,72.845545
88,Marine Lines,0.0,0,19.11905,72.88281
87,"Marine Drive, Mumbai",0.0,0,18.94343,72.82319
86,Manori,0.0,0,18.94666,72.82456
85,"Mandvi, Mumbai",0.0,0,19.20938,72.78315


## Showing Clusters on Map

In [268]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(merged['Latitude'], merged['Longitude'], merged['Neighborhood'], merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Visualising Cluster 0

In [278]:
cluster_0 = merged.loc[merged['Cluster Labels'] == 0]
print(cluster_0.shape)
cluster_0

(117, 5)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
0,Aarey Milk Colony,0.0,0,18.97628,72.82615
93,Mira Road,0.0,0,18.96862,72.83302
92,Mendham's Point,0.0,0,19.280032,72.867932
91,Mazagaon,0.0,0,18.94017,72.83483
90,"Matunga Road, Mumbai",0.0,0,18.96836,72.84174
89,Marol,0.0,0,19.025219,72.845545
88,Marine Lines,0.0,0,19.11905,72.88281
87,"Marine Drive, Mumbai",0.0,0,18.94343,72.82319
86,Manori,0.0,0,18.94666,72.82456
85,"Mandvi, Mumbai",0.0,0,19.20938,72.78315


## Visualising Cluster 1

In [279]:
cluster_1 = merged.loc[merged['Cluster Labels'] == 1]
print(cluster_1.shape)
cluster_1

(8, 5)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
59,JB Nagar,0.066667,1,19.19822,72.96171
130,Western Suburbs (Mumbai),0.066667,1,19.19021,72.853661
30,Cumbala Hill,0.066667,1,18.99515,72.83273
71,Kopar Road,0.066667,1,18.99293,72.83836
12,Bandra Kurla Complex,0.066667,1,19.16785,72.83292
83,Malabar Hill,0.066667,1,19.18655,72.84842
103,Pali Village,0.066667,1,18.9957,72.83913
21,Chandivali,0.066667,1,18.98587,72.83108


## Visualising Cluster 2

In [280]:
cluster_2 = merged.loc[merged['Cluster Labels'] == 2]
print(cluster_2.shape)
cluster_2

(7, 5)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
97,Nariman Point,0.033333,2,18.90603,72.81545
6,Anushakti Nagar,0.033333,2,19.09525,72.89255
49,Girgaon,0.033333,2,19.17241,72.86955
113,Sewri,0.033333,2,19.16045,72.83651
109,Royal Opera House (Mumbai),0.033333,2,19.19574,72.83531
129,Wellington Pier (Bombay),0.033333,2,19.19701,72.82768
17,Byculla,0.043478,2,19.13891,72.93817


## Observations:
Most of the shopping malls are in cluster 0 and almost equal number in cluster 1 and cluster 2. This represents a great opportunity and high potential in areas of cluster 1 and 2 to open new shopping malls as there is very little to no competition from existing malls. Meanwhile, shopping malls in cluster 0 are likely suffering from intense competition due to oversupply and high concentration of shopping malls. Therefore, this project recommends property developers to capitalize on these findings to open new shopping malls in neighborhoods in cluster 1 and 2 with little to no competition. Lastly, property developers are advised to avoid neighborhoods in cluster 2 which already have high concentration of shopping malls and suffering from intense competition.