In [33]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print("Libraries imported.")

Libraries imported.


## 2. Scrap data from Wikipedia page into a DataFrame

In [23]:
!pip install geocoder

Collecting geocoder
  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
Collecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [26]:
import geocoder

In [75]:


# send the GET request
data = requests.get("https://en.wikipedia.org/wiki/Category:Neighbourhoods_in_Kolkata").text



In [76]:
# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(data, 'html.parser')

# create a list to store neighborhood data
neighborhoodList = []



In [77]:


# append the data into the list
for row in soup.find_all("div", class_="mw-category")[0].findAll("li"):
    neighborhoodList.append(row.text)



In [78]:


# create a new DataFrame from the list
tk_df = pd.DataFrame({"Neighborhood": neighborhoodList})

tk_df.head()



Unnamed: 0,Neighborhood
0,Neighbourhoods in Kolkata Metropolitan Area
1,Abhirampur
2,Agarpara
3,Ajoy Nagar
4,Alipore


In [79]:
# print the number of rows of the dataframe
tk_df.shape

(200, 1)

## 3. Get the geographical coordinates

In [82]:
# define a function to get coordinates
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Kolkata, India'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [83]:
# call the function to get the coordinates, store in a new list using list comprehension
coords = [ get_latlng(neighborhood) for neighborhood in tk_df["Neighborhood"].tolist() ]
coords

[[22.570530000000076, 88.37124000000006],
 [22.530693771856434, 88.34650250291418],
 [22.68405000000007, 88.39165000000008],
 [22.48966000000007, 88.39640000000009],
 [22.52660000000003, 88.33510000000007],
 [22.98801000000003, 88.38838000000004],
 [22.482678048848754, 88.37658799417164],
 [22.51208747155059, 88.3369511035259],
 [22.570530000000076, 88.37124000000006],
 [22.48783004079867, 88.38464411004674],
 [22.570530000000076, 88.37124000000006],
 [22.472170000000062, 88.25546000000008],
 [22.567630000000065, 88.34453000000008],
 [22.56729000000007, 88.34106000000008],
 [22.555079999999997, 88.24685148658381],
 [22.604020000000048, 88.36637000000007],
 [22.48395000000005, 88.37543000000005],
 [22.61674000000005, 88.42691000000008],
 [22.472510000000057, 88.36513000000008],
 [22.700780000000066, 88.38065000000006],
 [22.497570000000053, 88.23033000000004],
 [22.647860000000037, 88.34428000000008],
 [22.647860000000037, 88.34428000000008],
 [22.531780000000026, 88.36437000000006],
 [

In [84]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

In [85]:
# merge the coordinates into the original dataframe
tk_df['Latitude'] = df_coords['Latitude']
tk_df['Longitude'] = df_coords['Longitude']


# check the neighborhoods and the coordinates
print(tk_df.shape)
tk_df



(200, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Neighbourhoods in Kolkata Metropolitan Area,22.57053,88.37124
1,Abhirampur,22.530694,88.346503
2,Agarpara,22.68405,88.39165
3,Ajoy Nagar,22.48966,88.3964
4,Alipore,22.5266,88.3351
5,Amodghata,22.98801,88.38838
6,Amtala,22.482678,88.376588
7,"Anandapur, Kolkata",22.512087,88.336951
8,Andul,22.57053,88.37124
9,Ankurhati,22.48783,88.384644


In [86]:
# save the DataFrame as CSV file
tk_df.to_csv("kl_df.csv", index=False)

## 4. Create a map of Kuala Lumpur with neighborhoods superimposed on top

In [88]:
 #get the coordinates of Tokyo
address = 'Kolkata, India'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Kolkata, India {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Kolkata, India 22.54541245, 88.3567751581234.


In [89]:
# create map of Toronto using latitude and longitude values
map_tk = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(tk_df['Latitude'], tk_df['Longitude'], tk_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_tk)  
    
map_tk

In [90]:
# save the map as HTML file
map_tk.save('map_tk.html')

## 5. Use the Foursquare API to explore the neighborhoods

In [91]:
# define Foursquare Credentials and Version
CLIENT_ID = 'BF3WWPUL1CHNPNSK0LWGL0JKIDFRKWE22GQEPMV4J5AAN4QR' # your Foursquare ID
CLIENT_SECRET = '0LZBN5VRN4HB3ZJQFUTPIWQIUVZQ4MBL5QZFB4HNFRSVGVGK' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)


Your credentails:
CLIENT_ID: BF3WWPUL1CHNPNSK0LWGL0JKIDFRKWE22GQEPMV4J5AAN4QR
CLIENT_SECRET:0LZBN5VRN4HB3ZJQFUTPIWQIUVZQ4MBL5QZFB4HNFRSVGVGK


### Now, let's get the top 100 venues that are within a radius of 2000 meters.

In [94]:
radius = 2000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(tk_df['Latitude'], tk_df['Longitude'], tk_df['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [95]:


# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()



(4652, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Neighbourhoods in Kolkata Metropolitan Area,22.57053,88.37124,Big Bazaar,22.565919,88.369635,Department Store
1,Neighbourhoods in Kolkata Metropolitan Area,22.57053,88.37124,College Square,22.574746,88.363895,Plaza
2,Neighbourhoods in Kolkata Metropolitan Area,22.57053,88.37124,Bhim Chandra Nag,22.570639,88.371524,Indian Sweet Shop
3,Neighbourhoods in Kolkata Metropolitan Area,22.57053,88.37124,College Street Market,22.576321,88.363631,Bookstore
4,Neighbourhoods in Kolkata Metropolitan Area,22.57053,88.37124,Jorasankho Thakur-Bari,22.584401,88.359526,History Museum


### Let's check how many venues were returned for each neighorhood

In [96]:
venues_df.groupby(["Neighborhood"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Abhirampur,100,100,100,100,100,100
Agarpara,8,8,8,8,8,8
Ajoy Nagar,6,6,6,6,6,6
Alipore,44,44,44,44,44,44
Amodghata,4,4,4,4,4,4
Amtala,12,12,12,12,12,12
"Anandapur, Kolkata",40,40,40,40,40,40
Andul,28,28,28,28,28,28
Ankurhati,7,7,7,7,7,7
Argari,28,28,28,28,28,28


### Let's find out how many unique categories can be curated from all the returned venues

In [97]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 154 uniques categories.


In [98]:
# print out the list of categories
venues_df['VenueCategory'].unique()[:50]

array(['Department Store', 'Plaza', 'Indian Sweet Shop', 'Bookstore',
       'History Museum', 'South Indian Restaurant', 'Juice Bar', 'Café',
       'Art Museum', 'Mughlai Restaurant', 'Sandwich Place',
       'Train Station', 'Park', 'Platform', 'Indian Restaurant',
       'Multiplex', 'Vegetarian / Vegan Restaurant', 'Hotel',
       'Bus Station', 'Flea Market', 'Coffee Shop', 'Dhaba',
       'Chinese Restaurant', 'Bengali Restaurant', 'Bakery',
       'American Restaurant', 'Nightclub', 'Fast Food Restaurant',
       'Italian Restaurant', 'Kerala Restaurant', 'Lounge',
       'Ice Cream Shop', 'Gastropub', 'Awadhi Restaurant',
       'Theme Restaurant', 'Falafel Restaurant', 'Restaurant',
       'Dessert Shop', 'Sports Club', 'Tea Room', 'Shopping Mall',
       'Indie Theater', 'Performing Arts Venue', 'Art Gallery',
       'Mexican Restaurant', 'Pub', 'Hookah Bar', 'Pizza Place', 'Market',
       'Planetarium'], dtype=object)

In [99]:
# check if the results contain "Shopping Mall"
"Neighborhood" in venues_df['VenueCategory'].unique()

True

### 6. Analyze Each Neighborhood

In [100]:


# one hot encoding
tk_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
tk_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [tk_onehot.columns[-1]] + list(tk_onehot.columns[:-1])
tk_onehot = tk_onehot[fixed_columns]

print(tk_onehot.shape)
tk_onehot.head()



(4652, 155)


Unnamed: 0,Neighborhoods,ATM,Afghan Restaurant,Airport,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Awadhi Restaurant,BBQ Joint,Bakery,Bank,Bar,Beer Bar,Beer Garden,Bengali Restaurant,Bistro,Boat or Ferry,Bookstore,Botanical Garden,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bus Station,Bus Stop,Business Service,Café,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Concert Hall,Convenience Store,Cricket Ground,Department Store,Dessert Shop,Dhaba,Diner,Dumpling Restaurant,Electronics Store,Event Service,Falafel Restaurant,Fast Food Restaurant,Field,Film Studio,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Gastropub,Gift Shop,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health & Beauty Service,Historic Site,History Museum,Hookah Bar,Hostel,Hotel,Hotel Pool,IT Services,Ice Cream Shop,Indian Restaurant,Indian Sweet Shop,Indie Movie Theater,Indie Theater,Irish Pub,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kerala Restaurant,Light Rail Station,Lounge,Market,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Military Base,Mobile Phone Shop,Motorcycle Shop,Movie Theater,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Museum,Music Store,Music Venue,Neighborhood,Nightclub,North Indian Restaurant,Northeast Indian Restaurant,Optical Shop,Outdoor Sculpture,Park,Performing Arts Venue,Perfume Shop,Pharmacy,Pizza Place,Planetarium,Platform,Playground,Plaza,Pool,Port,Pub,Racetrack,Resort,Restaurant,River,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shoe Store,Shopping Mall,Snack Place,Soccer Stadium,Social Club,Soup Place,South Indian Restaurant,Spa,Sports Club,Stadium,Steakhouse,Supermarket,Taxi Stand,Tea Room,Tennis Court,Tex-Mex Restaurant,Thai Restaurant,Theme Park,Theme Restaurant,Tibetan Restaurant,Toll Booth,Train Station,Tram Station,Vegetarian / Vegan Restaurant,Watch Shop,Women's Store,Zoo
0,Neighbourhoods in Kolkata Metropolitan Area,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Neighbourhoods in Kolkata Metropolitan Area,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Neighbourhoods in Kolkata Metropolitan Area,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Neighbourhoods in Kolkata Metropolitan Area,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Neighbourhoods in Kolkata Metropolitan Area,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [101]:
tk_grouped = tk_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(tk_grouped.shape)
tk_grouped

(189, 155)


Unnamed: 0,Neighborhoods,ATM,Afghan Restaurant,Airport,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Awadhi Restaurant,BBQ Joint,Bakery,Bank,Bar,Beer Bar,Beer Garden,Bengali Restaurant,Bistro,Boat or Ferry,Bookstore,Botanical Garden,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bus Station,Bus Stop,Business Service,Café,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Concert Hall,Convenience Store,Cricket Ground,Department Store,Dessert Shop,Dhaba,Diner,Dumpling Restaurant,Electronics Store,Event Service,Falafel Restaurant,Fast Food Restaurant,Field,Film Studio,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden,Gastropub,Gift Shop,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health & Beauty Service,Historic Site,History Museum,Hookah Bar,Hostel,Hotel,Hotel Pool,IT Services,Ice Cream Shop,Indian Restaurant,Indian Sweet Shop,Indie Movie Theater,Indie Theater,Irish Pub,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kerala Restaurant,Light Rail Station,Lounge,Market,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Military Base,Mobile Phone Shop,Motorcycle Shop,Movie Theater,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Museum,Music Store,Music Venue,Neighborhood,Nightclub,North Indian Restaurant,Northeast Indian Restaurant,Optical Shop,Outdoor Sculpture,Park,Performing Arts Venue,Perfume Shop,Pharmacy,Pizza Place,Planetarium,Platform,Playground,Plaza,Pool,Port,Pub,Racetrack,Resort,Restaurant,River,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shoe Store,Shopping Mall,Snack Place,Soccer Stadium,Social Club,Soup Place,South Indian Restaurant,Spa,Sports Club,Stadium,Steakhouse,Supermarket,Taxi Stand,Tea Room,Tennis Court,Tex-Mex Restaurant,Thai Restaurant,Theme Park,Theme Restaurant,Tibetan Restaurant,Toll Booth,Train Station,Tram Station,Vegetarian / Vegan Restaurant,Watch Shop,Women's Store,Zoo
0,Abhirampur,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.15,0.05,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.06,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.08,0.0,0.0,0.02,0.04,0.01,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.02,0.03,0.0,0.0,0.02,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.04,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01
1,Agarpara,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.125,0.0
2,Ajoy Nagar,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Alipore,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.022727,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.159091,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.045455,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068182,0.022727,0.0,0.022727,0.045455,0.022727,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.022727,0.0,0.022727,0.022727,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.045455,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727
4,Amodghata,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Amtala,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0
6,"Anandapur, Kolkata",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.025,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.025,0.0,0.0,0.0,0.0,0.025,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.025,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.075,0.025,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.05,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.025,0.0,0.05,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Andul,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.107143,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.035714,0.0,0.0,0.0,0.071429,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.071429,0.0,0.0,0.0
8,Ankurhati,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0
9,Argari,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.107143,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.035714,0.0,0.0,0.0,0.071429,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.071429,0.0,0.0,0.0


In [102]:
len(tk_grouped[tk_grouped["Coffee Shop"] > 0])

56

### Create a new DataFrame for Shopping Mall data only

In [103]:
tk_shop = tk_grouped[["Neighborhoods","Coffee Shop"]]

In [104]:
tk_shop.head()

Unnamed: 0,Neighborhoods,Coffee Shop
0,Abhirampur,0.0
1,Agarpara,0.0
2,Ajoy Nagar,0.0
3,Alipore,0.0
4,Amodghata,0.0


## 7. Cluster Neighborhoods


### Run k-means to cluster the neighborhoods in Kuala Lumpur into 3 clusters.

In [105]:
# set number of clusters
kclusters = 3

tk_clustering = tk_shop.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(tk_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 0, 2, 0, 2])

In [106]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
tk_merged = tk_shop.copy()

# add clustering labels
tk_merged["Cluster Labels"] = kmeans.labels_

In [107]:
tk_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
tk_merged.head()

Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels
0,Abhirampur,0.0,0
1,Agarpara,0.0,0
2,Ajoy Nagar,0.0,0
3,Alipore,0.0,0
4,Amodghata,0.0,0


In [108]:
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
tk_merged = tk_merged.join(tk_df.set_index("Neighborhood"), on="Neighborhood")

print(tk_merged.shape)
tk_merged.head() # check the last columns!

(189, 5)


Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels,Latitude,Longitude
0,Abhirampur,0.0,0,22.530694,88.346503
1,Agarpara,0.0,0,22.68405,88.39165
2,Ajoy Nagar,0.0,0,22.48966,88.3964
3,Alipore,0.0,0,22.5266,88.3351
4,Amodghata,0.0,0,22.98801,88.38838


In [109]:
# sort the results by Cluster Labels
print(tk_merged.shape)
tk_merged.sort_values(["Cluster Labels"], inplace=True)
tk_merged

(189, 5)


Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels,Latitude,Longitude
0,Abhirampur,0.0,0,22.530694,88.346503
110,East Kolkata,0.0,0,22.48484,88.37523
111,Ekbalpur,0.0,0,22.53529,88.32266
113,Entally,0.01,0,22.55659,88.36925
114,"Esplanade, Kolkata",0.014706,0,22.55871,88.34898
115,"Fort William, India",0.016129,0,22.55956,88.34262
116,Ganye Gangadharpur,0.0,0,22.47867,88.25941
117,Garden Reach,0.0,0,22.53662,88.29594
107,"Dunlop, Kolkata",0.011364,0,22.56987,88.35171
119,Garia,0.0,0,22.46112,88.38845


### Finally, let's visualize the resulting clusters

In [110]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(tk_merged['Latitude'], tk_merged['Longitude'], tk_merged['Neighborhood'], tk_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [111]:
# save the map as HTML file
map_clusters.save('map_clusters.html')

## 8. Examine Clusters

### Cluster 0

In [117]:
t0 = tk_merged.loc[tk_merged['Cluster Labels'] == 0]
t0

Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels,Latitude,Longitude
0,Abhirampur,0.0,0,22.530694,88.346503
110,East Kolkata,0.0,0,22.48484,88.37523
111,Ekbalpur,0.0,0,22.53529,88.32266
113,Entally,0.01,0,22.55659,88.36925
114,"Esplanade, Kolkata",0.014706,0,22.55871,88.34898
115,"Fort William, India",0.016129,0,22.55956,88.34262
116,Ganye Gangadharpur,0.0,0,22.47867,88.25941
117,Garden Reach,0.0,0,22.53662,88.29594
107,"Dunlop, Kolkata",0.011364,0,22.56987,88.35171
119,Garia,0.0,0,22.46112,88.38845


### Cluster 1

In [113]:
t1 = tk_merged.loc[tk_merged['Cluster Labels'] == 1]
t1

Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels,Latitude,Longitude
128,Golf Green,0.0625,1,22.49417,88.36331
181,Kolkata West International City,0.052632,1,22.54163,88.38631
118,Garfa,0.058824,1,22.50275,88.37935
187,Lake Gardens,0.045455,1,22.50493,88.35625
109,Duttapukur,0.057143,1,22.496699,88.348896
136,Hatgachha,0.055556,1,22.54543,88.39698
172,Katju Nagar,0.0625,1,22.4984,88.36997
58,Bijoygarh,0.071429,1,22.48773,88.36536
174,"Kesabpur, India",0.055556,1,22.551183,88.383573
60,Bikramgarh,0.051282,1,22.4973,88.35919


### Cluster 2

In [118]:
t2 = tk_merged.loc[tk_merged['Cluster Labels'] == 2]
t2

Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels,Latitude,Longitude
7,Andul,0.035714,2,22.57053,88.37124
169,Kantlia,0.035714,2,22.57053,88.37124
9,Argari,0.035714,2,22.57053,88.37124
171,"Kasba, Kolkata",0.033333,2,22.52754,88.37517
168,Kankurgachi,0.037037,2,22.57834,88.39386
176,Khalia,0.035714,2,22.57053,88.37124
11,B. B. D. Bagh,0.020833,2,22.56763,88.34453
134,Harinavi,0.035714,2,22.57053,88.37124
22,Ballygunge,0.02,2,22.53178,88.36437
76,Chak Baria,0.04,2,22.559965,88.399206
