# Capstone Project – The Battle of Neighborhoods | Finding a Place to Open a Malay Restaurant in Kuala Lumpur

## 1. Installing and Importing Python Libraries and Dependencies

In [1]:
!pip install geocoder
!pip install folium

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes


In [2]:
import pandas as pd
import requests
import numpy as np
import geocoder
import folium
import requests 
import matplotlib.cm as cm
import matplotlib.colors as colors
import json
import xml
import csv
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

from pandas.io.json import json_normalize 
from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim 
from bs4 import BeautifulSoup

# Lift pandas' display limits so that every column and every row is shown.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

print("All Required Libraries Imported!")

All Required Libraries Imported!


## 2. Data Extraction and Cleaning

Using BeautifulSoup to scrape the list of Kuala Lumpur neighbourhoods from the Wikipedia page: https://en.wikipedia.org/wiki/Kuala_Lumpur

In [3]:
# Download the Wikipedia article that contains the neighbourhood table.
url = 'https://en.wikipedia.org/wiki/Kuala_Lumpur'
page = requests.get(url, timeout=30)
page.raise_for_status()  # stop on an HTTP error instead of parsing an error page
soup = BeautifulSoup(page.text, 'html.parser')

# NOTE: the table is selected by position (fifth <table> of the page), so this
# breaks if the article layout changes.
table = soup.find_all('table')[4]
rows = table.find_all('tr')[1:]  # the first <tr> is skipped

# Keep the first text node of the first <td> of each row; rows that contain no
# <td> at all are skipped instead of producing an empty record.
data = []
for tr in rows:
    cells = tr.find_all('td')
    if cells:
        data.append([cells[0].find_next(string=True)])
print(data)

df = pd.DataFrame(data, columns = ['Neighbourhood'])
print(df.shape)
df.head(11)

[['Kepong'], ['Batu'], ['Wangsa Maju'], ['Segambut'], ['Setiawangsa'], ['Titiwangsa'], ['Bukit Bintang'], ['Lembah Pantai'], ['Seputeh'], ['Cheras'], ['Bandar Tun Razak']]
(11, 1)


Unnamed: 0,Neighbourhood
0,Kepong
1,Batu
2,Wangsa Maju
3,Segambut
4,Setiawangsa
5,Titiwangsa
6,Bukit Bintang
7,Lembah Pantai
8,Seputeh
9,Cheras


The neighbourhood names scraped from the HTML table are stored in a DataFrame.

## 3. Get the geographical coordinates

In [4]:
# define a function to get coordinates
def get_latlng(Neighbourhood, max_attempts=10):
  """Return the ``[latitude, longitude]`` pair of a Kuala Lumpur neighbourhood.

  The ArcGIS geocoder is queried with "<Neighbourhood>, Kuala Lumpur, Malaysia";
  the request is repeated while the service returns no coordinates.

  Parameters
  ----------
  Neighbourhood : str
      Name of the neighbourhood to look up.
  max_attempts : int, optional
      Maximum number of geocoding requests before giving up.

  Returns
  -------
  The ``latlng`` value of the first geocoder result that is not ``None``.

  Raises
  ------
  RuntimeError
      If no coordinates were obtained after ``max_attempts`` requests.
  """
  query = '{}, Kuala Lumpur, Malaysia'.format(Neighbourhood)
  # bounded retry: an unresolvable name must not hang the notebook forever
  for _ in range(max_attempts):
    lat_lng_coords = geocoder.arcgis(query).latlng
    if lat_lng_coords is not None:
      return lat_lng_coords
  raise RuntimeError(
    'No coordinates found for {!r} after {} attempts'.format(query, max_attempts))


# geocode every neighbourhood, keeping the row order of df
coords = list(map(get_latlng, df["Neighbourhood"].tolist()))

# hold the coordinate pairs in a temporary two-column dataframe
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

# copy both coordinate columns onto the original dataframe
for coord_column in ['Latitude', 'Longitude']:
    df[coord_column] = df_coords[coord_column]

# check the neighborhoods and the coordinates
print(df.shape)
df.head(54)

(11, 3)


Unnamed: 0,Neighbourhood,Latitude,Longitude
0,Kepong,3.2175,101.63763
1,Batu,3.14789,101.69405
2,Wangsa Maju,3.20387,101.73715
3,Segambut,3.1865,101.66795
4,Setiawangsa,3.191802,101.740066
5,Titiwangsa,3.18073,101.70321
6,Bukit Bintang,3.14777,101.70855
7,Lembah Pantai,3.121189,101.663889
8,Seputeh,3.11327,101.68033
9,Cheras,3.06187,101.74675


In [5]:
# write the neighbourhood table to disk as CSV, leaving out the index column
csv_path = "df.csv"
df.to_csv(csv_path, index=False)

In [6]:
# get the coordinates of Kuala Lumpur
address = 'Kuala Lumpur, Malaysia'

geolocator = Nominatim(user_agent="MyApp")
location = geolocator.geocode(address)
if location is None:
    # geocode() yields None when the address cannot be resolved
    raise RuntimeError('Nominatim returned no result for {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinates of Kuala Lumpur, Malaysia are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Kualal Lumpur, Malaysia 3.1516964, 101.6942371.


## 4. Create a map of Kuala Lumpur with neighborhoods superimposed on top

In [7]:
# create a map centred on Kuala Lumpur using the latitude and longitude values
map_jkt = folium.Map(location=[latitude, longitude], zoom_start=11)

# appearance shared by every neighbourhood marker
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7)

# add one circle marker per neighbourhood; the popup shows the neighbourhood name
for lat, lng, neighbourhood in zip(df['Latitude'], df['Longitude'], df['Neighbourhood']):
    label = folium.Popup('{}'.format(neighbourhood), parse_html=True)
    marker = folium.CircleMarker([lat, lng], popup=label, **marker_style)
    marker.add_to(map_jkt)

map_jkt

## 5. Use the Foursquare API to explore the neighborhoods

In [8]:

# define Foursquare Credentials and Version
# Secrets are read from environment variables so that they are never stored in
# (or printed by) the notebook.
# NOTE(review): keys that were previously hard-coded in this cell should be
# treated as leaked and revoked.
import os

_missing_vars = [name for name in ('FOURSQUARE_CLIENT_ID', 'FOURSQUARE_CLIENT_SECRET')
                 if not os.environ.get(name)]
if _missing_vars:
    raise RuntimeError(
        'Set these environment variables before running this cell: ' + ', '.join(_missing_vars))

CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID'] # your Foursquare ID
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET'] # your Foursquare Secret
ACCESS_TOKEN = os.environ.get('FOURSQUARE_ACCESS_TOKEN', '') # your FourSquare Access Token (optional)
VERSION = '20180604'
LIMIT = 30
print('Foursquare credentials loaded from the environment.')

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


Now, let's get the top 100 venues that are within a radius of 2000 meters.

In [9]:
# Query the Foursquare "explore" endpoint once per neighbourhood and collect the venues.
radius = 2000  # search radius in meters around each neighbourhood's coordinates
LIMIT = 100    # maximum number of venues requested per neighbourhood

venues = []

for lat, long, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighbourhood']):

    # make the GET request; requests builds and URL-encodes the query string
    response = requests.get(
        "https://api.foursquare.com/v2/venues/explore",
        params={
            "client_id": CLIENT_ID,
            "client_secret": CLIENT_SECRET,
            "v": VERSION,
            "ll": "{},{}".format(lat, long),
            "radius": radius,
            "limit": LIMIT,
        },
        timeout=30,
    )
    response.raise_for_status()  # surface HTTP errors instead of failing later on a missing key

    # a response without any group is treated as "no venues for this neighbourhood"
    groups = response.json()["response"].get("groups") or []
    results = groups[0]["items"] if groups else []

    # return only relevant information for each nearby venue
    for venue in results:
        details = venue['venue']
        # a venue may come without categories; record None rather than raising IndexError
        categories = details.get('categories') or []
        venues.append((
            neighborhood,
            lat,
            long,
            details['name'],
            details['location']['lat'],
            details['location']['lng'],
            categories[0]['name'] if categories else None))

In [10]:
# convert the venues list into a new DataFrame; the column names are passed to the
# constructor so that an empty venues list still yields a correctly labelled (empty)
# frame instead of raising on the column assignment
venues_df = pd.DataFrame(
    venues,
    columns=['Neighbourhood', 'Latitude', 'Longitude', 'VenueName',
             'VenueLatitude', 'VenueLongitude', 'VenueCategory'])

print(venues_df.shape)
venues_df.head()

(1100, 7)


Unnamed: 0,Neighbourhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Kepong,3.2175,101.63763,Herbaline Kepong,3.21587,101.638937,Massage Studio
1,Kepong,3.2175,101.63763,Little Oven Bakery,3.215304,101.639455,Bakery
2,Kepong,3.2175,101.63763,Foo Fee 福啡 Metro Perdana Lakeside,3.217494,101.643989,Coffee Shop
3,Kepong,3.2175,101.63763,Restaurant Day Two (天天小食中心),3.212102,101.641737,Food Court
4,Kepong,3.2175,101.63763,Restoran Bakuteh 興記肉骨茶,3.212754,101.645572,Chinese Restaurant


Let's check how many venues were returned for each neighborhood

In [11]:
# number of non-null entries per column for each neighbourhood
venues_df.groupby("Neighbourhood").count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bandar Tun Razak,100,100,100,100,100,100
Batu,100,100,100,100,100,100
Bukit Bintang,100,100,100,100,100,100
Cheras,100,100,100,100,100,100
Kepong,100,100,100,100,100,100
Lembah Pantai,100,100,100,100,100,100
Segambut,100,100,100,100,100,100
Seputeh,100,100,100,100,100,100
Setiawangsa,100,100,100,100,100,100
Titiwangsa,100,100,100,100,100,100


Let's find out how many unique categories can be curated from all the returned venues

In [12]:
# how many distinct venue categories were returned
n_categories = len(venues_df['VenueCategory'].unique())
print('There are {} uniques categories.'.format(n_categories))

There are 205 uniques categories.


In [13]:
# show the distinct venue categories in sorted order
_VenueCategory = np.sort(venues_df['VenueCategory'].unique())
_VenueCategory

array(['Accessories Store', 'Adult Boutique', 'African Restaurant',
       'American Restaurant', 'Aquarium', 'Arcade', 'Art Gallery',
       'Art Museum', 'Arts & Crafts Store', 'Asian Restaurant',
       'Athletics & Sports', 'Auditorium', 'Auto Garage', 'Auto Workshop',
       'BBQ Joint', 'Badminton Court', 'Bagel Shop', 'Bakery', 'Bar',
       'Basketball Court', 'Beer Bar', 'Beer Garden', 'Bistro',
       'Bookstore', 'Boutique', 'Brazilian Restaurant', 'Breakfast Spot',
       'Bridal Shop', 'Bubble Tea Shop', 'Buffet', 'Building',
       'Burger Joint', 'Café', 'Cantonese Restaurant',
       'Chettinad Restaurant', 'Chinese Breakfast Place',
       'Chinese Restaurant', 'Circus', 'Clothing Store', 'Club House',
       'Cocktail Bar', 'Coffee Shop', 'College Academic Building',
       'College Cafeteria', 'Comfort Food Restaurant',
       'Convenience Store', 'Cosmetics Shop', 'Dance Studio',
       'Deli / Bodega', 'Department Store', 'Dessert Shop',
       'Dim Sum Restaurant'

## 6. Analyze Each Neighborhood

In [14]:
# one hot encoding: one indicator column per venue category
category_dummies = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
kl_onehot = category_dummies.assign(Neighbourhoods=venues_df['Neighbourhood'])

# move the last column (the neighbourhood) to the first position
column_names = list(kl_onehot.columns)
fixed_columns = column_names[-1:] + column_names[:-1]
kl_onehot = kl_onehot[fixed_columns]

print(kl_onehot.shape)
kl_onehot.head()

(1100, 206)


Unnamed: 0,Neighbourhoods,Accessories Store,Adult Boutique,African Restaurant,American Restaurant,Aquarium,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auditorium,Auto Garage,Auto Workshop,BBQ Joint,Badminton Court,Bagel Shop,Bakery,Bar,Basketball Court,Beer Bar,Beer Garden,Bistro,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Bridal Shop,Bubble Tea Shop,Buffet,Building,Burger Joint,Café,Cantonese Restaurant,Chettinad Restaurant,Chinese Breakfast Place,Chinese Restaurant,Circus,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Academic Building,College Cafeteria,Comfort Food Restaurant,Convenience Store,Cosmetics Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Shop,Electronics Store,Exhibit,Fabric Shop,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fishing Store,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Service,Food Stand,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Garden,Gas Station,Gay Bar,General Entertainment,Gift Shop,Golf Course,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Hainan Restaurant,Hakka Restaurant,Halal Restaurant,Hardware Store,History Museum,Hockey Arena,Hookah Bar,Hostel,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indonesian Restaurant,Iraqi Restaurant,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Karaoke Bar,Kebab Restaurant,Kids Store,Korean BBQ Restaurant,Korean Restaurant,Lake,Latin American Restaurant,Leather Goods Store,Lingerie Store,Lounge,Malay Restaurant,Mamak Restaurant,Market,Martial Arts School,Massage Studio,Mediterranean Restaurant,Men's Store,Middle Eastern Restaurant,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Moroccan Restaurant,Motorcycle Shop,Movie Theater,Multiplex,Museum,Night Market,Nightclub,Noodle House,North Indian Restaurant,Optical Shop,Other 
Great Outdoors,Outdoor Event Space,Outlet Store,Pakistani Restaurant,Palace,Park,Pastry Shop,Performing Arts Venue,Pet Café,Pet Store,Pharmacy,Pizza Place,Playground,Poke Place,Pool,Pool Hall,Print Shop,Pub,Ramen Restaurant,Residential Building (Apartment / Condo),Resort,Rest Area,Restaurant,Rock Climbing Spot,Sake Bar,Salon / Barbershop,Sandwich Place,Satay Restaurant,Scenic Lookout,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Ski Chalet,Ski Lodge,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Soup Place,South Indian Restaurant,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Stadium,Steakhouse,Street Food Gathering,Supermarket,Supplement Shop,Surf Spot,Sushi Restaurant,Tapas Restaurant,Tattoo Parlor,Tea Room,Temple,Tennis Court,Thai Restaurant,Theater,Thrift / Vintage Store,Toy / Game Store,Track Stadium,Trail,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Volleyball Court,Warehouse Store,Wings Joint,Yoga Studio
0,Kepong,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Kepong,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Kepong,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Kepong,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Kepong,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Next, let's group the rows by neighborhood and take the mean of the frequency of occurrence of each category

In [15]:
# mean of every indicator column per neighbourhood, i.e. the share of venues in each category
kl_grouped = kl_onehot.groupby("Neighbourhoods", as_index=False).mean()

print(kl_grouped.shape)
kl_grouped.head()

(11, 206)


Unnamed: 0,Neighbourhoods,Accessories Store,Adult Boutique,African Restaurant,American Restaurant,Aquarium,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auditorium,Auto Garage,Auto Workshop,BBQ Joint,Badminton Court,Bagel Shop,Bakery,Bar,Basketball Court,Beer Bar,Beer Garden,Bistro,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Bridal Shop,Bubble Tea Shop,Buffet,Building,Burger Joint,Café,Cantonese Restaurant,Chettinad Restaurant,Chinese Breakfast Place,Chinese Restaurant,Circus,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Academic Building,College Cafeteria,Comfort Food Restaurant,Convenience Store,Cosmetics Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Shop,Electronics Store,Exhibit,Fabric Shop,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fishing Store,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Service,Food Stand,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Garden,Gas Station,Gay Bar,General Entertainment,Gift Shop,Golf Course,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Hainan Restaurant,Hakka Restaurant,Halal Restaurant,Hardware Store,History Museum,Hockey Arena,Hookah Bar,Hostel,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indonesian Restaurant,Iraqi Restaurant,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Karaoke Bar,Kebab Restaurant,Kids Store,Korean BBQ Restaurant,Korean Restaurant,Lake,Latin American Restaurant,Leather Goods Store,Lingerie Store,Lounge,Malay Restaurant,Mamak Restaurant,Market,Martial Arts School,Massage Studio,Mediterranean Restaurant,Men's Store,Middle Eastern Restaurant,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Moroccan Restaurant,Motorcycle Shop,Movie Theater,Multiplex,Museum,Night Market,Nightclub,Noodle House,North Indian Restaurant,Optical Shop,Other 
Great Outdoors,Outdoor Event Space,Outlet Store,Pakistani Restaurant,Palace,Park,Pastry Shop,Performing Arts Venue,Pet Café,Pet Store,Pharmacy,Pizza Place,Playground,Poke Place,Pool,Pool Hall,Print Shop,Pub,Ramen Restaurant,Residential Building (Apartment / Condo),Resort,Rest Area,Restaurant,Rock Climbing Spot,Sake Bar,Salon / Barbershop,Sandwich Place,Satay Restaurant,Scenic Lookout,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Ski Chalet,Ski Lodge,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Soup Place,South Indian Restaurant,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Stadium,Steakhouse,Street Food Gathering,Supermarket,Supplement Shop,Surf Spot,Sushi Restaurant,Tapas Restaurant,Tattoo Parlor,Tea Room,Temple,Tennis Court,Thai Restaurant,Theater,Thrift / Vintage Store,Toy / Game Store,Track Stadium,Trail,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Volleyball Court,Warehouse Store,Wings Joint,Yoga Studio
0,Bandar Tun Razak,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.09,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.02,0.0,0.0,0.02,0.03,0.0,0.0,0.0,0.19,0.0,0.0,0.0,0.0,0.03,0.01,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.05,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0
1,Batu,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.09,0.0,0.01,0.0,0.01,0.01,0.0,0.01,0.0,0.04,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.13,0.03,0.01,0.0,0.0,0.05,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.04,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.03,0.0,0.01,0.0,0.01,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.04,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0
2,Bukit Bintang,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.01,0.0,0.0,0.02,0.02,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.05,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.18,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.03,0.02,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.03,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0
3,Cheras,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.04,0.01,0.0,0.01,0.17,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.01,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.04,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0
4,Kepong,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.01,0.04,0.01,0.0,0.0,0.22,0.0,0.01,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.05,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.01,0.0,0.0,0.0,0.0


Create a new DataFrame for Malay Restaurant venues data only

In [16]:
# number of neighbourhoods in which at least one Malay restaurant was returned
int((kl_grouped["Malay Restaurant"] > 0).sum())

11

In [17]:
# keep only the neighbourhood name and its Malay-restaurant frequency
kl_cafe = kl_grouped.loc[:, ["Neighbourhoods", "Malay Restaurant"]]

In [18]:
# preview the first five rows
kl_cafe.head(5)

Unnamed: 0,Neighbourhoods,Malay Restaurant
0,Bandar Tun Razak,0.05
1,Batu,0.04
2,Bukit Bintang,0.01
3,Cheras,0.06
4,Kepong,0.02


## 7. Cluster Neighborhoods

Run k-means to cluster the neighborhoods in Kuala Lumpur into 3 clusters.

In [19]:
# set number of clusters
kclusters = 3

# cluster on the Malay-restaurant frequency only; the neighbourhood name is not a feature.
# `columns=` is used because pandas 2 no longer accepts the axis as a positional argument.
kl_clustering = kl_cafe.drop(columns=["Neighbourhoods"])

# run k-means clustering; n_init is set explicitly so the result does not depend on
# which default the installed scikit-learn version uses
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(kl_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 2, 2, 0, 2, 2, 2, 2, 1, 1], dtype=int32)

In [20]:
# build a new dataframe from the Malay-restaurant frequencies and
# attach the cluster label that k-means assigned to each row
kl_merged = kl_cafe.assign(**{"Cluster Labels": kmeans.labels_})

In [21]:
# use the same key column name as df ("Neighbourhood")
kl_merged = kl_merged.rename(columns={"Neighbourhoods": "Neighbourhood"})
kl_merged.head()

Unnamed: 0,Neighbourhood,Malay Restaurant,Cluster Labels
0,Bandar Tun Razak,0.05,0
1,Batu,0.04,2
2,Bukit Bintang,0.01,2
3,Cheras,0.06,0
4,Kepong,0.02,2


In [22]:
# add latitude/longitude for each neighbourhood by looking its name up in df
neighbourhood_coords = df.set_index("Neighbourhood")
kl_merged = kl_merged.join(neighbourhood_coords, on="Neighbourhood")

print(kl_merged.shape)
kl_merged.head() # check the last columns!

(11, 5)


Unnamed: 0,Neighbourhood,Malay Restaurant,Cluster Labels,Latitude,Longitude
0,Bandar Tun Razak,0.05,0,3.08276,101.72281
1,Batu,0.04,2,3.14789,101.69405
2,Bukit Bintang,0.01,2,3.14777,101.70855
3,Cheras,0.06,0,3.06187,101.74675
4,Kepong,0.02,2,3.2175,101.63763


In [23]:
# order the rows by their cluster label
print(kl_merged.shape)
kl_merged = kl_merged.sort_values(by="Cluster Labels")
kl_merged.head()

(11, 5)


Unnamed: 0,Neighbourhood,Malay Restaurant,Cluster Labels,Latitude,Longitude
0,Bandar Tun Razak,0.05,0,3.08276,101.72281
3,Cheras,0.06,0,3.06187,101.74675
10,Wangsa Maju,0.07,0,3.20387,101.73715
8,Setiawangsa,0.15,1,3.191802,101.740066
9,Titiwangsa,0.12,1,3.18073,101.70321


Finally, let's visualize the resulting clusters

In [24]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one rainbow colour (hex string) per cluster
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]  # only len(ys) is used below
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = list(map(colors.rgb2hex, colors_array))

# add markers to the map
markers_colors = []
cluster_rows = zip(kl_merged['Latitude'], kl_merged['Longitude'],
                   kl_merged['Neighbourhood'], kl_merged['Cluster Labels'])
for lat, lon, poi, cluster in cluster_rows:
    # cluster-1 is -1 for cluster 0, which picks the last colour of the list
    marker_colour = rainbow[cluster-1]
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_colour,
        fill=True,
        fill_color=marker_colour,
        fill_opacity=0.7)
    marker.add_to(map_clusters)

map_clusters

## 8. Examine Clusters

### Cluster 0

In [25]:
# neighbourhoods assigned to cluster 0
kl_merged[kl_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighbourhood,Malay Restaurant,Cluster Labels,Latitude,Longitude
0,Bandar Tun Razak,0.05,0,3.08276,101.72281
3,Cheras,0.06,0,3.06187,101.74675
10,Wangsa Maju,0.07,0,3.20387,101.73715


### Cluster 1

In [26]:
# neighbourhoods assigned to cluster 1
kl_merged[kl_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighbourhood,Malay Restaurant,Cluster Labels,Latitude,Longitude
8,Setiawangsa,0.15,1,3.191802,101.740066
9,Titiwangsa,0.12,1,3.18073,101.70321


### Cluster 2

In [27]:
# neighbourhoods assigned to cluster 2
kl_merged[kl_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighbourhood,Malay Restaurant,Cluster Labels,Latitude,Longitude
1,Batu,0.04,2,3.14789,101.69405
2,Bukit Bintang,0.01,2,3.14777,101.70855
4,Kepong,0.02,2,3.2175,101.63763
5,Lembah Pantai,0.02,2,3.121189,101.663889
6,Segambut,0.03,2,3.1865,101.66795
7,Seputeh,0.03,2,3.11327,101.68033
