In [1]:
import os
from io import StringIO

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans

In [2]:
# Scrape the Bangkok district table from Wikipedia. Fail loudly on an HTTP
# error (or a hung connection) instead of silently parsing an error page.
res = requests.get("https://en.wikipedia.org/wiki/List_of_districts_of_Bangkok", timeout=30)
res.raise_for_status()
soup = BeautifulSoup(res.content,'lxml')
# The first <table> element on the page is taken to be the district list.
table = soup.find_all('table')[0]
# Wrap the markup in StringIO: passing a literal HTML string to read_html is
# deprecated in recent pandas releases, while file-like input is always accepted.
df = pd.read_html(StringIO(str(table)))[0]
df

Unnamed: 0,District(Khet),Postcode,Thai,Population,No. ofSubdistricts(Khwaeng),Latitude,Longitude
0,Bang Bon,10150,บางบอน,105161,4,13.6592,100.3991
1,Bang Kapi,10240,บางกะปิ,148465,2,13.765833,100.647778
2,Bang Khae,10160,บางแค,191781,4,13.696111,100.409444
3,Bang Khen,10220,บางเขน,189539,2,13.873889,100.596389
4,Bang Kho Laem,10120,บางคอแหลม,94956,3,13.693333,100.5025
5,Bang Khun Thian,10150,บางขุนเทียน,165491,2,13.660833,100.435833
6,Bang Na,10260,บางนา,95912,2,13.680081,100.5918
7,Bang Phlat,10700,บางพลัด,99273,4,13.793889,100.505
8,Bang Rak,10500,บางรัก,45875,5,13.730833,100.524167
9,Bang Sue,10800,บางซื่อ,132234,2,13.809722,100.537222


In [3]:
# Keep only the district name and its coordinates, renaming the name column
# to the shorter 'District' used throughout the rest of the notebook.
df_bkk = (
    df[['District(Khet)', 'Latitude', 'Longitude']]
    .rename(columns={'District(Khet)': 'District'})
    .copy()
)
df_bkk

Unnamed: 0,District,Latitude,Longitude
0,Bang Bon,13.6592,100.3991
1,Bang Kapi,13.765833,100.647778
2,Bang Khae,13.696111,100.409444
3,Bang Khen,13.873889,100.596389
4,Bang Kho Laem,13.693333,100.5025
5,Bang Khun Thian,13.660833,100.435833
6,Bang Na,13.680081,100.5918
7,Bang Phlat,13.793889,100.505
8,Bang Rak,13.730833,100.524167
9,Bang Sue,13.809722,100.537222


In [4]:
# Load the per-district population aged 18-40 from a local CSV; the file's
# first column is used as the row index.
df_pop = pd.read_csv("../capstone_coursera/districtwithpop18_40.csv", index_col=0)
df_pop

Unnamed: 0,District,Population18_40
0,Bang Bon,33565
1,Bang Kapi,46041
2,Bang Khae,62257
3,Bang Khen,61167
4,Bang Kho Laem,25477
5,Bang Khun Thian,61456
6,Bang Na,29449
7,Bang Phlat,27858
8,Bang Rak,10924
9,Bang Sue,38954


In [5]:
# Attach the 18-40 population to each district. The base columns are selected
# first so the cell can be re-run: joining onto a frame that already carries
# 'Population18_40' would raise a column-overlap error.
df_bkk = df_bkk[['District', 'Latitude', 'Longitude']].join(df_pop.set_index('District'), on='District')

In [6]:
# Inspect the joined frame: district, coordinates and 18-40 population.
df_bkk

Unnamed: 0,District,Latitude,Longitude,Population18_40
0,Bang Bon,13.6592,100.3991,33565
1,Bang Kapi,13.765833,100.647778,46041
2,Bang Khae,13.696111,100.409444,62257
3,Bang Khen,13.873889,100.596389,61167
4,Bang Kho Laem,13.693333,100.5025,25477
5,Bang Khun Thian,13.660833,100.435833,61456
6,Bang Na,13.680081,100.5918,29449
7,Bang Phlat,13.793889,100.505,27858
8,Bang Rak,13.730833,100.524167,10924
9,Bang Sue,13.809722,100.537222,38954


In [13]:
# Summary statistics of the 18-40 population across districts.
df_bkk['Population18_40'].describe()

count       50.000000
mean     35849.120000
std      15953.862202
min       6068.000000
25%      24397.500000
50%      32718.000000
75%      45953.500000
max      67490.000000
Name: Population18_40, dtype: float64

In [8]:
# Geocode the city centre; the coordinates are used to centre every map below.
address = 'Bangkok'
geolocator = Nominatim(user_agent="explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; surface that as a
# clear error instead of an AttributeError on the next line.
if location is None:
    raise ValueError("Could not geocode address {!r}".format(address))
latitude_bkk = location.latitude
longitude_bkk = location.longitude

In [9]:
# Base map centred on the geocoded Bangkok coordinates.
map_bkk = folium.Map(location=[latitude_bkk, longitude_bkk], zoom_start=10)

# One blue circle per district; the popup shows the district name.
for lat, lng, district in zip(df_bkk['Latitude'], df_bkk['Longitude'], df_bkk['District']):
    district_marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(district, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    district_marker.add_to(map_bkk)

map_bkk

# Find districts with above-average population aged 18-40

In [14]:
# City-wide mean of the per-district 18-40 population; used as the cut-off
# for the "above average" filter.
Avg = df_bkk['Population18_40'].mean()
Avg

35849.12

In [15]:
# Districts whose 18-40 population is strictly greater than the mean.
df_aboveavg = df_bkk.loc[df_bkk['Population18_40'].gt(Avg)]
df_aboveavg

Unnamed: 0,District,Latitude,Longitude,Population18_40
1,Bang Kapi,13.765833,100.647778,46041
2,Bang Khae,13.696111,100.409444,62257
3,Bang Khen,13.873889,100.596389,61167
5,Bang Khun Thian,13.660833,100.435833,61456
9,Bang Sue,13.809722,100.537222,38954
12,Bueng Kum,13.785278,100.669167,44623
13,Chatuchak,13.828611,100.559722,50305
14,Chom Thong,13.677222,100.484722,47742
15,Din Daeng,13.769722,100.552778,36669
16,Don Mueang,13.913611,100.589722,60739


In [16]:
# (rows, columns): number of above-average districts and their attributes.
df_aboveavg.shape

(21, 4)

# GET VENUES FROM FOURSQUARE

In [17]:

# Foursquare API credentials are read from the environment so they never get
# committed with the notebook. Export FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel; if they are unset the
# values are empty strings and the Foursquare requests will be rejected.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
# API version date sent as the `v` parameter of every request.
VERSION = '20180605'

In [18]:
def getNearbyVenues(names, latitudes, longitudes, radius=1500, limit=None):
    """Fetch Foursquare "explore" venues around each named location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location to query; iterated in parallel.
    radius : int, default 1500
        Search radius passed to the API as ``radius``.
    limit : int, optional
        Maximum number of venues requested per location. When omitted, the
        module-level ``LIMIT`` is used, as in the original implementation.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with columns 'District',
        'District Latitude', 'District Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude' and 'Venue Category'. The frame is empty (but still
        has these columns) when no venue is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (e.g. bad credentials).
    """
    columns = ['District',
               'District Latitude',
               'District Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    # Fall back to the notebook-level LIMIT only when no explicit limit is given.
    max_results = LIMIT if limit is None else limit

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and escape the query string.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': max_results,
            },
            timeout=30,
        )
        # Surface HTTP failures directly rather than as a KeyError on the payload.
        response.raise_for_status()

        groups = response.json()["response"]['groups']
        items = groups[0]['items'] if groups else []

        for item in items:
            venue = item['venue']
            # Some venues may carry no category; record None instead of crashing.
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor keeps the schema even when
    # `rows` is empty (assigning `.columns` afterwards would raise).
    return pd.DataFrame(rows, columns=columns)

In [19]:
# LIMIT is read as a module-level global inside getNearbyVenues, so it has to
# be defined before the call below. One API request is made per district.
LIMIT = 200
bkk_venues = getNearbyVenues(names=df_bkk['District'],
                                   latitudes=df_bkk['Latitude'],
                                   longitudes=df_bkk['Longitude']
                                  )

Bang Bon
Bang Kapi
Bang Khae
Bang Khen
Bang Kho Laem
Bang Khun Thian
Bang Na
Bang Phlat
Bang Rak
Bang Sue
Bangkok Noi
Bangkok Yai
Bueng Kum
Chatuchak
Chom Thong
Din Daeng
Don Mueang
Dusit
Huai Khwang
Khan Na Yao
Khlong Sam Wa
Khlong San
Khlong Toei
Lak Si
Lat Krabang
Lat Phrao
Min Buri
Nong Chok
Nong Khaem
Pathum Wan
Phasi Charoen
Phaya Thai
Phra Khanong
Phra Nakhon
Pom Prap Sattru Phai
Prawet
Rat Burana
Ratchathewi
Sai Mai
Samphanthawong
Saphan Sung
Sathon
Suan Luang
Taling Chan
Thawi Watthana
Thon Buri
Thung Khru
Wang Thonglang
Watthana
Yan Nawa


In [20]:
# Total number of venue rows collected across all districts, then a preview.
print(bkk_venues.shape)
bkk_venues.head()

(3937, 7)


Unnamed: 0,District,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Bang Bon,13.6592,100.3991,ขาหมูบางหว้า,13.657136,100.39523,Thai Restaurant
1,Bang Bon,13.6592,100.3991,Irashaimase Japanese Restaurant,13.658358,100.401403,Japanese Restaurant
2,Bang Bon,13.6592,100.3991,ร้านต้นไม้ ริมถนนกาญจนาภิเษก,13.654098,100.405054,Garden Center
3,Bang Bon,13.6592,100.3991,เจ๊ไน้ ก๋วยเตี๋ยวเป็ดตุ๋น,13.654137,100.405323,Noodle House
4,Bang Bon,13.6592,100.3991,หมูสะเต๊ะแม่กำไร,13.653925,100.391426,Satay Restaurant


# Find coffee shops in Bangkok

In [21]:
# Keep only venues whose category is exactly 'Coffee Shop'.
bkk_coffee = bkk_venues.loc[bkk_venues['Venue Category'].eq('Coffee Shop')]

In [22]:
# Display only: the re-indexed copy is not assigned, so bkk_coffee itself
# keeps the row labels it inherited from bkk_venues.
bkk_coffee.reset_index(drop=True)

Unnamed: 0,District,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Bang Bon,13.659200,100.399100,Jeab Coffee Love By Bluekoff,13.655153,100.392817,Coffee Shop
1,Bang Bon,13.659200,100.399100,Café Amazon (คาเฟ่ อเมซอน),13.664560,100.406189,Coffee Shop
2,Bang Bon,13.659200,100.399100,Coffee Ya,13.664640,100.409356,Coffee Shop
3,Bang Kapi,13.765833,100.647778,Starbucks (สตาร์บัคส์),13.767339,100.642136,Coffee Shop
4,Bang Kapi,13.765833,100.647778,TrueCoffee (ทรูคอฟฟี่),13.767280,100.642488,Coffee Shop
...,...,...,...,...,...,...,...
283,Yan Nawa,13.696944,100.543056,Starbucks (สตาร์บัคส์),13.701035,100.536520,Coffee Shop
284,Yan Nawa,13.696944,100.543056,Starbucks (สตาร์บัคส์),13.697771,100.537527,Coffee Shop
285,Yan Nawa,13.696944,100.543056,Starbucks (สตาร์บัคส์),13.697126,100.537882,Coffee Shop
286,Yan Nawa,13.696944,100.543056,Zana's Bean Coffee,13.685169,100.537468,Coffee Shop


In [24]:
# Count coffee shops per district and expose the result as a two-column
# frame: 'District' and 'Coffee shop'.
bkk_coffeeperdistrict = (
    bkk_coffee.groupby('District')['Venue Category']
    .count()
    .rename('Coffee shop')
    .reset_index()
)
bkk_coffeeperdistrict

Unnamed: 0,District,Coffee shop
0,Bang Bon,3
1,Bang Kapi,10
2,Bang Khae,2
3,Bang Khen,4
4,Bang Kho Laem,9
5,Bang Khun Thian,10
6,Bang Na,4
7,Bang Phlat,5
8,Bang Rak,6
9,Bang Sue,3


In [26]:
# Independent copy holding just the district name and its 18-40 population.
df_bkk2 = df_bkk[['District','Population18_40']].copy()

In [81]:
# Left-join the coffee-shop counts onto every district. Districts with no
# coffee shop in the venue data have no match, so their count is set to 0.
# Only the count column is filled, so a missing population would stay visible
# as NaN instead of silently becoming 0.
df_coffee2 = (
    df_bkk2
    .join(bkk_coffeeperdistrict.set_index('District'), on='District')
    .fillna({'Coffee shop': 0})
)

In [33]:
# Inspect population and coffee-shop count side by side for each district.
df_coffee2

Unnamed: 0,District,Population18_40,Coffee shop
0,Bang Bon,33565,3.0
1,Bang Kapi,46041,10.0
2,Bang Khae,62257,2.0
3,Bang Khen,61167,4.0
4,Bang Kho Laem,25477,9.0
5,Bang Khun Thian,61456,10.0
6,Bang Na,29449,4.0
7,Bang Phlat,27858,5.0
8,Bang Rak,10924,6.0
9,Bang Sue,38954,3.0


In [85]:
# People aged 18-40 per (coffee shops + 1), rounded to 2 decimals. The +1
# keeps the denominator non-zero for districts with no coffee shop.
df_coffee2['Per Person'] = (
    df_coffee2['Population18_40']
    .div(df_coffee2['Coffee shop'].add(1))
    .round(2)
)

In [86]:
# Order districts from most to fewest people per coffee shop.
df_coffee2 = df_coffee2.sort_values('Per Person', ascending=False)

# Criterion 2: top 10 districts by population aged 18-40 per coffee shop

In [87]:
# First ten rows of df_coffee2 in its current row order; this is the top ten
# by 'Per Person' only once the frame has been sorted descending on it.
df_coffeetopten = df_coffee2.head(10)
df_coffeetopten

Unnamed: 0,District,Population18_40,Coffee shop,Per Person
24,Lat Krabang,60729,0.0,60729.0
35,Prawet,59059,0.0,59059.0
38,Sai Mai,66690,1.0,33345.0
28,Nong Khaem,50783,1.0,25391.5
2,Bang Khae,62257,2.0,20752.33
20,Khlong Sam Wa,67490,3.0,16872.5
3,Bang Khen,61167,4.0,12233.4
14,Chom Thong,47742,3.0,11935.5
27,Nong Chok,59228,4.0,11845.6
26,Min Buri,45691,3.0,11422.75


In [90]:
# Districts that satisfy both criteria: in the top ten by people per coffee
# shop AND above average in 18-40 population. Matching on both keys also
# brings in the coordinates from df_aboveavg.
df_bestloc1 = df_coffeetopten.merge(
    df_aboveavg,
    how='inner',
    on=['District', 'Population18_40'],
)

In [91]:
# Candidate districts after combining both criteria.
df_bestloc1

Unnamed: 0,District,Population18_40,Coffee shop,Per Person,Latitude,Longitude
0,Lat Krabang,60729,0.0,60729.0,13.722317,100.759669
1,Prawet,59059,0.0,59059.0,13.716944,100.694444
2,Sai Mai,66690,1.0,33345.0,13.919167,100.645833
3,Nong Khaem,50783,1.0,25391.5,13.704722,100.348889
4,Bang Khae,62257,2.0,20752.33,13.696111,100.409444
5,Khlong Sam Wa,67490,3.0,16872.5,13.859722,100.704167
6,Bang Khen,61167,4.0,12233.4,13.873889,100.596389
7,Chom Thong,47742,3.0,11935.5,13.677222,100.484722
8,Nong Chok,59228,4.0,11845.6,13.855556,100.8625
9,Min Buri,45691,3.0,11422.75,13.813889,100.748056


In [39]:
# Fresh base map centred on the geocoded Bangkok coordinates.
map_bkk = folium.Map(location=[latitude_bkk, longitude_bkk], zoom_start=10)

# Mark each candidate district from df_bestloc1; the popup shows its name.
for lat, lng, district in zip(df_bestloc1['Latitude'], df_bestloc1['Longitude'], df_bestloc1['District']):
    district_marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(district, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    district_marker.add_to(map_bkk)

map_bkk

# Venue characteristics

In [40]:
# Reminder of the venue table's size and layout before characterising districts.
print(bkk_venues.shape)
bkk_venues.head()

(3937, 7)


Unnamed: 0,District,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Bang Bon,13.6592,100.3991,ขาหมูบางหว้า,13.657136,100.39523,Thai Restaurant
1,Bang Bon,13.6592,100.3991,Irashaimase Japanese Restaurant,13.658358,100.401403,Japanese Restaurant
2,Bang Bon,13.6592,100.3991,ร้านต้นไม้ ริมถนนกาญจนาภิเษก,13.654098,100.405054,Garden Center
3,Bang Bon,13.6592,100.3991,เจ๊ไน้ ก๋วยเตี๋ยวเป็ดตุ๋น,13.654137,100.405323,Noodle House
4,Bang Bon,13.6592,100.3991,หมูสะเต๊ะแม่กำไร,13.653925,100.391426,Satay Restaurant


In [80]:
# Save the collected venues to bkk_venues.csv (relative path; row index included).
bkk_venues.to_csv("bkk_venues.csv")

In [41]:
# Number of venues returned for each district.
bkk_venues.groupby('District')[['Venue Category']].count()

Unnamed: 0_level_0,Venue Category
District,Unnamed: 1_level_1
Bang Bon,46
Bang Kapi,100
Bang Khae,34
Bang Khen,77
Bang Kho Laem,100
Bang Khun Thian,100
Bang Na,46
Bang Phlat,94
Bang Rak,100
Bang Sue,69


In [42]:
# Report how many distinct venue categories were collected (missing values,
# if any, count as one category, matching len(unique())).
print('There are {} uniques categories.'.format(bkk_venues['Venue Category'].nunique(dropna=False)))

There are 269 uniques categories.


In [43]:
# One indicator column per venue category, named after the category itself.
bkk_onehot = pd.get_dummies(bkk_venues[['Venue Category']], prefix="", prefix_sep="")
# Append the district of each venue, then rotate that last column to the front.
bkk_onehot['District'] = bkk_venues['District']
fixed_columns = list(bkk_onehot.columns[-1:]) + list(bkk_onehot.columns[:-1])
bkk_onehot = bkk_onehot.loc[:, fixed_columns]
bkk_onehot.head()

Unnamed: 0,District,Accessories Store,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Arcade,...,Vietnamese Restaurant,Vineyard,Warehouse Store,Water Park,Whisky Bar,Wine Bar,Wings Joint,Yoga Studio,Yoshoku Restaurant,Zoo
0,Bang Bon,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Bang Bon,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Bang Bon,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Bang Bon,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Bang Bon,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [44]:
# Mean of each indicator column per district, i.e. the share of the
# district's venues that fall in each category.
bkk_grouped = bkk_onehot.groupby('District', as_index=False).mean()
bkk_grouped

Unnamed: 0,District,Accessories Store,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Arcade,...,Vietnamese Restaurant,Vineyard,Warehouse Store,Water Park,Whisky Bar,Wine Bar,Wings Joint,Yoga Studio,Yoshoku Restaurant,Zoo
0,Bang Bon,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bang Kapi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0
2,Bang Khae,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bang Khen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.025974,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Bang Kho Laem,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,...,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0
5,Bang Khun Thian,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Bang Na,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0
7,Bang Phlat,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Bang Rak,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0
9,Bang Sue,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0


In [45]:
num_top_venues = 10

# Print, for every district, its most frequent venue categories and their shares.
for hood in bkk_grouped['District']:
    print("----"+hood+"----")
    # Transpose the district's single row so that each category becomes a row.
    temp = bkk_grouped[bkk_grouped['District'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    # Drop the first row, which holds the 'District' label rather than a category.
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    # Highest share first; only the first num_top_venues rows are printed.
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bang Bon----
                  venue  freq
0     Convenience Store  0.09
1          Noodle House  0.09
2  Fast Food Restaurant  0.07
3           Coffee Shop  0.07
4       Thai Restaurant  0.04
5            Restaurant  0.04
6    Chinese Restaurant  0.04
7    Som Tum Restaurant  0.04
8  Gym / Fitness Center  0.04
9   Japanese Restaurant  0.02


----Bang Kapi----
                 venue  freq
0          Coffee Shop  0.10
1         Noodle House  0.09
2   Som Tum Restaurant  0.07
3  Japanese Restaurant  0.06
4      Thai Restaurant  0.05
5       Clothing Store  0.04
6           Steakhouse  0.03
7               Market  0.03
8         Dessert Shop  0.03
9            BBQ Joint  0.03


----Bang Khae----
               venue  freq
0  Convenience Store  0.18
1       Noodle House  0.09
2               Café  0.09
3        Coffee Shop  0.06
4   Asian Restaurant  0.06
5      Shopping Mall  0.06
6    Thai Restaurant  0.06
7     Scenic Lookout  0.03
8        Beer Garden  0.03
9           Car Wash  0.

                    venue  freq
0             Coffee Shop  0.07
1               BBQ Joint  0.07
2       Convenience Store  0.06
3     Japanese Restaurant  0.06
4         Thai Restaurant  0.06
5       Hotpot Restaurant  0.04
6                     Bar  0.04
7        Asian Restaurant  0.04
8  Shabu-Shabu Restaurant  0.04
9    Fast Food Restaurant  0.04


----Phaya Thai----
                 venue  freq
0          Coffee Shop  0.10
1                 Café  0.06
2  Japanese Restaurant  0.06
3      Thai Restaurant  0.06
4     Sushi Restaurant  0.05
5         Noodle House  0.05
6           Restaurant  0.05
7                  Bar  0.05
8               Bakery  0.03
9   Som Tum Restaurant  0.03


----Phra Khanong----
                                      venue  freq
0                         Convenience Store  0.19
1                               Coffee Shop  0.09
2                              Noodle House  0.07
3                                     Hotel  0.06
4                        Chinese Re

In [46]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first element of ``row`` is skipped (callers pass a row whose first
    field is the district name). The remaining values are sorted in
    descending order and the index labels of the first ``num_top_venues``
    entries are returned as a NumPy array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.head(num_top_venues).index.values

In [62]:
num_top_venues = 10

# Ordinal suffixes for ranks 1-3; every later rank uses 'th'.
indicators = ['st', 'nd', 'rd']

# Column headers: 'District', '1st Most Common Venue', '2nd ...', ..., '10th ...'.
# The suffix is chosen with an explicit bounds check rather than a bare
# `except`, which would also have hidden unrelated errors.
columns = ['District']
for rank in range(1, num_top_venues + 1):
    suffix = indicators[rank - 1] if rank <= len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

district_venues_sorted = pd.DataFrame(columns=columns)
district_venues_sorted['District'] = bkk_grouped['District']

# Fill each district's row with its top categories, most frequent first.
for ind in np.arange(bkk_grouped.shape[0]):
    district_venues_sorted.iloc[ind, 1:] = return_most_common_venues(bkk_grouped.iloc[ind, :], num_top_venues)

district_venues_sorted

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bang Bon,Noodle House,Convenience Store,Fast Food Restaurant,Coffee Shop,Gym / Fitness Center,Chinese Restaurant,Restaurant,Thai Restaurant,Som Tum Restaurant,Grocery Store
1,Bang Kapi,Coffee Shop,Noodle House,Som Tum Restaurant,Japanese Restaurant,Thai Restaurant,Clothing Store,Steakhouse,Dessert Shop,Fast Food Restaurant,Market
2,Bang Khae,Convenience Store,Noodle House,Café,Coffee Shop,Shopping Mall,Thai Restaurant,Asian Restaurant,Diner,Som Tum Restaurant,Bus Stop
3,Bang Khen,Fast Food Restaurant,Convenience Store,Noodle House,Asian Restaurant,Coffee Shop,Thai Restaurant,Hotpot Restaurant,Bookstore,Steakhouse,Restaurant
4,Bang Kho Laem,Thai Restaurant,Coffee Shop,Noodle House,Convenience Store,Chinese Restaurant,Café,Ice Cream Shop,Seafood Restaurant,Pub,Shopping Mall
5,Bang Khun Thian,Coffee Shop,Thai Restaurant,Noodle House,Japanese Restaurant,Asian Restaurant,Department Store,Pizza Place,Hotpot Restaurant,Ice Cream Shop,Gym / Fitness Center
6,Bang Na,Thai Restaurant,Coffee Shop,Fast Food Restaurant,Convenience Store,Café,Noodle House,Gas Station,Asian Restaurant,Restaurant,Pier
7,Bang Phlat,Thai Restaurant,Convenience Store,Noodle House,Café,Coffee Shop,Fast Food Restaurant,Hotel,Ice Cream Shop,Bakery,Japanese Restaurant
8,Bang Rak,Thai Restaurant,Noodle House,Café,Hotel,Coffee Shop,Chinese Restaurant,Spa,Seafood Restaurant,Hostel,Bakery
9,Bang Sue,Noodle House,Thai Restaurant,Convenience Store,Ice Cream Shop,Coffee Shop,Train Station,Hotpot Restaurant,Clothing Store,Market,BBQ Joint


In [60]:
kclusters = 6

# Cluster on the category-share columns only; the district name is a label,
# not a feature. `columns=` replaces the positional axis argument, which
# pandas 2.x no longer accepts.
bkk_grouped_clustering = bkk_grouped.drop(columns='District')

# n_init is pinned explicitly because its default differs between
# scikit-learn releases; 10 was the long-standing default.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(bkk_grouped_clustering)

# Cluster label assigned to each of the first ten districts.
kmeans.labels_[0:10]

array([1, 0, 3, 1, 1, 0, 1, 3, 5, 3])

In [63]:
# Drop any label column left over from a previous run so the cell can be
# re-executed; DataFrame.insert raises if the column already exists.
district_venues_sorted = district_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
district_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [64]:
# Attach each district's cluster label and ranked venue categories to its
# coordinates and population, matching rows on the district name.
bkk_merged = df_bkk.join(district_venues_sorted.set_index('District'), on='District')

bkk_merged.head()

Unnamed: 0,District,Latitude,Longitude,Population18_40,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bang Bon,13.6592,100.3991,33565,1,Noodle House,Convenience Store,Fast Food Restaurant,Coffee Shop,Gym / Fitness Center,Chinese Restaurant,Restaurant,Thai Restaurant,Som Tum Restaurant,Grocery Store
1,Bang Kapi,13.765833,100.647778,46041,0,Coffee Shop,Noodle House,Som Tum Restaurant,Japanese Restaurant,Thai Restaurant,Clothing Store,Steakhouse,Dessert Shop,Fast Food Restaurant,Market
2,Bang Khae,13.696111,100.409444,62257,3,Convenience Store,Noodle House,Café,Coffee Shop,Shopping Mall,Thai Restaurant,Asian Restaurant,Diner,Som Tum Restaurant,Bus Stop
3,Bang Khen,13.873889,100.596389,61167,1,Fast Food Restaurant,Convenience Store,Noodle House,Asian Restaurant,Coffee Shop,Thai Restaurant,Hotpot Restaurant,Bookstore,Steakhouse,Restaurant
4,Bang Kho Laem,13.693333,100.5025,25477,1,Thai Restaurant,Coffee Shop,Noodle House,Convenience Store,Chinese Restaurant,Café,Ice Cream Shop,Seafood Restaurant,Pub,Shopping Mall


In [65]:
# (rows, columns) of the merged frame.
bkk_merged.shape

(50, 15)

In [66]:
map_clusters = folium.Map(location=[latitude_bkk, longitude_bkk], zoom_start=11)

# One hex colour per cluster, evenly spaced along the rainbow colormap.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# One circle per district, coloured by its cluster; the popup shows name and label.
for lat, lon, poi, cluster in zip(bkk_merged['Latitude'], bkk_merged['Longitude'], bkk_merged['District'], bkk_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # The `cluster-1` offset is kept so colours stay as before: label 0 wraps
    # around to the last palette entry, so every cluster still gets its own colour.
    cluster_color = rainbow[cluster-1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [67]:
# Districts labelled cluster 0: name (column 0) plus the ranked venue columns
# (position 5 onward), skipping coordinates, population and the label itself.
bkk_merged[bkk_merged['Cluster Labels'].eq(0)].iloc[:, [0, *range(5, bkk_merged.shape[1])]]

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Bang Kapi,Coffee Shop,Noodle House,Som Tum Restaurant,Japanese Restaurant,Thai Restaurant,Clothing Store,Steakhouse,Dessert Shop,Fast Food Restaurant,Market
5,Bang Khun Thian,Coffee Shop,Thai Restaurant,Noodle House,Japanese Restaurant,Asian Restaurant,Department Store,Pizza Place,Hotpot Restaurant,Ice Cream Shop,Gym / Fitness Center
10,Bangkok Noi,Noodle House,Café,Coffee Shop,Convenience Store,Supermarket,Som Tum Restaurant,Dessert Shop,Diner,Sushi Restaurant,Bar
13,Chatuchak,Coffee Shop,Thai Restaurant,Café,Gym / Fitness Center,Flea Market,Dessert Shop,Hotpot Restaurant,Fast Food Restaurant,Shopping Mall,Clothing Store
19,Khan Na Yao,Thai Restaurant,Coffee Shop,Japanese Restaurant,Bakery,Convenience Store,Asian Restaurant,Noodle House,Steakhouse,Som Tum Restaurant,Supermarket
20,Khlong Sam Wa,Noodle House,Japanese Restaurant,Convenience Store,Coffee Shop,Thai Restaurant,Zoo,Dessert Shop,Pool,Restaurant,Buddhist Temple
22,Khlong Toei,Coffee Shop,Convenience Store,Japanese Restaurant,Bakery,Gym / Fitness Center,Thai Restaurant,Noodle House,Café,Hotpot Restaurant,Hostel
25,Lat Phrao,Coffee Shop,Noodle House,Ice Cream Shop,Restaurant,Hotpot Restaurant,Convenience Store,Chinese Restaurant,Thai Restaurant,Asian Restaurant,Supermarket
30,Phasi Charoen,Coffee Shop,BBQ Joint,Convenience Store,Japanese Restaurant,Thai Restaurant,Bar,Fast Food Restaurant,Asian Restaurant,Shabu-Shabu Restaurant,Hotpot Restaurant
31,Phaya Thai,Coffee Shop,Japanese Restaurant,Café,Thai Restaurant,Sushi Restaurant,Noodle House,Bar,Restaurant,Burger Joint,Bakery


In [73]:
# Districts labelled cluster 1: name (column 0) plus the ranked venue columns
# (position 5 onward), skipping coordinates, population and the label itself.
bkk_merged[bkk_merged['Cluster Labels'].eq(1)].iloc[:, [0, *range(5, bkk_merged.shape[1])]]

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bang Bon,Noodle House,Convenience Store,Fast Food Restaurant,Coffee Shop,Gym / Fitness Center,Chinese Restaurant,Restaurant,Thai Restaurant,Som Tum Restaurant,Grocery Store
3,Bang Khen,Fast Food Restaurant,Convenience Store,Noodle House,Asian Restaurant,Coffee Shop,Thai Restaurant,Hotpot Restaurant,Bookstore,Steakhouse,Restaurant
4,Bang Kho Laem,Thai Restaurant,Coffee Shop,Noodle House,Convenience Store,Chinese Restaurant,Café,Ice Cream Shop,Seafood Restaurant,Pub,Shopping Mall
6,Bang Na,Thai Restaurant,Coffee Shop,Fast Food Restaurant,Convenience Store,Café,Noodle House,Gas Station,Asian Restaurant,Restaurant,Pier
12,Bueng Kum,Thai Restaurant,Noodle House,Supermarket,Coffee Shop,Convenience Store,Café,Ice Cream Shop,Hotpot Restaurant,Dessert Shop,Fast Food Restaurant
15,Din Daeng,Coffee Shop,Noodle House,Bar,Thai Restaurant,Convenience Store,Restaurant,Hotel,Bakery,Sushi Restaurant,Café
16,Don Mueang,Coffee Shop,Convenience Store,Noodle House,Fast Food Restaurant,Airport Service,Restaurant,Thai Restaurant,Hotel,Airport,Sushi Restaurant
18,Huai Khwang,Thai Restaurant,Noodle House,Hotel,Convenience Store,Coffee Shop,Dessert Shop,Japanese Restaurant,Korean Restaurant,Seafood Restaurant,Café
23,Lak Si,Coffee Shop,Thai Restaurant,Noodle House,Convenience Store,Café,Hotel,Fast Food Restaurant,Japanese Restaurant,Gym / Fitness Center,Food Court
26,Min Buri,Thai Restaurant,Restaurant,Coffee Shop,Intersection,Asian Restaurant,Department Store,Furniture / Home Store,Noodle House,Massage Studio,Park


In [69]:
# Districts labelled cluster 2: name (column 0) plus the ranked venue columns
# (position 5 onward), skipping coordinates, population and the label itself.
bkk_merged[bkk_merged['Cluster Labels'].eq(2)].iloc[:, [0, *range(5, bkk_merged.shape[1])]]

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Dusit,Noodle House,Thai Restaurant,Coffee Shop,Café,Asian Restaurant,Museum,Palace,Chinese Restaurant,Som Tum Restaurant,Dessert Shop
34,Pom Prap Sattru Phai,Noodle House,Thai Restaurant,Asian Restaurant,Chinese Restaurant,Hostel,Café,Museum,Hotel,Dessert Shop,Vegetarian / Vegan Restaurant
41,Sathon,Noodle House,Asian Restaurant,Thai Restaurant,Coffee Shop,Hotel,Chinese Restaurant,Café,Bar,Dessert Shop,Steakhouse
42,Suan Luang,Noodle House,Thai Restaurant,Coffee Shop,Som Tum Restaurant,Café,Convenience Store,Japanese Restaurant,Supermarket,Seafood Restaurant,Shopping Mall
43,Taling Chan,Noodle House,Convenience Store,Thai Restaurant,Coffee Shop,Café,Soccer Field,Seafood Restaurant,Asian Restaurant,Badminton Court,Chinese Restaurant


In [71]:
# Districts labelled cluster 3: name (column 0) plus the ranked venue columns
# (position 5 onward), skipping coordinates, population and the label itself.
bkk_merged[bkk_merged['Cluster Labels'].eq(3)].iloc[:, [0, *range(5, bkk_merged.shape[1])]]

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Bang Khae,Convenience Store,Noodle House,Café,Coffee Shop,Shopping Mall,Thai Restaurant,Asian Restaurant,Diner,Som Tum Restaurant,Bus Stop
7,Bang Phlat,Thai Restaurant,Convenience Store,Noodle House,Café,Coffee Shop,Fast Food Restaurant,Hotel,Ice Cream Shop,Bakery,Japanese Restaurant
9,Bang Sue,Noodle House,Thai Restaurant,Convenience Store,Ice Cream Shop,Coffee Shop,Train Station,Hotpot Restaurant,Clothing Store,Market,BBQ Joint
11,Bangkok Yai,Noodle House,Convenience Store,Japanese Restaurant,Chinese Restaurant,Train Station,Coffee Shop,Dessert Shop,Hotpot Restaurant,Asian Restaurant,Seafood Restaurant
24,Lat Krabang,Thai Restaurant,Noodle House,Convenience Store,Hotel,Café,Rental Car Location,Som Tum Restaurant,Bar,Steakhouse,Train Station
28,Nong Khaem,Convenience Store,Asian Restaurant,Market,Beer Garden,Thai Restaurant,Chinese Restaurant,Flea Market,Noodle House,Bus Stop,Dessert Shop
32,Phra Khanong,Convenience Store,Coffee Shop,Noodle House,Hotel,Italian Restaurant,Chinese Restaurant,Café,Residential Building (Apartment / Condo),Fast Food Restaurant,Ice Cream Shop
38,Sai Mai,Convenience Store,Thai Restaurant,Noodle House,Market,Bar,Gym / Fitness Center,Supermarket,Soup Place,Food Truck,Ice Cream Shop
44,Thawi Watthana,Thai Restaurant,Noodle House,Convenience Store,Asian Restaurant,Furniture / Home Store,Coffee Shop,Café,Supermarket,Market,Clothing Store
45,Thon Buri,Noodle House,Convenience Store,Thai Restaurant,Asian Restaurant,Coffee Shop,Seafood Restaurant,Hotpot Restaurant,Dessert Shop,Café,Steakhouse


In [70]:
# Districts labelled cluster 4: name (column 0) plus the ranked venue columns
# (position 5 onward), skipping coordinates, population and the label itself.
bkk_merged[bkk_merged['Cluster Labels'].eq(4)].iloc[:, [0, *range(5, bkk_merged.shape[1])]]

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Chom Thong,Convenience Store,Thai Restaurant,Coffee Shop,Noodle House,Bus Station,Asian Restaurant,Hotpot Restaurant,Fast Food Restaurant,Bus Stop,Grocery Store
35,Prawet,Convenience Store,Noodle House,Asian Restaurant,Thai Restaurant,Electronics Store,Som Tum Restaurant,Resort,Halal Restaurant,Café,Hobby Shop


In [72]:
# Districts labelled cluster 5: name (column 0) plus the ranked venue columns
# (position 5 onward), skipping coordinates, population and the label itself.
bkk_merged[bkk_merged['Cluster Labels'].eq(5)].iloc[:, [0, *range(5, bkk_merged.shape[1])]]

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Bang Rak,Thai Restaurant,Noodle House,Café,Hotel,Coffee Shop,Chinese Restaurant,Spa,Seafood Restaurant,Hostel,Bakery
21,Khlong San,Chinese Restaurant,Noodle House,Hotel,Bar,Dessert Shop,Café,Coffee Shop,Spa,Art Gallery,Thai Restaurant
29,Pathum Wan,Noodle House,Dessert Shop,Hotel,Asian Restaurant,Coffee Shop,Seafood Restaurant,Bar,Hostel,Café,Shopping Mall
33,Phra Nakhon,Thai Restaurant,Hotel,Hostel,Bar,Café,Noodle House,Vegetarian / Vegan Restaurant,Massage Studio,Asian Restaurant,Spa
37,Ratchathewi,Hotel,Coffee Shop,Café,Hostel,Steakhouse,Massage Studio,Thai Restaurant,Noodle House,Convenience Store,Food Court
39,Samphanthawong,Noodle House,Hotel,Bar,Chinese Restaurant,Coffee Shop,Art Gallery,Thai Restaurant,Spa,Dessert Shop,Café


# Phaya Thai is in cluster No. 1 (cluster label 0)

In [74]:
# Names of the districts that share cluster label 0, kept as a one-column
# frame (first column of bkk_merged) so it can be merged on 'District'.
df_c3 = bkk_merged[bkk_merged['Cluster Labels'].eq(0)].iloc[:, [0]]
df_c3

Unnamed: 0,District
1,Bang Kapi
5,Bang Khun Thian
10,Bangkok Noi
13,Chatuchak
19,Khan Na Yao
20,Khlong Sam Wa
22,Khlong Toei
25,Lat Phrao
30,Phasi Charoen
31,Phaya Thai


In [93]:
# Final shortlist: candidate districts that also belong to the selected cluster.
df_best = df_bestloc1.merge(df_c3, how='inner', on='District')
df_best

Unnamed: 0,District,Population18_40,Coffee shop,Per Person,Latitude,Longitude
0,Khlong Sam Wa,67490,3.0,16872.5,13.859722,100.704167


In [76]:
# Fresh base map centred on the geocoded Bangkok coordinates.
map_bkk = folium.Map(location=[latitude_bkk, longitude_bkk], zoom_start=10)

# Mark the shortlisted district(s) from df_best; the popup shows the name.
for lat, lng, district in zip(df_best['Latitude'], df_best['Longitude'], df_best['District']):
    district_marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(district, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    district_marker.add_to(map_bkk)

map_bkk