In [4]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes 
import folium 

print('Libraries imported.')

Libraries imported.


##### Define Foursquare Credentials and Version

In [1]:
# Pull API keys from the file
with open('4square_keys.txt','r') as f:
    CLIENT_ID, CLIENT_SECRET = [l.strip() for l in f.readlines()]

VERSION = '20180604'
LIMIT = 500

#### Explore venues around my work place 

In [5]:
address = 'Russia, Taganrog'
geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, ',', longitude)

47.2153657 , 38.9285216


In [6]:
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            latitude, 
            longitude, 
            500, 
            LIMIT)

In [33]:
import json, codecs
def json_to_file(data, name='work_nbrs.json'):
    with open(name, 'wb') as f:
        json.dump(data, codecs.getwriter('utf-8')(f), ensure_ascii=False)

In [54]:
def searchVenuesInRect(sw, ne):
    
    venues_list=[]
    
    def get_category(cat_list):
        if len(cat_list) == 0: return None
        else: return cat_list[0]['name']
              
    # create the API request URL
    url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&intent=browse&sw={},{}&ne={},{}&limit={}'.format(
            CLIENT_ID, CLIENT_SECRET, VERSION, 
            sw[0], sw[1], ne[0], ne[1],
            LIMIT)
            
    # make the GET request
    results = requests.get(url).json()['response']['venues']
        
    # return only relevant information for each nearby venue
    venues_list.append([(
        v['name'], 
        v['location']['lat'], 
        v['location']['lng'],  
        get_category(v['categories']),
        v['id']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = [
        'Venue', 'Latitude', 'Longitude', 'Category', 'Id']
    
    return(nearby_venues)
        

In [55]:
def searchVenuesInCircle(c, r):
    
    venues_list=[]
    
    def get_category(cat_list):
        if len(cat_list) == 0:
            return None
        else:
            return cat_list[0]['name']
              
    # create the API request URL
    url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            c[0], 
            c[1], 
            r,             
            LIMIT)
            
    # make the GET request
    results = requests.get(url).json()['response']['venues']
        
    # return only relevant information for each nearby venue
    venues_list.append([(
        v['name'], 
        v['location']['lat'], 
        v['location']['lng'],  
        get_category(v['categories']),
        v['id']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = [
        'Venue', 'Latitude', 'Longitude', 'Category', 'Id']
    
    return(nearby_venues)
        

In [56]:
sw = [47.2153657 , 38.9285216]
ne = [47.2153657 , 38.9285216]
venues = searchVenuesInRect(sw=sw, ne=ne)
venues.head()

Unnamed: 0,Venue,Latitude,Longitude,Category,Id
0,Площадь перед администрацией города,47.215733,38.92823,Plaza,5368f4ad498ea0cb80cef632
1,Администрация Таганрога,47.215517,38.92842,City Hall,4da693d90cb66f658708dafc
2,Театр имени А. П. Чехова,47.216325,38.928217,Theater,4dcbe98a1f6ea1401d49d12a
3,Л'Этуаль,47.215416,38.929266,Cosmetics Shop,4f83002ee4b0b2237e8a6cb1
4,Культ вина,47.21551,38.92931,Wine Bar,5c74142e60255e002c1aefbc


#### Create a map of Taganrog city

In [33]:
# plot venues on map
def plot_venues(venues, loc):
    map_ = folium.Map(location=loc, zoom_start=13)
   
    # add markers to map
    for lat, lng, name, cat in zip(venues['Latitude'], venues['Longitude'], venues['Venue'], venues['Category']):
        label = '{}, {}'.format(name, cat)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False).add_to(map_)  
        
    return map_

In [34]:
# plot venues with rectangle on map
def plot_venues_with_rect(venues, sw, ne):
    map_ = plot_venues(venues, sw)
    # add bounding box 
    map_.fit_bounds([sw, ne])
    folium.Rectangle([sw, ne], color='red').add_to(map_)
    return map_

In [32]:
# plot venues on map with a search region
def plotVenuesEx(venues, sw, ne, r=None):
    map_tag = plot_venues(venues, sw)

    # add bounding box or circle to map
    if r == None:
        folium.Rectangle([sw, ne], color='red').add_to(map_tag)
    else:
        folium.Circle(sw, r, color='red').add_to(map_tag)
        
    return map_tag

In [347]:
center = [47.2153657, 38.9285216]
venues = searchVenuesInRect(sw=center, ne=[center[0]+0.005, center[1]+0.005])
#plotVenuesEx(venues=venues, sw=center, ne=[center[0]+0.005, center[1]+0.005])
plot_venues(venues, center)

#### Get the city bounding box

In [28]:
url_bounds = 'https://nominatim.openstreetmap.org/search?format=json&q=Taganrog, Russia&polygon_geojson=1'
# get borders in json format
result = requests.get(url_bounds).json()[0]
bounds = result['boundingbox']
# convert to float
city_rect = [float(i) for i in bounds] 
city_center = [float(result['lat']), float(result['lon'])]
print('city rect:', city_rect)
print('city center:', city_center)

city rect: [47.1887122, 47.2899427, 38.7929772, 38.9701473]
city center: [47.2153657, 38.9285216]


#### Get the list of all venues in the city

In [None]:
grid_step = 0.005
city_venues = pd.DataFrame(columns=['Venue', 'Latitude', 'Longitude', 'Category', 'Id'])
for lat in np.arange(city_rect[0], city_rect[1], grid_step):
    for lon in np.arange(city_rect[2], city_rect[3], grid_step):
        loc_venues = searchVenuesInRect(sw=[lat, lon], ne=[lat+grid_step, lon+grid_step])
        city_venues = city_venues.append(loc_venues)

print(city_venues.shape)
city_venues.head()

In [472]:
city_venues = city_venues.dropna(subset=['Category'])

In [None]:
city_venues = city_venues.drop_duplicates('Id')
city_venues = city_venues.reset_index(drop=True)

#### Save the dataset to csv

In [40]:
# save dataframe to csv file
city_venues.to_csv('city_venues.csv', index=False)

#### Draw the coordinate grid

In [141]:
map_tag = folium.Map(location=[city_rect[0], city_rect[2]], zoom_start=12)

for lat in np.arange(city_rect[0], city_rect[1], grid_step):
    for lon in np.arange(city_rect[2], city_rect[3], grid_step):
        folium.Rectangle([[lat, lon], [lat+grid_step, lon+grid_step]], color='red', weight=0.3).add_to(map_tag)
        
map_tag.fit_bounds([[city_rect[0], city_rect[2]], [city_rect[1], city_rect[3]]])
map_tag

### Get categories

In [36]:
url_cat = 'https://api.foursquare.com/v2/venues/categories?client_id={}&client_secret={}&v={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION)
url_cat

'https://api.foursquare.com/v2/venues/categories?client_id=V53MHPBBL23EVUFTF31HTKJAFRJIK2QL1WJURTZ3ARRFF3KE&client_secret=G5Z0ALDAAKEOPESPXKBZPXNB2TIEJKGWM33ZGSBCNW0VGW0T&v=20180604'

In [37]:
result = requests.get(url_cat).json()
df_cat = json_normalize(result['response']['categories'])
df_cat.head()

Unnamed: 0,id,name,pluralName,shortName,categories,icon.prefix,icon.suffix
0,4d4b7104d754a06370d81259,Arts & Entertainment,Arts & Entertainment,Arts & Entertainment,"[{'id': '56aa371be4b08b9a8d5734db', 'name': 'A...",https://ss3.4sqi.net/img/categories_v2/arts_en...,.png
1,4d4b7105d754a06372d81259,College & University,Colleges & Universities,College & Education,"[{'id': '4bf58dd8d48988d198941735', 'name': 'C...",https://ss3.4sqi.net/img/categories_v2/educati...,.png
2,4d4b7105d754a06373d81259,Event,Events,Event,"[{'id': '52f2ab2ebcbc57f1066b8b3b', 'name': 'C...",https://ss3.4sqi.net/img/categories_v2/event/d...,.png
3,4d4b7105d754a06374d81259,Food,Food,Food,"[{'id': '503288ae91d4c4b30a586d67', 'name': 'A...",https://ss3.4sqi.net/img/categories_v2/food/de...,.png
4,4d4b7105d754a06376d81259,Nightlife Spot,Nightlife Spots,Nightlife,"[{'id': '4bf58dd8d48988d116941735', 'name': 'B...",https://ss3.4sqi.net/img/categories_v2/nightli...,.png


In [38]:
def findCategory(cat_list, cat_id):
    if len(cat_list) == 0: return False
    df = json_normalize(cat_list)
    if df[df.id == cat_id].shape[0] > 0: return True
    for c in df.categories: 
        if findCategory(c, cat_id): return True
    return False

def isFoodCategory(df, cat_id):
    food_id = '4d4b7105d754a06374d81259'
    if cat_id == food_id: return True
    if findCategory(df.loc[df.id == food_id, 'categories'].to_list()[0], cat_id): return True
    return False

In [39]:
isFoodCategory(df_cat, '4bf58dd8d48988d15b941735')

False

In [41]:
city_venues = pd.read_csv('city_clean.csv')
city_venues.shape

(3424, 12)

In [49]:
mask = city_venues['Category_id'].map(lambda x: isFoodCategory(df_cat, x))
food_venues = city_venues.loc[mask]
city_venues['Food_venue'] = mask * 1
city_venues.head()

Unnamed: 0,Venue,Latitude,Longitude,Category,Id,Rating,Likes,Tips,PostalCode,Price_tier,createdAt,Category_id,Food_venue
0,Место Разворота Маршруток,47.202091,38.854572,Bus Line,5183984a498e46c7f9c5af0a,0.0,0.0,1.0,347900.0,0.0,1367579000.0,4bf58dd8d48988d12b951735,0
1,Ресторан Пирамила,47.217564,38.855255,Eastern European Restaurant,52f6564c498e9e09e3e33147,0.0,0.0,0.0,347900.0,2.0,1391876000.0,4bf58dd8d48988d109941735,1
2,штрафстоянка,47.221715,38.831101,Parking,54bba83b498efc89a60986e4,0.0,0.0,0.0,347900.0,0.0,1421584000.0,4c38df4de52ce0d596b336e1,0
3,ДокАвто,47.223408,38.858902,Gas Station,5275e250498ed7d6b3483e96,0.0,0.0,0.0,347900.0,0.0,1383457000.0,4bf58dd8d48988d113951735,0
4,Евролюкс,47.22461,38.844511,General Travel,506bf89fe4b0101523e50d28,0.0,1.0,0.0,347900.0,0.0,1349253000.0,4bf58dd8d48988d1f6931735,0


In [50]:
plot_venues_with_rect(food_venues, [city_rect[0], city_rect[2]], [city_rect[1], city_rect[3]])

In [54]:
city_venues.to_csv('city_venues_in_box_with_food_field.csv', index=False)

In [56]:
# Кафе и рестораны -- 4d4b7105d754a06374d81259
# Ночное заведение -- 4d4b7105d754a06376d81259
# Ферма -- 4bf58dd8d48988d15b941735

### Get detailed information about each venue

In [17]:
# Pull API keys from the file
with open('4square_keys2.txt','r') as f:
    CLIENT_ID, CLIENT_SECRET = [l.strip() for l in f.readlines()]

VERSION = '20180604'
LIMIT = 500

In [18]:
venue_id = '52f6564c498e9e09e3e33147' # ID of Л'Этуаль
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
url

'https://api.foursquare.com/v2/venues/52f6564c498e9e09e3e33147?client_id=QBVCW2Y2UON3G5TQWKAZ30TEQ1ZQFJIPO4VTQTFGCEYA1GT4&client_secret=KNYQOE1VQZT2BMWLSXQAV3LEPMMATCPCOMCZJ5LB1YOBUYYQ&v=20180604'

##### Get the venue's details 

In [19]:
# Get the venue's overall rating
def rating_value(venue):
    try: return venue['response']['venue']['rating']
    except: return 0

# Get the number of tips
def tips_count(venue):
    try: return venue['response']['venue']['tips']['count']
    except: return 0

# Get the number of likes
def likes_count(venue):
    try: return venue['response']['venue']['likes']['count']
    except: return 0

def api_call_succeeded(json):
    try: return json['meta']['code'] == 200
    except: return False

# get postal code or 347900 if absent
def postal_code(venue):
    try: return venue['response']['venue']['location']['postalCode']
    except: return 347900
    
# get category id of a venue
def category_id(venue):
    try: 
        cat_list = venue['response']['venue']['categories']
    except: 
        return None
    
    if len(cat_list) == 0: 
        return None
    else: 
        return cat_list[0]['id']
    
# the price tier from 1 (least pricey) - 4 (most pricey)
def price_tier(venue):
    try: return venue['response']['venue']['price']['tier']
    except: return 0
    
# Seconds since epoch when the venue was created.
def createdAt(venue):
    try: return (venue['response']['venue']['createdAt'])
    except: return 0    

In [20]:
result = requests.get(url).json()
print('rating:', rating_value(result))
print('tips count:', tips_count(result))
print('likes count:', likes_count(result))
print('postalcode:', postal_code(result))
print('cat_id:', category_id(result))
print('price:', price_tier(result))
print('createdAt:', createdAt(result))
print('Succeeded:', api_call_succeeded(result))

rating: 0
tips count: 0
likes count: 0
postalcode: 347900
cat_id: 4bf58dd8d48988d109941735
price: 2
createdAt: 1391875660
Succeeded: True


In [13]:
def get_details(venue_id):
    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
    response = requests.get(url)
    if response.ok:
        result = response.json()
        return [api_call_succeeded(result),
                rating_value(result), 
                likes_count(result), 
                tips_count(result),
                postal_code(result),
                price_tier(result),
                createdAt(result),
                category_id(result)]
    else:
        return [False, None, None, None, None, None, None, None]


### Create additional dataset with venue's details

In [73]:
all_details = pd.DataFrame(columns=['Id', 'processed', 'Rating', 'Likes', 'Tips', 'PostalCode', 'Price_tier', 'createdAt', 'Category_id'])
all_details['Id'] = city_venues['Id']
print(all_details.shape)
all_details.head()

(3525, 9)


Unnamed: 0,Id,processed,Rating,Likes,Tips,PostalCode,Price_tier,createdAt,Category_id
0,55a0e60c498e873de699ccd1,,,,,,,,
1,58401718e9233e42ab37615e,,,,,,,,
2,51ecf0f2498edbbbb8c6c29d,,,,,,,,
3,53f917c4498e47b1dc40249e,,,,,,,,
4,516166d9e4b0d19a95e6b214,,,,,,,,


#### Fill in the dataset with data

In [10]:
# all_details = pd.read_csv('city_venues_details.csv')

In [22]:
import time
for ind in np.arange(all_details.shape[0]):
    if all_details.at[ind, 'processed'] != True:        
        all_details.iloc[ind, 1:] = get_details(all_details.iloc[ind, 0])
        time.sleep(0.4) # delay in seconds
    
all_details.loc[all_details['processed'] == True].shape

(3525, 9)

In [29]:
print(all_details.shape)
all_details.loc[all_details['processed'] == True].shape

(3525, 9)


(3525, 9)

In [30]:
# drop the temporary column 'processed'
all_details = all_details.drop('processed', 1)
all_details.head()

Unnamed: 0,Id,Rating,Likes,Tips,PostalCode,Price_tier,createdAt,Category_id
0,55a0e60c498e873de699ccd1,0.0,7.0,0.0,347900,0.0,1436608000.0,4bf58dd8d48988d132951735
1,58401718e9233e42ab37615e,5.8,1.0,0.0,347900,0.0,1480595000.0,52f2ab2ebcbc57f1066b8b46
2,51ecf0f2498edbbbb8c6c29d,0.0,0.0,0.0,347900,0.0,1374483000.0,4bf58dd8d48988d1e2941735
3,53f917c4498e47b1dc40249e,0.0,1.0,0.0,347900,0.0,1408833000.0,4bf58dd8d48988d1de941735
4,516166d9e4b0d19a95e6b214,0.0,0.0,0.0,347900,0.0,1365338000.0,4bf58dd8d48988d12b951735


#### Save details dataframe to csv file

In [32]:
# save dataframe to csv file
all_details.to_csv('city_venues_details.csv', index=False)

#### Join two datasets into a new one using venue id

In [39]:
# city_venues = city_venues.join(all_details.set_index('Id'), on='Id')
city_venues.head()

Unnamed: 0,Venue,Latitude,Longitude,Category,Id,Rating,Likes,Tips,PostalCode,Price_tier,createdAt,Category_id
0,золотое руно,47.176498,38.810475,Hotel Pool,55a0e60c498e873de699ccd1,0.0,7.0,0.0,347900,0.0,1436608000.0,4bf58dd8d48988d132951735
1,Пятерочка,47.182352,38.818977,Supermarket,58401718e9233e42ab37615e,5.8,1.0,0.0,347900,0.0,1480595000.0,52f2ab2ebcbc57f1066b8b46
2,Петрушанская Коса,47.174113,38.865204,Beach,51ecf0f2498edbbbb8c6c29d,0.0,0.0,0.0,347900,0.0,1374483000.0,4bf58dd8d48988d1e2941735
3,село Боцманово,47.173903,38.75593,Vineyard,53f917c4498e47b1dc40249e,0.0,1.0,0.0,347900,0.0,1408833000.0,4bf58dd8d48988d1de941735
4,"Остановка ""Новозолотовка""",47.179577,38.795387,Bus Line,516166d9e4b0d19a95e6b214,0.0,0.0,0.0,347900,0.0,1365338000.0,4bf58dd8d48988d12b951735


### Draw all venues on map

In [66]:
#city_venues = pd.read_csv('city_venues.csv')
city_venues.columns

Index(['Venue', 'Latitude', 'Longitude', 'Category', 'Id', 'Rating', 'Likes',
       'Tips', 'PostalCode', 'Price_tier', 'createdAt', 'Category_id',
       'Food_venue'],
      dtype='object')

### Pre-processing

In [355]:
venues_clustering = pd.get_dummies(city_venues[['Category']], prefix="", prefix_sep="")
filtered_cols = ['Latitude', 'Longitude', 'Rating', 'Likes', 'Tips', 'Price_tier', 'Food_venue', 'Venue_age']

# add filtered columns back to dataframe
venues_clustering[['Latitude', 'Longitude']] = city_venues[['Latitude', 'Longitude']]

# move latitude and longitude columns to be first
fixed_columns = [venues_clustering.columns[-2], venues_clustering.columns[-1]] + list(venues_clustering.columns[:-2])
venues_clustering = venues_clustering[fixed_columns]

venues_clustering.head()

Unnamed: 0,Latitude,Longitude,Arcade,Art Gallery,Asian Restaurant,Athletics & Sports,Auto Dealership,Bakery,Bank,Bar,...,Theater,Theme Park,Toy / Game Store,Turkish Restaurant,Vegetarian / Vegan Restaurant,Warehouse,Wine Bar,Wine Shop,Winery,Zoo
0,47.21752,38.929149,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,47.215685,38.931146,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,47.217143,38.931138,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,47.216197,38.933381,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,47.216325,38.928217,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0


#### Normalize the dataset

In [356]:
from sklearn.preprocessing import StandardScaler

X = venues_clustering.values
# X = np.nan_to_num(X)
venues_norm = StandardScaler().fit_transform(X)
venues_norm

array([[ 0.65119305,  0.09232256, -0.07254763, ..., -0.07254763,
        -0.07254763, -0.07254763],
       [-0.41246245,  0.77070172, -0.07254763, ..., -0.07254763,
        -0.07254763, -0.07254763],
       [ 0.43257697,  0.76795734, -0.07254763, ..., -0.07254763,
        -0.07254763, -0.07254763],
       ...,
       [-0.91038484,  0.4877238 , -0.07254763, ..., -0.07254763,
        -0.07254763, -0.07254763],
       [ 0.94469792, -1.27558648, -0.07254763, ..., -0.07254763,
        -0.07254763, -0.07254763],
       [-0.52275722,  0.23528503, -0.07254763, ..., -0.07254763,
        -0.07254763, -0.07254763]])

#### Cluster venues
Run k-means to cluster all venues into 5 clusters.

In [357]:
num_clusters = 5

k_means = KMeans(init="k-means++", n_clusters=num_clusters, n_init=12)
k_means.fit(venues_norm)
labels = k_means.labels_

print(labels)

[3 3 3 3 3 3 3 3 4 4 4 3 3 3 3 3 3 1 3 3 4 3 3 4 3 4 3 3 3 3 3 3 3 3 4 3 4
 3 4 3 4 4 3 3 4 3 3 0 3 3 3 3 3 3 2 4 3 3 3 4 4 0 3 3 3 3 4 4 3 3 3 4 3 3
 3 3 3 3 3 3 4 3 3 4 3 3 4 3 3 3 4 4 4 4 3 4 4 3 3 3 3 4 3 3 3 3 3 3 4 3 3
 3 3 3 4 4 3 4 3 3 3 4 3 3 4 3 3 3 1 4 3 3 3 3 3 3 3 3 3 3 3 0 3 3 3 3 3 3
 3 3 3 3 3 4 1 4 4 3 3 3 3 1 4 4 3 4 3 3 4 3 3 3 3 3 3 3 4 4 4 3 3 3 3 3 3
 3 3 3 3 4 3]


#### Assigned a label to each venue

In [358]:
venues['Labels'] = labels
venues.head()

Unnamed: 0,Venue,Latitude,Longitude,Category,Id,Labels
0,"ФГУП ""Федеральный Кадастровый Центр ""Земля""",47.21752,38.929149,Government Building,4e5e11bcd22d7239c19cb7e8,3
1,Три Орешка,47.215685,38.931146,Coffee Shop,53b98963498e9ba407ec3c00,3
2,Альбатрос,47.217143,38.931138,Café,5166e60be4b07cad39a35d31,3
3,"ЗМК ""Кристалл""",47.216197,38.933381,Warehouse,53be56dd498e1c9fd519683c,3
4,Театр имени А. П. Чехова,47.216325,38.928217,Theater,4dcbe98a1f6ea1401d49d12a,3


In [369]:
import matplotlib.cm as cm
import matplotlib.colors as colors

def plot_clusters(venues, center, kclusters):
    # create map
    map_clusters = folium.Map(location=[center[0], center[1]], zoom_start=12)

    # set color scheme for the clusters
    x = np.arange(kclusters)
    ys = [i + x + (i*x)**2 for i in range(kclusters)]
    colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
    rainbow = [colors.rgb2hex(i) for i in colors_array]

    # add markers to the map
    markers_colors = []
    for lat, lon, cat, cluster in zip(venues['Latitude'], venues['Longitude'], venues['Category'], venues['Labels']):
        label = folium.Popup('Cluster' + str(cluster) + ' - ' + cat, parse_html=True)
        folium.CircleMarker(
            [lat, lon],
            radius=5,
            popup=label,
            color=rainbow[cluster-1],
            fill=True,
            fill_color=rainbow[cluster-1],
            fill_opacity=0.7).add_to(map_clusters)
       
    return map_clusters

In [370]:
plot_clusters(venues, center, 5)

In [436]:
details.at[1, 'processed'] = False