In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!pip install geopy
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!pip install folium
import folium # map rendering library

print('Libraries imported.')

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/a4/f0/44e69d50519880287cc41e7c8a6acc58daa9a9acf5f6afc52bcc70f69a6d/folium-0.11.0-py2.py3-none-any.whl (93kB)
[K     |████████████████████████████████| 102kB 7.4MB/s ta 0:00:011
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/13/fb/9eacc24ba3216510c6b59a4ea1cd53d87f25ba76237d7f4393abeaf4c94e/branca-0.4.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0
Libraries imported.


In [2]:
address = 'Austin, Texas'
geolocator = Nominatim(user_agent='Austin_explorer')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

In [24]:
# create a map for visual reference

map = folium.Map(location=[latitude, longitude], zoom_start=13)

map

In [4]:
CLIENT_ID = 'GKP4FOXVFFPOS40Y13NFEIEZCHC0RUMLLQHLC5CMKTMYDCP3' # your Foursquare ID
CLIENT_SECRET = 'WJTLHYSROXKHC5U42EMYX5MFITCNUWBDMGJKYEGQUPZUE3NS' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentials:
CLIENT_ID: GKP4FOXVFFPOS40Y13NFEIEZCHC0RUMLLQHLC5CMKTMYDCP3
CLIENT_SECRET:WJTLHYSROXKHC5U42EMYX5MFITCNUWBDMGJKYEGQUPZUE3NS


In [15]:
def getNearbyVenues(name, lat, lng, LIMIT=100, radius=10000, categoryId='4bf58dd8d48988d1d2941735'):
    
    venues_list=[]

        # create the API request URL
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&limit={}&radius={}&categoryId={}'.format(
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION, 
        lat, 
        lng, 
        LIMIT,
        radius,
        categoryId)
    
            
        # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
    venues_list.append([(
        v['venue']['name'], 
        v['venue']['location']['lat'], 
        v['venue']['location']['lng'],  
        v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = [
                  'Venue', 
                  'Venue_Latitude', 
                  'Venue_Longitude', 
                  'Venue_Category']
    
    return(nearby_venues)

In [16]:
Austin_venues = getNearbyVenues(name='Austin',
                                   lat=latitude,
                                   lng=longitude)

In [17]:
Austin_venues

Unnamed: 0,Venue,Venue_Latitude,Venue_Longitude,Venue_Category
0,Maiko Sushi Lounge,30.268841,-97.745724,Sushi Restaurant
1,Bar Chi Sushi,30.265188,-97.745243,Sushi Restaurant
2,Uchi,30.257594,-97.759891,Sushi Restaurant
3,Sushi Junai,30.279057,-97.741577,Sushi Restaurant
4,Uchiko,30.310836,-97.739772,Sushi Restaurant
5,Sushi Niichi,30.287959,-97.745375,Sushi Restaurant
6,Lucky Robot Japanese Kitchen,30.250943,-97.749165,Sushi Restaurant
7,Komé,30.314071,-97.714473,Sushi Restaurant
8,Musashino Sushi Dokoro,30.297917,-97.74739,Sushi Restaurant
9,Fukumoto,30.264757,-97.73165,Japanese Restaurant


It appears that some of the imported venues were misclassified or had Sushi as a secondary tag. To rectify this, we will remove all of the non sushi and non japanese restaurants

In [18]:
Austin_Sushi = Austin_venues[Austin_venues.Venue_Category == ('Sushi Restaurant')]
Austin_Jp = Austin_venues[Austin_venues.Venue_Category == ('Japanese Restaurant')]
Austin_Sushi = Austin_Sushi.append(Austin_Jp)

In [19]:
# set number of clusters
kclusters = 5

Austin_clustering = Austin_Sushi.drop('Venue_Category', 1)
Austin_clustering = Austin_clustering.drop('Venue', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Austin_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 


array([0, 0, 0, 2, 2, 2, 0, 1, 2, 0], dtype=int32)

In [20]:
# add clustering labels
Austin_Sushi.insert(0, 'Cluster Labels', kmeans.labels_)
Austin_Sushi.head()

Unnamed: 0,Cluster Labels,Venue,Venue_Latitude,Venue_Longitude,Venue_Category
0,0,Maiko Sushi Lounge,30.268841,-97.745724,Sushi Restaurant
1,0,Bar Chi Sushi,30.265188,-97.745243,Sushi Restaurant
2,0,Uchi,30.257594,-97.759891,Sushi Restaurant
3,2,Sushi Junai,30.279057,-97.741577,Sushi Restaurant
4,2,Uchiko,30.310836,-97.739772,Sushi Restaurant


In [23]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Austin_Sushi['Venue_Latitude'], Austin_Sushi['Venue_Longitude'], Austin_Sushi['Venue'], Austin_Sushi['Cluster Labels']):
    label = folium.Popup(str(poi) + 'Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters
