In [16]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes 
import folium 

print('Libraries imported.')

Libraries imported.


##### Define Foursquare Credentials and Version

In [36]:
# Pull API keys from the file
with open('4square_keys.txt','r') as f:
    CLIENT_ID, CLIENT_SECRET = [l.strip() for l in f.readlines()]

VERSION = '20180604'
LIMIT = 500

#### Explore venues around my work place 

In [52]:
address = 'Russia, Taganrog'
geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, ',', longitude)

47.2153657 , 38.9285216


In [49]:
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            latitude, 
            longitude, 
            500, 
            LIMIT)

In [33]:
import json, codecs
def json_to_file(data, name='work_nbrs.json'):
    with open(name, 'wb') as f:
        json.dump(data, codecs.getwriter('utf-8')(f), ensure_ascii=False)

In [55]:
def searchVenuesInRect(sw, ne):
    
    venues_list=[]
    
    def get_category(cat_list):
        if len(cat_list) == 0:
            return None
        else:
            return cat_list[0]['name']
              
    # create the API request URL
    url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&intent=browse&sw={},{}&ne={},{}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            sw[0], 
            sw[1], 
            ne[0], 
            ne[1],
            LIMIT)
            
    # make the GET request
    results = requests.get(url).json()['response']['venues']
        
    # return only relevant information for each nearby venue
    venues_list.append([(
        v['name'], 
        v['location']['lat'], 
        v['location']['lng'],  
        get_category(v['categories']),
        v['id']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = [
        'Venue', 'Latitude', 'Longitude', 'Category', 'Id']
    
    return(nearby_venues)
        

In [56]:
def searchVenuesInCircle(c, r):
    
    venues_list=[]
    
    def get_category(cat_list):
        if len(cat_list) == 0:
            return None
        else:
            return cat_list[0]['name']
              
    # create the API request URL
    url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            c[0], 
            c[1], 
            r,             
            LIMIT)
            
    # make the GET request
    results = requests.get(url).json()['response']['venues']
        
    # return only relevant information for each nearby venue
    venues_list.append([(
        v['name'], 
        v['location']['lat'], 
        v['location']['lng'],  
        get_category(v['categories']),
        v['id']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = [
        'Venue', 'Latitude', 'Longitude', 'Category', 'Id']
    
    return(nearby_venues)
        

In [57]:
sw = [47.2153657 , 38.9285216]
ne = [47.2153657 , 38.9285216]
venues = searchVenuesInRect(sw=sw, ne=ne)
venues.head()

Unnamed: 0,Venue,Latitude,Longitude,Category,Id
0,Площадь перед администрацией города,47.215733,38.92823,Plaza,5368f4ad498ea0cb80cef632
1,Культ вина,47.21551,38.92931,Wine Bar,5c74142e60255e002c1aefbc
2,Администрация Таганрога,47.215517,38.92842,City Hall,4da693d90cb66f658708dafc
3,Астор,47.215428,38.929226,Lounge,4e3fcb1a1495bf24a5f83465
4,Театр имени А. П. Чехова,47.216325,38.928217,Theater,4dcbe98a1f6ea1401d49d12a


#### Create a map of Taganrog city

In [68]:
# plot venues on map
def plot_venues(venues):
    map_ = folium.Map(location=sw, zoom_start=13)
   
    # add markers to map
    for lat, lng, name, cat in zip(venues['Latitude'], venues['Longitude'], venues['Venue'], venues['Category']):
        label = '{}, {}'.format(name, cat)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False).add_to(map_)  
        
    return map_

In [71]:
# plot venues on map
def plotVenuesEx(venues, sw, ne, r=None):
    map_tag = plot_venues(venues)

    # add bounding box or circle to map
    if r == None:
        folium.Rectangle([sw, ne], color='red').add_to(map_tag)
    else:
        folium.Circle(sw, r, color='red').add_to(map_tag)
        
    return map_tag

In [73]:
center = [47.2153657, 38.9285216]
venues = searchVenuesInRect(sw=center, ne=[center[0]+0.005, center[1]+0.005])
#plotVenuesEx(venues=venues, sw=center, ne=[center[0]+0.005, center[1]+0.005])
plot_venues(venues)

#### Get the city bounding box

In [17]:
url_bounds = 'https://nominatim.openstreetmap.org/search?format=json&q=Taganrog, Russia&polygon_geojson=1'
# get borders in json format
bounds = requests.get(url_bounds).json()[0]['boundingbox']
# convert to float
city_rect = [float(i) for i in bounds] 

#### Get the list of all venues

In [None]:
step = 0.005
city_venues = pd.DataFrame(columns=['Venue', 'Latitude', 'Longitude', 'Category', 'Id'])
for lat in np.arange(city_rect[0], city_rect[1], step):
    for lon in np.arange(city_rect[2], city_rect[3], step):
        loc_venues = searchVenuesInRect(sw=[lat, lon], ne=[lat+step, lon+step])
        city_venues = city_venues.append(loc_venues)

print(city_venues.shape)
city_venues.head()

In [None]:
city_venues = city_venues.drop_duplicates('Id')
city_venues = city_venues.reset_index(drop=True)

In [302]:
# save dataframe to csv file
city_venues.to_csv('city_venues.csv')

#### Draw the coordinate grid

In [31]:
map_tag = folium.Map(location=[47.2153657, 38.9285216], zoom_start=12)

for lat in np.arange(city_rect[0], city_rect[1], step):
    for lon in np.arange(city_rect[2], city_rect[3], step):
        folium.Rectangle([[lat, lon], [lat+step, lon+step]], color='red', weight=0.3).add_to(map_tag)
        
map_tag.fit_bounds([[city_rect[0], city_rect[2]], [city_rect[1], city_rect[3]]])
map_tag

#### Get detailed information about each venue

In [40]:
venue_id = '4dcbe98a1f6ea1401d49d12a' # ID of Л'Этуаль
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
url

'https://api.foursquare.com/v2/venues/4dcbe98a1f6ea1401d49d12a?client_id=V53MHPBBL23EVUFTF31HTKJAFRJIK2QL1WJURTZ3ARRFF3KE&client_secret=G5Z0ALDAAKEOPESPXKBZPXNB2TIEJKGWM33ZGSBCNW0VGW0T&v=20180604'

##### Get the venue's details

In [47]:
# Get the venue's overall rating
def get_rating(venue):
    try:
        result = venue['response']['venue']['rating']
    except:
        result = None
    return result

# Get the number of tips
def get_tips_count(venue):
    try:
        result = venue['response']['venue']['tips']['count']
    except:
        result = None
    return result

# Get the number of likes
def get_likes_count(venue):
    try:
        result = venue['response']['venue']['likes']['count']
    except:
        result = None
    return result


In [46]:
get_rating(result)
get_tips_count(result)
get_likes_count(result)

8.4

### Draw all venues on map

In [None]:
all_venues = pd.read_csv('city_venues.csv')
plot_venues(all_venues)

In [74]:
venues

Unnamed: 0,Venue,Latitude,Longitude,Category,Id
0,Три Орешка,47.215685,38.931146,Coffee Shop,53b98963498e9ba407ec3c00
1,"ФГУП ""Федеральный Кадастровый Центр ""Земля""",47.217520,38.929149,Government Building,4e5e11bcd22d7239c19cb7e8
2,Альбатрос,47.217143,38.931138,Café,5166e60be4b07cad39a35d31
3,"ЗМК ""Кристалл""",47.216197,38.933381,Warehouse,53be56dd498e1c9fd519683c
4,Ретро Павильон,47.218926,38.928707,Arcade,516906d8e4b095bb06b649c8
...,...,...,...,...,...
186,"Остановка ""ТУМ""",47.217985,38.924865,Bus Line,4ffc63a0e4b070f1c95d448c
187,DNS,47.214405,38.930721,Electronics Store,4fcb18c0e4b0ac24477def65
188,Амбрелла,47.214509,38.912076,Tech Startup,5530b3ea498e56b3c7437457
189,Оптик Центр,47.214539,38.930096,Optical Shop,51a5dc60498e3a0a64fc3d11


In [88]:
venues_clustering = pd.get_dummies(venues[['Category']], prefix="", prefix_sep="")

# add latitude and longitude columns back to dataframe
venues_clustering[['Latitude', 'Longitude']] = venues[['Latitude', 'Longitude']]

# move neighborhood column to the first column
fixed_columns = [venues_clustering.columns[-1], venues_clustering.columns[-2]] + list(venues_clustering.columns[:-2])
venues_clustering = venues_clustering[fixed_columns]

venues_clustering.head()

Unnamed: 0,Longitude,Latitude,Arcade,Art Gallery,Asian Restaurant,Athletics & Sports,Auto Dealership,Bakery,Bank,Bar,...,Theater,Theme Park,Toy / Game Store,Turkish Restaurant,Vegetarian / Vegan Restaurant,Warehouse,Wine Bar,Wine Shop,Winery,Zoo
0,38.931146,47.215685,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,38.929149,47.21752,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,38.931138,47.217143,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,38.933381,47.216197,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,38.928707,47.218926,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [93]:
from sklearn.preprocessing import StandardScaler

X = venues_clustering.values[:,1:]
# X = np.nan_to_num(X)
venues_norm = StandardScaler().fit_transform(X)
venues_norm

array([[-0.41246245, -0.07254763, -0.1028689 , ..., -0.07254763,
        -0.07254763, -0.07254763],
       [ 0.65119305, -0.07254763, -0.1028689 , ..., -0.07254763,
        -0.07254763, -0.07254763],
       [ 0.43257697, -0.07254763, -0.1028689 , ..., -0.07254763,
        -0.07254763, -0.07254763],
       ...,
       [-1.09421108, -0.07254763, -0.1028689 , ..., -0.07254763,
        -0.07254763, -0.07254763],
       [-1.0769375 , -0.07254763, -0.1028689 , ..., -0.07254763,
        -0.07254763, -0.07254763],
       [-0.92728587, -0.07254763, -0.1028689 , ..., 13.78404875,
        -0.07254763, -0.07254763]])

In [94]:
num_clusters = 3

k_means = KMeans(init="k-means++", n_clusters=num_clusters, n_init=12)
k_means.fit(venues_norm)
labels = k_means.labels_

print(labels)

[2 1 1 2 1 1 2 2 2 2 1 2 1 2 1 1 2 2 2 2 2 1 2 1 1 2 1 2 1 2 2 1 2 1 2 1 1
 1 2 2 2 2 2 1 2 1 1 1 2 2 1 2 1 1 1 2 1 2 1 1 1 2 1 1 1 1 1 2 1 1 2 2 2 2
 2 2 1 1 1 2 2 1 2 2 2 2 1 2 1 1 2 2 1 2 2 2 2 2 2 2 1 2 1 2 1 2 2 2 2 2 2
 2 2 2 2 1 2 1 2 2 1 1 2 2 2 2 2 2 1 2 2 2 2 2 1 1 1 2 2 1 2 1 1 2 1 2 2 2
 2 2 2 1 1 2 1 1 1 1 2 2 1 2 2 2 2 1 2 1 2 1 2 2 2 2 2 1 2 1 2 1 0 2 1 2 1
 1 1 2 2 2 2]
