Import required packages

In [1]:
import folium
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from geopy import distance
from geopy.geocoders import Nominatim
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

Import the Foursquare credentials from a JSON file (kept separate from the notebook to preserve secrecy) and store them in the global variables CLIENT_ID and CLIENT_SECRET

In [2]:
import json
# Load the Foursquare credentials from the local JSON file. Every top-level key
# in the file becomes a module-level global; the rest of the notebook reads
# CLIENT_ID and CLIENT_SECRET. The `with` block guarantees the file handle is
# closed, which the bare open(...).read() did not.
with open("foursquareCredentials.json", "r") as credentials_file:
    globals().update(json.load(credentials_file))
VERSION = '20181124' # Foursquare API version

Get longitude and latitude of location

In [3]:
# Geocode the street address that the analysis is centred on.
# NOTE(review): user_agent is still a placeholder string; Nominatim expects an
# application-specific identifier - confirm before sharing the notebook.
geolocator = Nominatim(user_agent="specify_your_app_name_here")
location = geolocator.geocode("Rumbach Sebestyén u., Budapest, 1075 Hungary")
#location = geolocator.geocode("Galley Ave, Toronto")
if location is None:
    # geocode() returns None when the service finds no match; fail here with a
    # clear message instead of an AttributeError on the attribute reads below.
    raise ValueError("Geocoding returned no result for the requested address")
lat = location.latitude
long = location.longitude

In [4]:
# Study-area geometry in km (the units the `grid` function documents for its
# parameters): points lie within `radius` of the centre, `tick` apart.
radius, tick = 2, 0.2

# Foursquare category ID of the target venue type.
categoryId = '4bf58dd8d48988d16d941735' # Café

Define function degreeLen which for a given lat and long returns the length of a degree of lat and long in km

In [5]:
def degreeLen(lat, long):
    """Measure the length in km of one degree of latitude and of longitude at a point.

    Each length is the distance between the two points half a degree either
    side of (lat, long) along the corresponding axis.

    :param lat: latitude of the point
    :param long: longitude of the point
    :return: tuple (km per degree of latitude, km per degree of longitude)
    """
    half = 0.5
    south, north = (lat - half, long), (lat + half, long)
    west, east = (lat, long - half), (lat, long + half)
    return distance.distance(south, north).km, distance.distance(west, east).km
degreeLen(lat, long)

(111.18052869040775, 75.34729533841742)

Define function grid that returns the set of grid points within a circle around location.  Grid points are a distance tick apart.

In [7]:
def grid(lat, long, radius, tick):
    """Build the set of (latitude, longitude) grid points lying within a circle.

    A square lattice with spacing ``tick`` is laid out starting from the point
    one ``radius`` south and west of the centre; only lattice points whose
    distance from the centre is at most ``radius`` are kept.

    :param lat: latitude of centre of grid
    :param long: longitude of centre of grid
    :param radius: radius of grid in km
    :param tick: distance between points in grid in km
    :return: set of (latitude, longitude) tuples
    """
    centre = (lat, long)

    # km covered by one degree along each axis at the centre
    kmPerLatDegree, kmPerLongDegree = degreeLen(lat, long)

    # south-west corner of the square that bounds the circle
    southEdge = lat - radius / kmPerLatDegree
    westEdge = long - radius / kmPerLongDegree

    # lattice coordinates along each axis, one tick (converted to degrees) apart
    steps = np.arange(2 * radius / tick + 1)
    latitudes = steps * (tick / kmPerLatDegree) + southEdge
    longitudes = steps * (tick / kmPerLongDegree) + westEdge

    # keep only the lattice points that fall inside the circle
    points = set()
    for lt in latitudes:
        for lg in longitudes:
            if distance.distance(centre, (lt, lg)).km <= radius:
                points.add((lt, lg))
    return points

Check grid function is working using Folium

In [9]:
# Sanity check: build the grid and draw every point on a map centred on the location.
g = grid(lat, long, radius, tick)
map_grid = folium.Map(location=[lat, long], zoom_start=13)

marker_style = dict(radius=2, fill=True, fill_opacity=0.5)
for lt, lg in g:
    folium.CircleMarker([lt, lg], **marker_style).add_to(map_grid)

print("Number of points in grid = " + str(len(g)))
map_grid

Number of points in grid = 310


Define function for pulling list of nearby venues, with optional filter on category Id

In [16]:
url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&intent=browse&ll={},{}&radius={}&categoryId={}'
def getNearbyVenues(lat, long, radius, categoryId = ''):
    """Search Foursquare for venues around a point.

    :param lat: latitude of the search centre
    :param long: longitude of the search centre
    :param radius: search radius sent to the API (callers in this notebook pass tick * 1000, i.e. metres)
    :param categoryId: optional Foursquare category ID to filter on; empty string for no filter
    :return: list of [venue id, name, distance from the search centre, first category name];
             the category is the string 'None' when the venue has no categories
    :raises requests.HTTPError: if the API answers with an error status
    """
    apiCall = url.format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, long, radius, categoryId)

    # The timeout keeps a stalled connection from hanging the per-grid-point
    # loops, and raise_for_status reports a failed request as an HTTP error
    # rather than as a KeyError on the missing 'venues' key.
    response = requests.get(apiCall, timeout=30)
    response.raise_for_status()
    venues = response.json()['response']['venues']

    venues_list = []
    for v in venues:
        categories = v['categories']
        category = categories[0]['name'] if categories else 'None'
        venues_list.append([v['id'], v['name'], v['location']['distance'], category])
    return venues_list

Check function getNearbyVenues is working

In [17]:
getNearbyVenues(lat, long, tick * 1000, categoryId)

[['510c25ede4b0a3ee4308e94a', 'Központ', 188, 'Café'],
 ['4c9ce18c542b224bbaade49f', 'Drum Cafe', 100, 'Café'],
 ['52054dd511d2778ebd4f1cf3', 'Blue Bird Cafe', 171, 'Café'],
 ['514d9e95e4b01bb2769654a3', 'Solinfo', 67, 'Café'],
 ['4c8e63f5a8de224be0182701', 'Café Vian', 182, 'French Restaurant'],
 ['5b9a6c1d1ffe97002c3349c7', 'Fresh Corner Downtown', 187, 'Café'],
 ['54535798498ef0cd9c592e6e', 'Blue Bird Roastery', 137, 'Café'],
 ['4b804281f964a520d36130e3', 'Spinoza Café', 87, 'Restaurant'],
 ['4b99441cf964a520be6e35e3', 'Printa Café', 111, 'Art Gallery'],
 ['5152e6f0e4b02b6799b2ef78', 'Tom Dixon Cafè & Showroom', 62, 'Café'],
 ['5bda0a2bb3c961002badd1c6', 'Kárpit Café', 128, 'Café'],
 ['57c41471498e814e459823ff', 'Blue Bird Cafe', 130, 'Café'],
 ['4c20cfe93573c9b602ea3515', 'Café Zenit', 148, 'Pub'],
 ['52495a1e11d224c2f94c5759', 'Alexandra Irodalmi Kávéház', 101, 'Café'],
 ['52b198d6498e62e5200cc830', 'Hinta Kávézó', 129, 'Café'],
 ['4e08828bd22d658532ac1d6b', 'Random Point', 187, '

Define function targetDensity which outputs a measure of the density of a list of venues within a specific radius of a location.
The density measure has the property of being larger the more venues there are in the list and the closer they are.
For each venue within the radius we add radius / (distance + 1) to the density measure, so a venue at the centre of the circle adds the full radius while a venue on the edge of the circle adds just under 1.

In [None]:
def targetDensity(venues, radius):
    """Score how densely a list of venues clusters around a point.

    Every venue whose distance is at most ``radius`` contributes
    ``radius / (distance + 1)``, so the score grows with the number of venues
    and with their closeness. Venues farther away than ``radius`` are ignored.

    :param venues: venue records in the layout returned by getNearbyVenues;
                   index 2 of each record is the distance from the centre
    :param radius: cut-off distance, in the same units as the venue distances
    :return: sum of the contributions (0.0 when no venue is within range)
    """
    # Built as a plain list: the previous version ran a comprehension purely
    # for its append() side effects and threw the resulting list away.
    contributions = [radius / (v[2] + 1) for v in venues if v[2] <= radius]
    return np.sum(np.asarray(contributions))

In [None]:
targetDensity(getNearbyVenues(lat, long, tick * 1000, categoryId), tick * 1000)

In [None]:
# Check if density for empty list of venues = 0
targetDensity([], tick * 1000)

In [None]:
# Query the target category around every grid point, recording the density
# score per point and the IDs of all target venues seen.
targets = []
ids = set()
for (lt, lg) in g:
    print((lt, lg))
    venues = getNearbyVenues(lt, lg, tick * 1000, categoryId)
    density = targetDensity(venues, tick * 1000)
    targets.append((lt, lg, density))
    # Only the venue ID (index 0) is needed. Unpacking each record into a
    # variable named `distance` would overwrite the geopy `distance` module
    # that degreeLen and grid call, breaking them if re-run after this cell.
    ids.update(venue[0] for venue in venues)

In [None]:
# One row per grid point, indexed and sorted by (Latitude, Longitude).
# Passing `columns=` to the constructor also works when `targets` is empty,
# where assigning `.columns` afterwards raises a length-mismatch error; the
# chained calls build the frame without in-place mutation.
targetsDf = (
    pd.DataFrame(targets, columns=['Latitude', 'Longitude', 'Density'])
    .set_index(['Latitude', 'Longitude'])
    .sort_index()
)
targetsDf

In [None]:
targetsDf.describe()

In [None]:
# Box plot of the raw density values.
densityAx = targetsDf.plot.box(y='Density')
densityAx.set_ylabel('Density')
plt.show()

In [None]:
# Add a log-scaled copy of the density: log10(density + 1), so a density of 0
# maps to 0.
targetsDf['Density Log'] = np.log10(targetsDf['Density'].values + 1)

In [None]:
targetsDf.describe()

In [None]:
# Box plot of the log-scaled density values.
densityLogAx = targetsDf.plot.box(y='Density Log')
densityLogAx.set_ylabel('Density Log')
plt.show()

In [None]:
ids

In [None]:
# Query every grid point again without a category filter and keep the venues
# that are neither in the target set (`ids`) nor uncategorised.
otherVenues = []
for (lt, lg) in g:
    print((lt, lg))
    venues = getNearbyVenues(lt, lg, tick * 1000)
    # Named `venue_distance`, not `distance`: the latter would overwrite the
    # geopy `distance` module that degreeLen and grid rely on.
    for (v_id, name, venue_distance, category) in venues:
        if v_id not in ids and category != 'None':
            otherVenues.append((lt, lg, v_id, name, venue_distance, category))

In [None]:
# One row per (grid point, non-target venue), indexed and sorted by
# (Latitude, Longitude). Passing `columns=` to the constructor also works when
# `otherVenues` is empty, where assigning `.columns` afterwards raises a
# length-mismatch error; the chained calls avoid in-place mutation.
otherVenuesDf = (
    pd.DataFrame(
        otherVenues,
        columns=['Latitude', 'Longitude', 'VenueID', 'Name', 'Distance', 'Category'],
    )
    .set_index(['Latitude', 'Longitude'])
    .sort_index()
)
otherVenuesDf

In [None]:
# Per-venue density contribution, using the same radius / (distance + 1)
# formula as targetDensity with radius = tick * 1000.
# NOTE(review): targetDensity skips venues farther away than the radius, but no
# such filter is applied here - confirm whether that difference is intended.
otherVenuesDf['Density'] = (tick * 1000)/(otherVenuesDf['Distance'].values + 1) 

In [None]:
otherVenuesDf.groupby(['Latitude', 'Longitude', 'Category'])['Density'].sum().to_frame()

In [None]:
# Feature table: one row per grid point, one column per venue category, each
# cell the summed density of that category around the point (0 where absent).
# The string 'sum' replaces the list [np.sum]: newer pandas warns about NumPy
# callables as aggfunc, and a single aggregation yields the Category columns
# directly, so the extra ['sum'] level no longer has to be selected.
features = pd.pivot_table(otherVenuesDf, index=['Latitude', 'Longitude'], values='Density',
                          columns='Category', aggfunc='sum', fill_value=0)

In [None]:
features.info()

In [None]:
features

In [None]:
features.describe()

In [None]:
# Log-scale the feature values with log10(x + 1), the same transform used for
# the 'Density Log' target column.
# NOTE(review): this overwrites `features` with a transform of itself, so
# running the cell a second time applies the log again.
features = np.log10(features + 1)

In [None]:
features.describe()

In [None]:
# Attach the target columns from targetsDf to the per-point feature table,
# replace missing values with 0, and turn the index back into ordinary columns.
# NOTE(review): DataFrame.join is a left join by default, so grid points absent
# from `features` do not appear in the result - confirm that is intended.
# NOTE(review): `features` is reassigned from itself; re-running this cell
# operates on the already-joined frame.
features = features.join(targetsDf).fillna(0).reset_index()

In [None]:
features.describe()

In [None]:
# Response: the log-scaled target density. Predictors: every remaining column
# once the coordinates and both target columns are removed.
nonFeatureColumns = ['Latitude', 'Longitude', 'Density', 'Density Log']
y = features['Density Log']
X = features.drop(columns=nonFeatureColumns)

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state makes the split
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=22)

In [None]:
# Ordinary least-squares baseline: fit on the training rows, predict for both
# splits, and display the score on the held-out rows.
reg_all = linear_model.LinearRegression().fit(X_train, y_train)
y_pred_train = reg_all.predict(X_train)
y_pred_test = reg_all.predict(X_test)
reg_all.score(X_test, y_test)

In [None]:
# Compare the distributions of actual and predicted test values side by side.
testDf = y_test.to_frame().assign(Predicted=y_pred_test)
testDf.plot(kind='box')
plt.show()

In [None]:
testDf.describe()

In [None]:
# Predicted vs. actual log density for the linear model. Labels and a legend
# are added so the train and test series can be told apart.
plt.scatter(y_test, y_pred_test, color='blue', label='Test')
plt.scatter(y_train, y_pred_train, color='red', label='Train')
plt.xlabel('Actual Density Log')
plt.ylabel('Predicted Density Log')
plt.title('Linear regression: predicted vs. actual')
plt.legend()
plt.show()

In [None]:
# Lasso regression on scaled features.
# The `normalize` argument was deprecated in scikit-learn 1.0 and removed in
# 1.2, where Lasso(normalize=True) raises a TypeError. The replacement given in
# the deprecation notice is a pipeline with StandardScaler(with_mean=False) and
# alpha multiplied by sqrt(n_samples).
# `lasso` is now a Pipeline; the fitted estimator is lasso.named_steps['lasso'].
lasso_alpha = 0.005 * np.sqrt(len(X_train))
lasso = make_pipeline(StandardScaler(with_mean=False), linear_model.Lasso(alpha=lasso_alpha))
lasso.fit(X_train, y_train)
lasso_pred_test = lasso.predict(X_test)
lasso_pred_train = lasso.predict(X_train)
lasso.score(X_test, y_test)

In [None]:
# Compare the distributions of actual and Lasso-predicted test values.
testDf = y_test.to_frame().assign(Predicted=lasso_pred_test)
testDf.plot(kind='box')
plt.show()

In [None]:
# Predicted vs. actual log density for the Lasso model. Labels and a legend
# are added so the train and test series can be told apart.
plt.scatter(y_test, lasso_pred_test, color='blue', label='Test')
plt.scatter(y_train, lasso_pred_train, color='red', label='Train')
plt.xlabel('Actual Density Log')
plt.ylabel('Predicted Density Log')
plt.title('Lasso: predicted vs. actual')
plt.legend()
plt.show()

In [None]:
lasso_pred_all = lasso.predict(X)

In [None]:
# Lasso predictions against actual values for every row, with axis labels so
# the figure can be read on its own.
plt.scatter(y, lasso_pred_all, color='green')
plt.xlabel('Actual Density Log')
plt.ylabel('Predicted Density Log')
plt.title('Lasso: predicted vs. actual (all rows)')
plt.show()

In [None]:
len(lasso_pred_all)

In [None]:
# Take an explicit copy: later cells add columns to `results`, and assigning
# into a column selection of `features` triggers SettingWithCopyWarning.
results = features[['Latitude', 'Longitude', 'Density Log']].copy()

In [None]:
results['Predicted'] = lasso_pred_all

In [None]:
# Predicted vs. actual log density from the results table, with axis labels so
# the figure can be read on its own.
plt.scatter(results['Density Log'], results['Predicted'], color='green')
plt.xlabel('Density Log')
plt.ylabel('Predicted')
plt.title('Results: predicted vs. actual')
plt.show()

In [None]:
# Rank = predicted log density / (actual log density + 1). The value is highest
# where the prediction is large relative to the actual value; the +1 keeps the
# denominator non-zero when the actual log density is 0.
results['Rank'] = results['Predicted'] / (results['Density Log'] + 1)

In [None]:
# The ten rows with the highest Rank.
ranked = results.sort_values(by='Rank', ascending=False)
top10 = ranked.head(10)
top10

In [None]:
l = list(zip(top10['Latitude'], top10['Longitude']))
l

In [None]:
# Draw the selected points in red on a fresh map centred on the location.
map_grid = folium.Map(location=[lat, long], zoom_start=13)

top_marker_style = dict(radius=4, fill=True, fill_opacity=0.5, color='Red')
for (lt, lg) in l:
    folium.CircleMarker([lt, lg], **top_marker_style).add_to(map_grid)

map_grid