Import required packages

In [1]:
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
from geopy import distance
import folium
import requests
from sklearn import linear_model
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

Import Foursquare credentials from json file created to preserve secrecy and store in global variables CLIENT_ID and CLIENT_SECRET

In [2]:
import json
globals().update(json.loads(open("foursquareCredentials.json", "r") .read()))
VERSION = '20181124' # Foursquare API version

## Input the variables that define the problem
Get longitude and latitude of location where we want to open a new business

In [3]:
geolocator = Nominatim(user_agent="specify_your_app_name_here")
location = geolocator.geocode("Toronto")
#location = geolocator.geocode("st dunstans st, canterbury uk")
lat = location.latitude
long = location.longitude

Populate variables containing the other parameters of the search problem:
* categoryId = foursquare categoryId of the business we wish to open. A full list of categories can be found here: https://developer.foursquare.com/docs/resources/categories.
* radius = the maximum distance from the location within which we want to open the business in km.
* tick = the level of accuracy in km which we want to return for the recommended locations. 

In [4]:
categoryId = '4bf58dd8d48988d16d941735' # Café
radius = 1
tick = 0.2

## Build function which pulls venue data from Foursquare
Define function for pulling list of nearby venues, with optional filter on category Id

In [5]:
url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&intent=browse&ll={},{}&radius={}&categoryId={}'
apiCall = url.format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, long, radius, categoryId)
venues = requests.get(apiCall).json()['response']['venues']
dataframe = json_normalize(venues)
dataframe.head()

In [6]:
requests.get(apiCall).json()

{'meta': {'code': 200, 'requestId': '5c0d0108dd57971cf7bc7d18'},
 'response': {'venues': []}}

In [7]:
url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&intent=browse&ll={},{}&radius={}&categoryId={}'
def getNearbyVenues(lat, long, radius, categoryId = ''):
    apiCall = url.format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, long, radius, categoryId)
    venues = requests.get(apiCall).json()['response']['venues']
    venues_list=[]
    
    for v in venues:
        if len(v['categories']) > 0:
            venues_list.append([v['id'], v['name'], v['location']['distance'], v['categories'][0]['name']])
        else:
            venues_list.append([v['id'], v['name'], v['location']['distance'], 'None'])
    
    return(venues_list)

Check function getNearbyVenues is working

In [8]:
getNearbyVenues(lat, long, tick * 1000, categoryId)

[['4f513029e4b07c3382c9fdb9', 'Cafe Plenty', 192, 'Café'],
 ['4b59d414f964a5208f9a28e3', "Java Joe's", 154, 'Café'],
 ['4b0d2ef1f964a520664423e3', 'Avenue Café + Bistro', 179, 'Sandwich Place'],
 ['5706b570498e618216dbfd32',
  "Uncle Tetsu's Japanese Angel Cafe",
  118,
  'Café'],
 ['4bd8dd55cc5b95215c3af24f', 'Bistro 438', 111, 'Bistro'],
 ['50b77ec7e4b08ecf5a78a68f', 'Urbana Coffee Co.', 148, 'Café'],
 ['58e716e5fa7ed04f61194722', 'Café De Paris, Downtown Toronto', 102, 'Café'],
 ['50046391e4b025b2598516f3', 'Coffee Antidote 9th Floor', 125, 'Café'],
 ['51bd60fa498e5ce3f52c6016', 'Prince Cafe', 106, 'Café']]

In [11]:
url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&intent=browse&ll={},{}&radius={}&categoryId={}'
def getNearbyVenues(lat, long, radius, categoryId = ''):
    apiCall = url.format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, long, radius, categoryId)
    venues = requests.get(apiCall).json()['response']['venues']
    dataframe = json_normalize(venues)
    venues_list=[]
    for v in venues:
        if len(v['categories']) > 0:
            venues_list.append([v['id'], v['name'], v['location']['distance'], v['categories'][0]['name']])
        else:
            venues_list.append([v['id'], v['name'], v['location']['distance'], 'None'])
    
    return dataframe
getNearbyVenues(lat, long, tick * 1000, categoryId)

Unnamed: 0,categories,hasPerk,id,location.address,location.cc,location.city,location.country,location.crossStreet,location.distance,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.postalCode,location.state,name,referralId
0,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",False,4f513029e4b07c3382c9fdb9,250 Dundas Street West,CA,Toronto,Canada,Simcoe Street,192,"[250 Dundas Street West (Simcoe Street), Toron...","[{'label': 'display', 'lat': 43.65457125894357...",43.654571,-79.38945,M5T 2Z5,ON,Cafe Plenty,v-1544356294
1,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",False,4b59d414f964a5208f9a28e3,180 Dundas St. W.,CA,Toronto,Canada,at Edward,154,"[180 Dundas St. W. (at Edward), Toronto ON M5G...","[{'label': 'display', 'lat': 43.65520124130076...",43.655201,-79.386333,M5G 1Z8,ON,Java Joe's,v-1544356294
2,"[{'id': '4bf58dd8d48988d1c5941735', 'name': 'S...",False,4b0d2ef1f964a520664423e3,480 University Ave,CA,Toronto,Canada,at Dundas St W,179,"[480 University Ave (at Dundas St W), Toronto ...","[{'label': 'display', 'lat': 43.65511700442771...",43.655117,-79.388764,M5G 1V2,ON,Avenue Café + Bistro,v-1544356294
3,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",False,5706b570498e618216dbfd32,191 Dundas St. West,CA,Toronto,Canada,At Centre Ave.,118,"[191 Dundas St. West (At Centre Ave.), Toronto...","[{'label': 'display', 'lat': 43.65500131144402...",43.655001,-79.386899,,ON,Uncle Tetsu's Japanese Angel Cafe,v-1544356294
4,"[{'id': '52e81612bcbc57f1066b79f1', 'name': 'B...",False,4bd8dd55cc5b95215c3af24f,438 University Avenue,CA,Toronto,Canada,Dundas Street West,111,"[438 University Avenue (Dundas Street West), T...","[{'label': 'display', 'lat': 43.6543883919314,...",43.654388,-79.388457,,ON,Bistro 438,v-1544356294
5,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",False,50b77ec7e4b08ecf5a78a68f,Toronto,CA,Toronto,Canada,,148,"[Toronto, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.65322456230742...",43.653225,-79.388749,,ON,Urbana Coffee Co.,v-1544356294
6,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",False,58e716e5fa7ed04f61194722,201 Dundas St W,CA,Toronto,Canada,,102,"[201 Dundas St W, Toronto ON M5G 1C8, Canada]","[{'label': 'display', 'lat': 43.654872, 'lng':...",43.654872,-79.387429,M5G 1C8,ON,"Café De Paris, Downtown Toronto",v-1544356294
7,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",False,50046391e4b025b2598516f3,,CA,,Canada,,125,[Canada],"[{'label': 'display', 'lat': 43.65436014476997...",43.65436,-79.388666,,,Coffee Antidote 9th Floor,v-1544356294
8,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",False,51bd60fa498e5ce3f52c6016,442 Dundas St W,CA,Toronto,Canada,,106,"[442 Dundas St W, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.654916, 'lng':...",43.654916,-79.387351,,ON,Prince Cafe,v-1544356294


## Build function to calculate density of venues within a specified radius of a location
Define function venueDensity which outputs a measure the density of a list of venues within a specific radius of a location.
The density measure has the property of being larger the more venues there are in the list and the closer they are.
So for each venue we subtract the distance of the venue from the location from the radius.
This measure is then summed up for all the venues

Worked example, radius = 200m, 2 venues, 1 distance 1m from the location, the other distance 199m:
* venue 1 density = 200 - 1 = 199
* venue 2 density = 200 - 199 = 1
* total density = 199 + 1 = 200

In [None]:
def venueDensity(venues, radius):
    distances = []
    [distances.append(radius - v[2]) for v in venues if v[2] < radius]
    return np.sum(np.asarray(distances))

In [None]:
venueDensity(getNearbyVenues(lat, long, tick * 1000, categoryId), tick * 1000)

In [None]:
# Check if density for empty list of venues = 0
venueDensity([], tick * 1000)

## Get venue density for each point on the grid around location

In [None]:
targets = []
ids = set()
for (lt, lg) in g:
    print((lt, lg))
    venues = getNearbyVenues(lt, lg, tick * 1000, categoryId)
    density = venueDensity(venues, tick * 1000)
    targets.append((lt, lg, density))
    for (v_id, name, distance, category) in venues:
        ids.add(v_id)

In [None]:
targetsDf = pd.DataFrame(targets)
targetsDf.columns = ['Latitude', 'Longitude', 'Density']
targetsDf.set_index(['Latitude', 'Longitude'], inplace=True)
targetsDf.sort_index(inplace=True)
targetsDf

In [None]:
targetsDf.describe()

In [None]:
targetsDf.plot(y='Density', kind='box')
plt.ylabel('Density')
plt.show()

In [None]:
targetsDf['Log Density'] = np.log10(targetsDf['Density'].values + 1)

In [None]:
targetsDf.describe()

In [None]:
targetsDf.plot(y='Log Density', kind='box')
plt.ylabel('Log Density')
plt.show()

In [None]:
ids

In [None]:
otherVenues = []
for (lt, lg) in g:
    print((lt, lg))
    venues = getNearbyVenues(lt, lg, tick * 1000)
    for (v_id, name, distance, category) in venues:
        if not (v_id in ids) and category != 'None' and distance < tick * 1000 :
            otherVenues.append((lt, lg, v_id, name, distance, category))

In [None]:
otherVenuesDf = pd.DataFrame(otherVenues)
otherVenuesDf.columns = ['Latitude', 'Longitude', 'VenueID', 'Name', 'Distance', 'Category']
otherVenuesDf.set_index(['Latitude', 'Longitude'], inplace=True)
otherVenuesDf.sort_index(inplace=True)
otherVenuesDf

In [None]:
otherVenuesDf['Density'] = (tick * 1000) - otherVenuesDf['Distance']
otherVenuesDf

In [None]:
features = pd.pivot_table(otherVenuesDf,index=['Latitude', 'Longitude'],values='Density',
                          columns='Category',aggfunc=[np.sum],fill_value=0)['sum']

In [None]:
features.info()

In [None]:
features

In [None]:
features.describe()

In [None]:
features = np.log10(features + 1)

In [None]:
features.describe()

In [None]:
features = features.join(targetsDf).fillna(0).reset_index()

In [None]:
features.describe()

In [None]:
X = features.drop(['Latitude', 'Longitude', 'Density', 'Log Density'], axis=1)
y = features['Log Density']

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=22)

In [None]:
reg_all = linear_model.LinearRegression()
reg_all.fit(X_train, y_train)
y_pred_test = reg_all.predict(X_test)
y_pred_train = reg_all.predict(X_train)
reg_all.score(X_test, y_test)

In [None]:
testDf = y_test.to_frame()
testDf['Predicted'] = y_pred_test
testDf.plot(kind='box')
plt.show()

In [None]:
testDf.describe()

In [None]:
plt.scatter(y_test, y_pred_test, color='blue', label='Test')
plt.scatter(y_train, y_pred_train, color='red', label='Training')
plt.xlabel('Actual')
plt.ylabel('Predicted')
plt.legend(loc='upper left')
plt.show()

In [None]:
lasso = linear_model.Lasso(alpha=0.005, normalize=True)
lasso.fit(X_train, y_train)
lasso_pred_test = lasso.predict(X_test)
lasso_pred_train = lasso.predict(X_train)
lasso.score(X_test, y_test)

In [None]:
testDf = y_test.to_frame()
testDf['Predicted'] = lasso_pred_test
testDf.plot(kind='box')
plt.show()

In [None]:
plt.scatter(y_test, lasso_pred_test, color='blue', label='Test')
plt.scatter(y_train, lasso_pred_train, color='red', label='Training')
plt.xlabel('Actual')
plt.ylabel('Predicted')
plt.legend(loc='upper left')
plt.show()

In [None]:
lasso_pred_all = lasso.predict(X)

In [None]:
plt.scatter(y, lasso_pred_all, color='green')
plt.show()

In [None]:
len(lasso_pred_all)

In [None]:
results = features[['Latitude', 'Longitude', 'Log Density']]

In [None]:
results['Predicted'] = lasso_pred_all

In [None]:
plt.scatter(results['Log Density'], results['Predicted'], color='green')
plt.show()

In [None]:
results['Rank'] = results['Predicted'] / (results['Log Density'] + 1)

In [None]:
top10 = results.sort_values(by='Rank', ascending=False).head(10)
top10

In [None]:
l = list(zip(top10['Latitude'], top10['Longitude']))
l

In [None]:
# create map
map_grid = folium.Map(location=[lat, long], zoom_start=13)

for (lt, lg) in l:
    folium.CircleMarker([lt, lg],
                        radius=4,
                        fill=True,
                        fill_opacity=0.5,
                        color='Red').add_to(map_grid)
       
map_grid