In [1]:
import requests
import time
import json
import pandas as pd
from scipy import stats
from datetime import date

In [2]:
# Category reference: https://location.foursquare.com/places/docs/categories
# `categories` starts empty here; the raw lines of the local category file are
# kept unmodified (trailing newlines included) in `categories_list`.
categories = dict()
with open('Foursquare Categories.txt') as category_file:
    categories_list = list(category_file)

In [3]:
# Fill the id -> label lookup from the raw file lines. Each line holds a
# category id and its label separated by a tab character.
for raw_line in categories_list:
    stripped_line = raw_line.strip()
    if not stripped_line:
        # A blank line (typically the trailing one at the end of the file)
        # carries no category; skip it instead of failing with an IndexError.
        continue
    category_pair = stripped_line.split('\t')
    categories[category_pair[0]] = category_pair[1]

In [4]:
# Category finder: prints every (id, label) whose label contains the search
# term, ignoring case.
search_Term = 'barber'
needle = search_Term.upper()
matches = [
    (cid, label)
    for cid, label in categories.items()
    if needle in label.upper()
]
for cid, label in matches:
    print(cid, label)

11062 Business and Professional Services > Health and Beauty Service > Barbershop


In [5]:
# Top-level categories are the labels without a '>' hierarchy separator.
top_level = {
    cid: label
    for cid, label in categories.items()
    if '>' not in label
}
for cid, label in top_level.items():
    print(cid, label)

10000 Arts and Entertainment
11000 Business and Professional Services
12000 Community and Government
13000 Dining and Drinking
14000 Event
15000 Health and Medicine
16000 Landmarks and Outdoors
17000 Retail
18000 Sports and Recreation
19000 Travel and Transportation


In [6]:
# Place Search API reference:
# https://location.foursquare.com/developer/reference/place-search
# The API key is the first line of a local text file, with surrounding
# whitespace removed.
with open('Foursquare.com Developers API Key.txt') as key_file:
    key_lines = key_file.readlines()
api_key = key_lines[0].strip()

In [33]:
# Centre of the main business area for each of the 10 largest US cities, as
# [latitude, longitude] strings. The coordinates were supplied by ChatGPT.
city_centers = {
    'new_york_city': ['40.7570', '-73.9855'],
    'los_angeles': ['34.0511', '-118.2582'],
    'chicago': ['41.8820', '-87.6278'],
    'houston': ['29.7604', '-95.3698'],
    'phoenix': ['33.4484', '-112.0740'],
    'philadelphia': ['39.9526', '-75.1652'],
    'san_antonio': ['29.4241', '-98.4936'],
    'san_diego': ['32.7157', '-117.1611'],
    'dallas': ['32.7767', '-96.7970'],
    'austin': ['30.2672', '-97.7431'],
}

# Per-city names bound to the very same coordinate lists held in the mapping.
(
    new_york_city,
    los_angeles,
    chicago,
    houston,
    phoenix,
    philadelphia,
    san_antonio,
    san_diego,
    dallas,
    austin,
) = city_centers.values()

# Ordered (name, [lat, lon]) tuples.
location_list = list(city_centers.items())

In [34]:
# Sanity check: show every city name next to its [lat, lon] pair.
for location_name, location_coords in location_list:
    print(location_name, location_coords)

new_york_city ['40.7570', '-73.9855']
los_angeles ['34.0511', '-118.2582']
chicago ['41.8820', '-87.6278']
houston ['29.7604', '-95.3698']
phoenix ['33.4484', '-112.0740']
philadelphia ['39.9526', '-75.1652']
san_antonio ['29.4241', '-98.4936']
san_diego ['32.7157', '-117.1611']
dallas ['32.7767', '-96.7970']
austin [' 30.2672', '-97.7431']


In [58]:
# Inputs for the single query that follows: one city centre and one category
# id ('10000' is listed above as "Arts and Entertainment").
location_name, location_coords = ('new_york_city', ['40.7570', '-73.9855'])
category = '10000'

In [62]:
# Run one Place Search request for `location_coords` / `category` and count
# (or, when the response is capped by `limit`, estimate) how many places lie
# inside the search radius. Leaves the raw rows in `df` and the count in
# `results`.
ll = location_coords
radius_miles = '1' # Miles
# Deliberately not named `categories`: that name is the id -> label lookup
# dict built at the top of the notebook and must not be overwritten.
category_ids = [category]
fields = ['distance']
sort = 'DISTANCE' # RELEVANCE, RATING, or DISTANCE
limit = 50 # 50 is the max limit

# Miles -> metres (1 mile = 1609.34 m), truncated to a whole number.
radius = str(int(float(radius_miles)*1609.34))
url = "https://api.foursquare.com/v3/places/search"
# Let requests build and percent-encode the query string (',' becomes %2C).
params = {
    'll': ','.join(ll),
    'radius': radius,
    'categories': ','.join(category_ids),
    'fields': ','.join(fields),
    'sort': sort,
    'limit': limit,
}

headers = {
    "accept": "application/json",
    # Use the key loaded from the local key file; never paste it in a cell.
    "Authorization": api_key
}

# The timeout keeps a stalled connection from hanging the kernel forever.
response = requests.get(url, params=params, headers=headers, timeout=30)
# Surface HTTP errors (bad key, rate limit, ...) here rather than as a
# confusing KeyError on 'results' below.
response.raise_for_status()

df = pd.DataFrame(response.json()['results'])

# Count the results within the given radius
if len(df) < limit:
    results = len(df)
else:
    # The response was capped by `limit`, so the true count inside the radius
    # has to be estimated. A straight line is fitted to "number of places seen"
    # versus "distance from the centre" and evaluated at the radius.
    # Caveats: the relationship is not really linear (area grows with the
    # square of the radius, while density falls off away from a city centre),
    # and the value is an extrapolation beyond the observed distances. Do not
    # put much faith in the estimate being accurate.
    estimate_df = pd.DataFrame(df['distance'].copy(deep=True))
    # Rows are sorted by distance, so the row number is the cumulative count.
    estimate_df['results'] = range(1, len(estimate_df) + 1)
    # The true intercept should be 0, so add some extra (0, 0) points to help
    # anchor the fitted intercept near 0.
    zero_dict = {'distance':[0]*20,'results':[0]*20} # 20 is arbitrary
    estimate_df = pd.concat([estimate_df, pd.DataFrame(zero_dict)], ignore_index=True)
    slope, intercept, r_value, p_value, std_err = stats.linregress(
        estimate_df['distance'], estimate_df['results']
    )
    # Every returned place is inside the radius, so the estimate can never be
    # lower than the number of places actually returned.
    results = max(len(df), int(slope * float(radius) + intercept))

print(results)

87


In [78]:
# One record describing the query that was just run: where, which category,
# which radius, the resulting count, the individual distances as a single
# comma-separated string, and today's date as a string.
distance_strings = df['distance'].astype(str)
query_data_dict = dict(
    location_name=location_name,
    lat=location_coords[0],
    lon=location_coords[1],
    category=category,
    radius_miles=radius_miles,
    results=results,
    distances=','.join(distance_strings),
    query_date=date.today().isoformat(),
)

In [80]:
# Empty table whose columns carry the same names as the keys of
# query_data_dict.
query_columns = [
    'location_name',
    'lat',
    'lon',
    'category',
    'radius_miles',
    'results',
    'distances',
    'query_date',
]
query_data = pd.DataFrame(columns=query_columns)