# Exploring Foursquare
Updated, again, 05/31/19
* How much data can I get in a day?
* What categories should I examine and how?
* Will "shrinking box" exploration work?
* Does Folium have a square marker (try in other notebook?)

In [0]:
import numpy as np # library to handle data in a vectorized manner

# from time import sleep        # For Foursquare queries per second limit if any

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

# #!conda install -c conda-forge geopy --yes # uncomment this line if needed
# from geopy.geocoders import Nominatim # convert address into latitude and longitude

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if needed
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [0]:
# @hidden cell
# Foursquare information

CLIENT_ID = 'CWCPNAVXDH3TI1BGS4VED4ANSUKEFGHBA4511GRPYPKPNJRD' # your Foursquare ID
CLIENT_SECRET = 'ALMFEYIENSPH3RV3TQB1NGWTKTANVJ5QTHAVZ5B1GRJWIP21' # your Foursquare Secret

In [0]:
# Foursquare query information that will be constant during this project
VERSION = '20190427' # Foursquare API version
LIMIT = 120

In [0]:
# define function that extracts the category of the venue
# used in Foursquare section

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [0]:
# define function to extract Foursquare information given
# a bounding box and a list of categories

def getVenuesByBox(sw, ne,    # the southwest & northeast box corners, tuples
                   codes):      # Foursquare venue codes, list of strings
                  
  
  # Foursquare category ID requires comma-separated text string
  code_string = ",".join(codes)
  
  # define the search URL
  url = f'https://api.foursquare.com/v2/venues/explore?&client_id={CLIENT_ID}' + \
      f'&client_secret={CLIENT_SECRET}&v={VERSION}&sw={sw[0]},{sw[1]}' + \
      f'&ne={ne[0]},{ne[1]}' + \
      f'&categoryId={code_string}&limit={LIMIT}'
  
  # submit the url and capture the returned text
  venues = requests.get(url).json()["response"]['groups'][0]['items']
  
  # if any venues are captured, process and return dataframe. Otherwise, return None. 
  if venues:
  
    venues = json_normalize(venues) # flatten JSON

    # filter columns
    filtered_columns = ['venue.name', 'venue.categories', 'venue.location.postalCode', 
                        'venue.location.lat', 'venue.location.lng', 'venue.location.state']
    venues = venues.loc[:, filtered_columns]

    # filter the category for each row
    venues['venue.categories'] = venues.apply(get_category_type, axis=1)

    # clean columns
    venues.columns = [col.split(".")[-1] for col in venues.columns]
  
    return venues
  
  else:
    
    return None  # no venues in box meet criteria
  
  

In [0]:
# Define function to split a bounding box into 4 sub-boxes

def split_box_old(sw, ne):
    len_x, len_y = (ne[0] - sw[0])/2, (ne[1] - sw[1])/2
    sw_corners = [(sw[0],       sw[1]      ),
                  (sw[0]+len_x, sw[1]      ),
                  (sw[0],       sw[1]+len_y),
                  (sw[0]+len_x, sw[1]+len_y)]
    boxes = []
  
    for corner in sw_corners:
        boxes.append((corner, (corner[0]+len_x, corner[1]+len_y)))
        
    return boxes

In [0]:
def split_box(box, purpose="number"):
    MAX_DELTA_LAT = 0.8
    MAX_DELTA_LNG = 0.9
    delta_lat = abs(box[0][0] - box[1][0])
    delta_lng = abs(box[0][1] - box[1][1])
    
    boxes = []
    
    if purpose == "number":  # Splitting box because too many results returned
        lat_divisions = lng_divisions = 2
    elif purpose == "size":  # Splitting because box is too large for Foursquare
        
        lat_divisions = math.ceil(delta_lat / MAX_DELTA_LAT)
        lng_divisions = math.ceil(delta_lng / MAX_DELTA_LNG)
        
    else:  # This is an error.
        raise ValueError("This function only supports 'size' and 'number' box splitting")
    
    for i in range(lat_divisions):
        for j in range(lng_divisions):
            lat_side = delta_lat / lat_divisions
            lng_side = delta_lng / lng_divisions
            swij = (box[0][0] + i * lat_side, box[0][1] + j * lng_side)
            neij = (swij[0] + lat_side, swij[1] + lng_side)
            boxes.append((swij, neij))            
    
    return boxes

In [0]:
columns=['sw','ne', 'clients']
df_box = pd.DataFrame(columns=columns)
# df_pandas_sucks = pd.DataFrame([[1, 2, 3]], columns=columns)
# df_box = df_box.append(df_pandas_sucks, ignore_index = True)
# df_box = df_box.append(df_pandas_sucks, ignore_index = True)
# df_box = df_box.append(df_pandas_sucks, ignore_index = True)
# df_box = df_box.append(df_pandas_sucks, ignore_index = True)
box_list=[]

In [0]:
def shrink_box(sw, ne, codes):

    df_results = getVenuesByBox(sw, ne, codes)
    
    try:
        
        num_clients, num_columns = df_results.shape
        
    
        if num_clients < 100:

            # Add query to list. Is try statement still needed?
            try:
                box_list.append([sw, ne, num_clients])
#                 df_boxrow = pd.DataFrame([[sw, ne, num_clients]], columns=columns)
#               print(df_boxrow)
#                 df_box = df_box.append(df_pandas_sucks, ignore_index=True)
            except:
                print('error')
                
                
            return df_results
    
        else:    
            df_results = df_results[0:0]  # Empty results
      
            for box in split_box((sw,ne)):
                sub_result = shrink_box(box[0],box[1], codes)
        
                try:  
                    df_results = df_results.append(sub_result, ignore_index = True)
                    print(sub_result.shape)
                except:
                    print("Empty")    
              
        return df_results
    
    except:
        # THIS IS WHERE YOU PUT AN EMPTY BOX
        return None

In [0]:
def what_fuck():
    box_list.append(34)
   
what_fuck()

box_list

[34]

In [0]:
# fsw = (33.946964, -84.150187)  
# fne = (33.971041, -84.121381)

# fsw = (33.913157, -84.220630)  #The big box
# fne = (34.025437, -84.062058)

# fsw = (33.751982, -84.264245)  #The bigger box - all G county
# fne = (34.163175, -83.802854)

# fsw = (33.606360, -84.520664)  # ITP
# fne = (33.923112, -84.238162)

fsw = (33.606360, -84.520664)  # Bigger. ITP and G County
fne = (34.163175, -83.802854)


# fsw = (40.692288, -74.033045)  # Manhatten - 8000+ venues
# fne = (40.916004, -73.763431)

# fsw = (30.356590, -85.468937)  # All Georgia errors
# fne = (34.987081, -80.687209)

# fsw = (34.254931, -83.466280) #roughly 50 mile box on side
# fne = (35.707023, -81.354670)


asian =  ['4bf58dd8d48988d142941735']
indian = ['4bf58dd8d48988d10f941735']

asian = indian + asian

box_list = []

df_x = shrink_box(fsw,fne,asian)

In [0]:
print(df_x.shape)
df_x.head()


(1826, 6)


Unnamed: 0,name,categories,postalCode,lat,lng,state
0,Noodle,Asian Restaurant,30337,33.654405,-84.44965,GA
1,Octopus Bar,Asian Restaurant,30316,33.739636,-84.345671,GA
2,Hot Cafe,Asian Restaurant,30349,33.611082,-84.436311,GA
3,P.F. Chang's,Asian Restaurant,30320,33.640716,-84.439281,GA
4,Pho Lanxang,Asian Restaurant,30297,33.630234,-84.356814,GA


In [1]:
map_georgia = folium.Map(location=[33.75, -84.38], zoom_start=10)

# folium.CircleMarker([33.606360, -84.520664], radius=50).add_to(map_toronto)

# folium.Circle([33.971041, -84.121381], radius=5000).add_to(map_toronto)
density = []

    
for box in box_list:
    a = np.log10(box[2]/((box[0][0] - box[1][0])*(box[0][1] - box[1][1])))

    density.append(a) # box[2]/((box[0][0] - box[1][0])*(box[0][1] - box[1][1]))
    

for i, box in enumerate(box_list):
    
    opacity = box[2]/150#(density[i]-min(density))/(max(density)-min(density))
                  
    folium.Rectangle([box[0], box[1]], 
                     popup=str(box[2]), weight = 2, color="black", opacity=0.00, fill=True, fill_color='orange', 
                     fill_opacity=(opacity)).add_to(map_georgia)
    
  
    
    
    
map_georgia

NameError: ignored

# Get by Box Pseudo Code

calls: 
* **split_box**(box, purpose="size")
* **query_box**(box, codes)


    gVBB(sw, ne, codes):
        venues = empty data frame
        query_list = empty list
        start_box = [sw, ne]
        version =
        Limit = 
        
        # Create list of boxes from major box
         
        for box in split_box(start_box, purpose = 'size'):
           shrink_box(box, codes)
           
       return venues, query_list
    
        
        

# shrink_box Pseudo Code

**def shrink_box(box, codes):**

    def query_box(box, codes)
        # Existing getVenuesByBox, updated for box instead of sw, ne
        
    box_results = query_box(box, codes)
    
    try  # to see if anything was returned
        num_clients, num_columns = box_results.shape
        
        if num_clients < 100:
        
            venues.append(box_results)
            query_list.append([box, num_clients])
            
         else:  # results were returned, but at limit
         
             for sub_box in split_box(box, purpose='number'):
                 shrink_box(sub_box, codes)
                 
      except
          query_list.append([box, 0])
          
**return**    # no need to return anything, because any valid results have been appended
        





In [0]:
asian = ['4bf58dd8d48988d142941735']
indian = ['4bf58dd8d48988d10f941735']

asian = indian + asian

fsw = (33.946964, -84.150187)  
fne = (33.971041, -84.121381)

# fsw = (31.468720, -79.166785)  # Offshore - used to test for area with no venues
# fne = (31.662861, -78.990998)





# df_asian = 
# df_indian = shrink_box(fsw, fne, indian)
df_nearme = getVenuesByBox(fsw, fne, asian)
print(df_nearme.shape)

df_nearme.head()

(100, 6)


Unnamed: 0,name,categories,postalCode,lat,lng,state
0,Saigon Cafe Duluth,Asian Restaurant,30096,33.960122,-84.136158,GA
1,Seo Ra Beol Restaurant,Asian Restaurant,30096,33.963876,-84.141964,GA
2,Mini Hot Pot 2,Asian Restaurant,30096,33.959822,-84.134848,GA
3,SunO Dessert,Asian Restaurant,30096,33.96918,-84.14504,GA
4,Assi Plaza,Supermarket,30096,33.94716,-84.123698,GA


In [0]:
print(df_nearme.shape)
print(set(df_nearme['state']))
df_indian.head()


(58, 6)
{'GA', 'Georgia'}


Unnamed: 0,name,categories,postalCode,lat,lng,state
0,Curry Cuisine Indian Food,Indian Restaurant,,33.960464,-84.12516,GA


In [0]:
# Location for sample query - basically my neighborhood in Duluth

super_h = (33.969303, -84.143626)

# corners for Foursquare bounding box search


albany_ne = (32.114878, -83.489320)
albany_sw = (31.517953, -84.222243)

# Foursquare categories

# Korean restaurant (sub category of above) '4bf58dd8d48988d113941735'
# Indian restaurant (not sub category of above) '4bf58dd8d48988d10f941735'

In [0]:
# Define search url and test.

# fsw, fne = albany_sw, albany_ne



url = f'https://api.foursquare.com/v2/venues/explore?&client_id={CLIENT_ID}' + \
      f'&client_secret={CLIENT_SECRET}&v={VERSION}&sw={fsw[0]},{fsw[1]}' + \
      f'&ne={fne[0]},{fne[1]}' + \
      f'&categoryId={asian}' # &limit={LIMIT}'    

results = requests.get(url).json()

In [0]:
# Now convert to dataframe and examine. 

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.postalCode','venue.location.lat','venue.location.lng', 'venue.location.state']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

print(nearby_venues.shape)
nearby_venues.sort_values(by=['categories']).head(50)

KeyError: ignored

In [0]:
print(venues)

[{'reasons': {'count': 0, 'items': [{'summary': 'This spot is popular', 'type': 'general', 'reasonName': 'globalInteractionReason'}]}, 'venue': {'id': '4b2d1a43f964a52056ce24e3', 'name': 'Saigon Cafe Duluth', 'location': {'address': '3675 Satellite Blvd #750', 'crossStreet': 'and Pleasant Hill Rd', 'lat': 33.96012236013812, 'lng': -84.13615842313993, 'labeledLatLngs': [{'label': 'display', 'lat': 33.96012236013812, 'lng': -84.13615842313993}], 'postalCode': '30096', 'cc': 'US', 'city': 'Duluth', 'state': 'GA', 'country': 'United States', 'formattedAddress': ['3675 Satellite Blvd #750 (and Pleasant Hill Rd)', 'Duluth, GA 30096', 'United States']}, 'categories': [{'id': '4bf58dd8d48988d142941735', 'name': 'Asian Restaurant', 'pluralName': 'Asian Restaurants', 'shortName': 'Asian', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/asian_', 'suffix': '.png'}, 'primary': True}], 'photos': {'count': 0, 'groups': []}}, 'referralId': 'e-0-4b2d1a43f964a52056ce24e3-0'}, {'reasons':

# Data structures needed

    df_box: 
    - sw, ne
    - number of clients
    
    df_client: 
    - Name
    - lat & long
    - categories
    - postal code (?)
    - state

# Foursquare Information

Free plan includes:
* 2 queries per second
* 95,000 regular calls per day
* 500 premium calls per day

Attribution is **required**: Whenever Foursquare data is showing on the screen, attribution to Foursquare must be visible simultaneously or before the Foursquare data is shown.



   