In [98]:
from yelp.client import Client
from yelp.oauth1_authenticator import Oauth1Authenticator
import os
import json

def import_credentials(filename='yelp_credentials.json'):
    """
    Loads the Yelp API credentials from a JSON file.

    :param filename -- Path of the JSON file in which the Yelp service credentials are stored.
    Defaults to `yelp_credentials.json` in the current working directory.
    :return -- The parsed JSON content. The rest of this notebook reads the keys `consumer_key`,
    `consumer_secret`, `token` and `token_secret` from it.
    :raises IOError -- If `filename` does not point at an existing regular file.
    """
    # Test the path itself rather than scanning a listing of the working directory, so that
    # paths containing a directory component are accepted too.
    if not os.path.isfile(filename):
        raise IOError('This API requires Yelp credentials to work. Did you forget to define them?')
    # The context manager closes the handle even when the JSON turns out to be malformed.
    with open(filename) as credentials_file:
        return json.load(credentials_file)

# Read the Yelp API keys from disk and build the authenticated client that
# fetch_businesses() sends its searches through.
credentials = import_credentials()

auth = Oauth1Authenticator(**{
    field: credentials[field]
    for field in ('consumer_key', 'consumer_secret', 'token', 'token_secret')
})

client = Client(auth)

In [266]:
from pandas import DataFrame
import geojson

def fetch(name, area='New York'):
    """
    Fetches the coordinates of all exact matches for "name" that lie inside Manhattan.
    e.g. all instances of a chain near your designated area.

    Pages through the Yelp search results for `name` in `area`, keeps the businesses whose name
    equals `name` exactly, and drops every match whose coordinates fall outside the polygon
    returned by load_manhattan() -- so the result is Manhattan-only whatever `area` is.

    :param name -- Business name to search for. Only exact (case-sensitive) name matches are kept.
    :param area -- Location string handed to the Yelp search. Defaults to 'New York'.
    :return -- A DataFrame with a single `coordinates` column holding (latitude, longitude)
    tuples, indexed 0..n-1.
    """
    offset = 0
    matches = []
    while True:
        response = fetch_businesses(name, offset, area)
        page = response.businesses
        exact = [business for business in page if business.name == name]
        # The Yelp search API gets progressively worse at serving results.
        # Eventually it drills down to 0 results returned.
        # None of the results at that point are even in the chosen area anymore, probably, but there isn't a better
        # cutoff point.
        if not exact:
            break
        matches += exact
        # Step past everything this page returned. Advancing only by the number of exact matches
        # would make the next page overlap this one and append the same businesses again.
        offset += len(page)
    # At this point we're throwing away the Business contents of the query.
    # Coring down to just coordinates.
    coordinates = [(business.location.coordinate.latitude,
                    business.location.coordinate.longitude) for business in matches]
    # Now get rid of locations not actually in Manhattan.
    manhattan = load_manhattan()
    # Note that folium wants (lat, long) while the point-in-polygon helper wants (long, lat),
    # hence the swapped argument order in the call below.
    in_manhattan = [(latitude, longitude) for latitude, longitude in coordinates
                    if point_inside_polygon(longitude, latitude, manhattan)]
    # Building the frame from the already-filtered list gives a clean 0..n-1 index and also
    # works when nothing matched.
    return DataFrame(data={'coordinates': in_manhattan})

def fetch_businesses(name, offset, area):
    """
    Runs a single raw Yelp search query and returns the response untouched.
    This is the low-level call that fetch() above loops over.

    :param name -- Search term sent to Yelp.
    :param offset -- Offset into the result list at which this page starts.
    :param area -- Location string passed as the first argument of the search.
    :return -- Whatever `client.search` returns; fetch() reads its `businesses` attribute.
    """
    # At most 20 results are requested per call.
    return client.search(area, term=name, limit=20, offset=offset)

# Determine if a point is inside a given polygon or not.
# Polygon is a list of (x,y) pairs.
# Borrowed from: http://www.ariel.com.au/a/python-point-int-poly.html
def point_inside_polygon(x,y,poly):
    """
    Checks if a point is inside a polygon.
    Used to validate points returned by the Yelp API as being inside of Manhattan.

    Walks every edge of the polygon (the last vertex is joined back to the first) and flips the
    result each time a horizontal ray cast from the point toward increasing x crosses an edge.

    :param x -- x coordinate of the point (fetch() passes the longitude here).
    :param y -- y coordinate of the point (fetch() passes the latitude here).
    :param poly -- List of (x, y) vertex pairs describing the polygon.
    :return -- True if the point is inside the polygon, False otherwise. An empty polygon
    contains nothing, so False is returned for it. Points lying exactly on an edge are not
    treated uniformly: they can come out as inside or outside depending on the edge.
    """

    n = len(poly)
    # Without vertices there is nothing to be inside of; this also avoids indexing poly[0].
    if n == 0:
        return False
    inside = False

    p1x,p1y = poly[0]
    for i in range(n+1):
        # i % n wraps the final iteration around to the first vertex, closing the polygon.
        p2x,p2y = poly[i % n]
        # Only edges whose y-span contains y (exclusive at the bottom, inclusive at the top) and
        # which are not entirely to the left of the point can be crossed by the ray.
        if min(p1y,p2y) < y <= max(p1y,p2y) and x <= max(p1x,p2x):
            if p1y != p2y:
                # x coordinate at which the edge crosses the horizontal line through the point.
                xints = (y-p1y)*(p2x-p1x)/(p2y-p1y)+p1x
            if p1x == p2x or x <= xints:
                inside = not inside
        p1x,p1y = p2x,p2y

    return inside

# I grabbed the Manhattan GeoJSON from a recent Gist of mine, itself taken from a database of them somewhere:
# https://gist.github.com/ResidentMario/4c268d70c4502d522782
def load_manhattan(filename="manhattan.geojson"):
    """
    Reads a GeoJSON file and returns the coordinates it contains as a list.
    fetch() uses the result as the Manhattan polygon for point_inside_polygon().

    :param filename -- Path of the GeoJSON file to read. Defaults to `manhattan.geojson`.
    :return -- A list built from `geojson.utils.coords` applied to the parsed file.

    NOTE(review): the coordinates come back as one sequence; if the file holds more than one
    ring (holes, or several polygons), point_inside_polygon() would presumably treat them all
    as a single ring -- confirm the file describes one simple polygon.
    """
    with open(filename) as source:
        raw_text = source.read()
    boundary = geojson.loads(raw_text)
    return list(geojson.utils.coords(boundary))

In [279]:
import folium

def map_coordinates(locs):
    """
    Builds a folium map with one marker for each row of `locs`.
    Wrapped by map_chain() below.

    :param locs -- DataFrame with a `coordinates` column; each value is handed to folium as a
    marker location (fetch() produces (latitude, longitude) tuples).
    :return -- The folium map, created around a hard-coded centre point at zoom level 11.
    """
    chain_map = folium.Map(location=[40.753889, -73.983611], zoom_start=11)
    for _, record in locs.iterrows():
        chain_map.simple_marker(location=record['coordinates'])
    return chain_map
    
def map_chain(name):
    """
    Looks up every location called `name` with fetch() and plots them with map_coordinates().

    :param name -- Business name, passed straight to fetch().
    :return -- The folium map produced by map_coordinates().
    """
    return map_coordinates(fetch(name))

In [283]:
# Map the "Bibble & Sip" locations that map_chain() finds.
bibble_and_sip = map_chain("Bibble & Sip")
# NOTE(review): presumably writes the rendered map to this HTML file -- confirm against the installed folium version.
bibble_and_sip.create_map(path="bibble_and_sip.html")
# Bare expression on the last line so the notebook shows the map as the cell's output.
bibble_and_sip

In [284]:
# Map the "Gregory's Coffee" locations that map_chain() finds.
gregorys = map_chain("Gregory's Coffee")
# NOTE(review): presumably writes the rendered map to this HTML file -- confirm against the installed folium version.
gregorys.create_map(path="gregorys_coffee.html")
# Bare expression on the last line so the notebook shows the map as the cell's output.
gregorys

In [285]:
# Map the "Starbucks" locations that map_chain() finds.
starbucks = map_chain("Starbucks")
# NOTE(review): presumably writes the rendered map to this HTML file -- confirm against the installed folium version.
starbucks.create_map(path="starbucks.html")
# Bare expression on the last line so the notebook shows the map as the cell's output.
starbucks

In [286]:
# Continue with:
# 1. Fix problem: the Upper East Side and near-Williamsburg areas are awash with Starbucks, too.
#    Where did this data go? Yelp API issue?
# 2. Pull a distance calculation function out of geopy (https://pypi.python.org/pypi/geopy)
# 3. Write a method to create a random sample of 1000