In [14]:
# Importing Libraries
import requests
import json
import pandas as pd
import os

In [7]:
# Loading CityBikes data from Part 1
ReddingBikes = pd.read_csv('../data/ReddingBikesDF.csv')

# [Foursquare](https://docs.foursquare.com/developer/reference/places-api-overview)

### Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [3]:
# Call Foursqaure API function
def getVenuesFS(latitude, longitude, radius, api_key, categories, fields, limit):
    """
    Get venues from foursquare with a specified place type and coordinates.
    Args:
        latitude (float): latitude for query (must be combined with longitude)
        longitude (float): longitude for query (must be combined with latitude)
        api_key (str): foursquare API to use for query
        categories (str) : Foursquare category codes. Separate multiple ids with commas
        fields (str): Foursquare fields. Seperate multiple fields with commas.
        limit (int): Number of results to return, max 50
    Returns:
        response: response object from the requests library.
    """
    headers = {
        "accept": "application/json",
        "Authorization": api_key
    }

    ll=str(latitude) + ',' + str(longitude)

    url = f'https://api.foursquare.com/v3/places/search?ll={ll}&radius={radius}&categories={categories}&limit={str(limit)}&fields={fields}'

    results = requests.get(url, headers=headers)

    return results

Foursquare [category codes](https://docs.foursquare.com/data-products/docs/categories) for Dining & Drinking and POIs
* 13000 Dining & Drinking
* 16000	Landmarks and Outdoors


In [27]:
# Setting Constant Variables 
FOURSQUARE_KEY = os.getenv('Foursquare_API_KEY')
radius = 1000
categories = '13000,16000'
fields = 'fsq_id,name,location,categories,distance,verified,rating,stats,popularity,price'
limit = 50

In [28]:
# Pulling all info for each bike station into a list
FoursquareVenues = [] 

for i in range(len(ReddingBikes)):
    venues = getVenuesFS(ReddingBikes['latitude'][i], ReddingBikes['longitude'][i], radius, FOURSQUARE_KEY, categories, fields, limit).json()
    FoursquareVenues.append(venues)    


In [29]:
# Function to return category group using category id code
def CategoryGroup(categoryID):
    """
    Returns Foursquare Category Group from CategoryID 

    Args:
        categoryID (str): String of category id code from foursqaure

    Returns:
        Category Group from foursquare codes
    """
    if categoryID.startswith('10'): 
        return 'Arts and Entertainment'
    elif categoryID.startswith('11'): 
        return 'Business and Professional Services'
    elif categoryID.startswith('12'): 
        return 'Community and Government'
    elif categoryID.startswith('13'): 
        return 'Dining and Drinking'
    elif categoryID.startswith('14'): 
        return 'Event'
    elif categoryID.startswith('15'): 
        return 'Health and Medicine'
    elif categoryID.startswith('16'):
        return 'Landmarks and Outdoors'
    elif categoryID.startswith('17'): 
        return 'Retail'
    elif categoryID.startswith('18'): 
        return 'Sports and Recreation'
    elif categoryID.startswith('19'): 
        return 'Travel and Transportation'



### Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [30]:
# Function to return list of desired results from each Foursquare venue
def ParseFoursqaureResults(venue): 
    """ 
    Parses elements from a venue in the list of results given by a Foursquare Query.

    Args: 
        venue (dict): A venue from the list of Foursquare Venues for each station

    Returns a list of preset elements in the following order from the venue.  
    List order: ['fsqID', 'venueName', 'formattedAddress', 'catID', 'catName', 'catIDGroup', 'distance', 'rating', 'totalRatings', 'totalPhotos', 'totalTips', 'popularity', 'price', 'storeVerified']
    """
    # FS core data. All venues have this information 
    fsqID = venue['fsq_id']
    venueName = venue['name']
    formattedAddress = venue['location']['formatted_address']
    catID = venue['categories'][0]['id']
    catName = venue['categories'][0]['name']
    catIDGroup = CategoryGroup(str(catID))
    distance = venue['distance']
    
    # FS Rich data. Fields which may not exist
    try:
        rating = venue['rating']
    except: 
        rating = None

    try:
        totalRatings = venue['stats']['total_ratings']
        totalPhotos = venue['stats']['total_photos']
        totalTips = venue['stats']['total_tips']
    except: 
        totalRatings = None
        totalPhotos = None
        totalTips = None

    try:
        popularity = venue['popularity']
    except: 
        popularity = None

    try:
        price = venue['price']
    except: 
        price = None

    try:
        storeVerified = venue['verified']    
    except: 
        storeVerified = None

    returnList = [fsqID, venueName, formattedAddress, catID, catName, catIDGroup, distance, rating, totalRatings, totalPhotos, totalTips, popularity, price, storeVerified]

    return returnList

In [31]:
# Parsing all results into a list using function for tidy data that can be converted to DF
fsVenuesList = []

for i in range(len(FoursquareVenues)):
    for venues in FoursquareVenues[i]['results']:
        returnList = ParseFoursqaureResults(venues)
        returnList.append(i)
        fsVenuesList.append(returnList)

### Put your parsed results into a DataFrame

In [32]:
fsColumnsHeader = ['fsqID', 'venueName', 'formattedAddress', 'catID', 'catName', 'catIDGroup', 'distance', 'rating', 'totalRatings', 'totalPhotos', 'totalTips', 'popularity', 'price', 'storeVerified','stationIndex']

FSVenuesDF = pd.DataFrame(fsVenuesList)
FSVenuesDF.columns=fsColumnsHeader

In [33]:
FSVenuesDF.head()

Unnamed: 0,fsqID,venueName,formattedAddress,catID,catName,catIDGroup,distance,rating,totalRatings,totalPhotos,totalTips,popularity,price,storeVerified,stationIndex
0,4b5138a2f964a520cc4727e3,Market Street Steakhouse,"1777 Market St, Redding, CA 96001",13383,Steakhouse,Dining and Drinking,188,8.7,14.0,12.0,6.0,0.966425,3.0,False,0
1,581ccd7f0d63b178d22e9e54,Theory Collaborative,"1250 California St, Redding, CA 96001",13034,Café,Dining and Drinking,818,8.9,35.0,34.0,4.0,0.978524,1.0,True,0
2,4b636603f964a520ff762ae3,Taqueria Los Gordos,"1400 Pine St (Tehama), Redding, CA 96001",13099,Chinese Restaurant,Dining and Drinking,621,8.1,28.0,15.0,13.0,0.965215,2.0,False,0
3,5a110c7f0fe7a07167d2f62f,Taste & See Creamery,"1419 Market St, Redding, CA 96001",13046,Ice Cream Parlor,Dining and Drinking,579,8.0,9.0,11.0,1.0,0.974592,1.0,False,0
4,4bf3009298ac0f47ad6262a8,Damburger,"1320 Placer St, Redding, CA 96001",13031,Burger Joint,Dining and Drinking,309,7.2,27.0,8.0,10.0,0.962266,1.0,True,0


In [57]:
# FSVenuesDF.to_csv('../data/FoursquareVenuesDF.csv', sep=',',index=False)

# [Yelp API](https://docs.developer.yelp.com/reference/v3_business_search)

### Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [5]:
# Call Yelp API function
def getVenuesYelp(latitude, longitude, radius, YelpKey, categories, limit):
    """
    Get venues from yelp with a specified place type and coordinates.
    Args:
        latitude (float): latitude for query (must be combined with longitude)
        longitude (float): longitude for query (must be combined with latitude)
        api_key (str): foursquare API to use for query
        categories (str) : string of category types. Seperate multiple types with commas
        limit (int): Number of results to return, max 50

    Returns:
        response: response object from the requests library.
    """
    headers = {
        "accept": "application/json",
        "Authorization": rf'Bearer {YelpKey}'
    }

    url = fr"https://api.yelp.com/v3/businesses/search?latitude={latitude}&longitude={longitude}&radius={radius}&categories={categories}&sort_by=distance&limit={limit}"

    results = requests.get(url, headers=headers)

    return results

[Yelp Categories List](https://docs.developer.yelp.com/docs/resources-categories) for dining and POIs, selected to match Foursquare Categories

Event Categories
* Music (music)
* Visual Arts (visual-arts)
* Performing Arts (performing-arts)
* Film (film)
* Food & Drink (food-and-drink)
* Sports & Active Life (sports-active-life)
* Nightlife (nightlife)

Categories
* Bars (bars)
* Restauruants (restaurants)
* Landmarks (landmarks)


In [3]:
# Setting Constant Variables 
YelpKey = os.getenv('YelpAPIKey2')
radius = 1000
limit = 50

The categories in the results from calling the Yelp API does not return the highest level of category Grouping. <br>
This was a problem as it would prevent us from differentiating the different categories easily once the dataframe was create and all the venues were compiled. <br>
To work around this, the API was called for each category seperately so that each call could be saved with the category. <br>
This also helped work around the call limit, as were were exceeding the 50 per call limit with all the categories at once. 

In [38]:
# Pulling all info for each bike station into a list
YelpVenues = []

# Commented out music and arts to reduce api call usage
categories = ['restaurants','bars','landmarks'] #,'food-and-drink','music','visual-arts','performing-arts','film']

for category in categories:
    for i in range(len(ReddingBikes)):
        venues = getVenuesYelp(ReddingBikes['latitude'][i], ReddingBikes['longitude'][i], radius, YelpKey, category, limit).json()
        venues['category'] = category
        venues['stationIndex'] = i
        YelpVenues.append(venues)  


### Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [39]:
# Function to return list of desired results from each yelp venue
def ParseYelpResults(venue): 
    """ 
    Parses elements from a venue in the list of results given by a Yelp Query.

    Args: 
        venue (dict): A venue from the list of Foursquare Venues for each station

    Returns a list of preset elements in the following order from the venue.  
    List order: ['YelpID','venueName','displayAddress','categoryTitle','distance','rating','reviewCount','price']
    """
    # Yelp core data. All venues have this information 
    yelpID = venue['id']
    venueName = venue['name']
    displayAddress = venue['location']['display_address']
    distance = venue['distance']
    rating = venue['rating']
    reviewCount = venue['review_count']
    
    
    # FS Rich data. Fields which may not exist
    try: 
        price = len(venue['price'])
    except:
        price = None

    returnList = [yelpID, venueName, displayAddress, distance, rating, reviewCount, price]

    return returnList

In [40]:
# Parsing all results into a list using function for tidy data that can be converted to DF
YelpVenueList = []

for i in range(len(YelpVenues)):
    for venue in YelpVenues[i]['businesses']:
        returnList = ParseYelpResults(venue)
        returnList.extend([YelpVenues[i]['stationIndex'], YelpVenues[i]['category'], YelpVenues[i]['total']])
        YelpVenueList.append(returnList)

### Put your parsed results into a DataFrame

In [41]:
yelpColumnsHeader = ['YelpID','venueName', 'displayAddress','distance','rating','reviewCount','price','stationIndex','category','totalVenues']

yelpVenuesDF = pd.DataFrame(YelpVenueList)
yelpVenuesDF.columns=yelpColumnsHeader

In [58]:
# Why does station 3 have an average distance of 2034 when the radius was set to 1000? 
# Rerunning the API for station 3 returns information with the same issues.
print(yelpVenuesDF['distance'].max())
yelpVenuesDF[yelpVenuesDF['distance'] > 1000]



10448.381839549704


Unnamed: 0,YelpID,venueName,displayAddress,distance,rating,reviewCount,price,stationIndex,category,totalVenues
159,yYzMYOfqL6HuBKZsARHUuQ,Puerto Vallarta,"[2315 Eureka Way, Redding, CA 96001]",1027.538118,3.4,96,2.0,3,restaurants,23
160,lyBBbZ-mZpBaezcRol_MtQ,Weinerschnitzel,"[1120 N Market St, Redding, CA 96001]",1049.907534,2.0,2,,3,restaurants,23
161,TfwA7rp7PQw-FEaYg0hMUA,Wienerschnitzel,"[1120 North Market St, Redding, CA 96001]",1050.498334,3.1,44,1.0,3,restaurants,23
162,LvtdDxlAYXzD7bXkmk8NHA,Downtown Grounds,"[1400 Eureka Way, Redding, CA 96001]",1095.783455,4.6,12,,3,restaurants,23
163,EKUilYPLNDyTGoG_TjXaww,Phat Straw,"[2475 Eureka Way, Redding, CA 96001]",1114.657591,4.5,26,,3,restaurants,23
...,...,...,...,...,...,...,...,...,...,...
1265,x73jVen8F8M14RnINApdKQ,Sundial Bridge,"[840 Sundial Bridge Dr, At Turtle Bay Explorat...",1046.035103,4.6,352,,7,landmarks,2
1266,6t2XgTxEqvake28gt0IlSg,The Monolith,"[Sundial Bridge Dr, Redding, CA 96001]",1038.956037,5.0,1,,10,landmarks,2
1267,x73jVen8F8M14RnINApdKQ,Sundial Bridge,"[840 Sundial Bridge Dr, At Turtle Bay Explorat...",1261.539347,4.6,352,,10,landmarks,2
1271,6t2XgTxEqvake28gt0IlSg,The Monolith,"[Sundial Bridge Dr, Redding, CA 96001]",1048.994968,5.0,1,,14,landmarks,1


In [61]:
# Dropping rows out side of radius
outsideIndex = yelpVenuesDF[yelpVenuesDF['distance'] > 1000].index
yelpVenuesDF.drop(labels=outsideIndex, inplace=True)

In [62]:
yelpVenuesDF.to_csv('../data/yelpVenuesDF.csv', sep=',',index=False)

# Comparing Results

### Which API provided you with more complete data? Provide an explanation. 

Yelp provided much more locations/venues given a very similar categories to search for (Due to API limitating 50 calls per location and limitations of category returned, each category was queried individually for each location. The distance from station was used to prioritize which once were called first). 

In [65]:
print(f'Total number of Foursqaure locations/venues found: {len(FSVenuesDF)}')

# Loop to find total locations/venues found required for Yelp as not all of them returned from API and inserted to dataframe. 
totalYelpLocations = 0

for Venues in YelpVenues:
    totalYelpLocations += Venues['total']

print(f'Total number of Yelp locations/venues found: {totalYelpLocations}')


Total number of Foursqaure locations/venues found: 655
Total number of Yelp locations/venues found: 1416


Yelp also has much more average reviews per location than foursquare. 

In [66]:
print(f"Foursqaure average of number of reviews: {FSVenuesDF['totalRatings'].mean()}")
print(f"Yelp average of number of reviews: {yelpVenuesDF['reviewCount'].mean()}")

Foursqaure average of number of reviews: 15.853556485355648
Yelp average of number of reviews: 149.69913793103447


Surprisingly, Foursquare had more unique locations than Yelp did, despite the significant difference in quantity of venues returned. This will be investigated further later.

In [67]:
print(f"Foursqaure Unique Locations: {FSVenuesDF['venueName'].nunique()}")
print(f"Yelp Unique Locations: {yelpVenuesDF['venueName'].nunique()}")

Foursqaure Unique Locations: 86
Yelp Unique Locations: 84


### Get the top 10 restaurants according to their rating

The top 10 restaurants from Yelp and from Foursquare are quite different. A minimum review count was applied to the Yelp reviews as there were multiple 5 star reviews from only a couple of reviews which skewed the results

In [68]:
print('Top 10 Restaurants from Foursquare:')
FSRestaurantsDF = FSVenuesDF[FSVenuesDF['catIDGroup']=='Dining and Drinking']
FSRestaurantsDF[['venueName','rating','totalRatings']].set_index('venueName').drop_duplicates().sort_values('rating',ascending=False).head(10)


Top 10 Restaurants from Foursquare:


Unnamed: 0_level_0,rating,totalRatings
venueName,Unnamed: 1_level_1,Unnamed: 2_level_1
Theory Collaborative,8.9,35.0
Market Street Steakhouse,8.7,14.0
Club 501,8.3,12.0
Taqueria Los Gordos,8.1,28.0
Dutch Bros Coffee,8.1,33.0
Taste & See Creamery,8.0,9.0
Dutch Bros Coffee,7.8,9.0
San Francisco Deli,7.8,11.0
Deja Vu Restaurant,7.8,54.0
Wienerschnitzel,7.6,12.0


In [69]:
print('Top 10 Restaurants from Yelp:')
YelpRestaurants = yelpVenuesDF[(yelpVenuesDF['reviewCount'] > 10)]
YelpRestaurants[['venueName','rating','reviewCount']].set_index('venueName').drop_duplicates().sort_values('rating',ascending=False).head(10)


Top 10 Restaurants from Yelp:


Unnamed: 0_level_0,rating,reviewCount
venueName,Unnamed: 1_level_1,Unnamed: 2_level_1
Westside Tap & Cork,4.9,17
Odell Craft Barbecue,4.9,26
Canelo’s,4.8,79
The Grape Escape,4.7,20
The Crepe Escape Redding,4.7,69
Sundial Bridge,4.6,352
Sandwichery,4.6,130
Trendy's,4.6,829
Downtown Grounds,4.6,12
Lucky Miller's Deli & Market,4.6,154
