In [None]:
import requests
import json
import re
import pandas as pd
import os # use this to access your environment variables
import plotly.express as px
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind
import sqlite3

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [None]:
# Foursquare setup: load the bike-station table and read the API key

# Bike-station data saved earlier in the project (absolute local path).
bike_dataframe = pd.read_csv(
    r'C:\Users\aksha\Documents\LHL\LHL Assignments\Week 5\Python Statistical Modeling Project\City_Bike_Vancouver_Data_17th.csv'
)

# One row per distinct (Latitude, Longitude) pair, so every coordinate is queried only once.
bike_dataframe_lat_long = bike_dataframe.loc[:, ['Latitude', 'Longitude']].drop_duplicates()

# The key is read from the environment; the lookup yields None when the variable is unset.
FOURSQUARE_KEY = os.environ.get('FOURSQUARE_API_KEY')

def get_venues_fs(latitude, longitude, radius, api_key, categories):
    """
    Count the Foursquare places around a coordinate, grouped by 'closed_bucket'.

    Args:
        latitude (float): latitude for query (must be combined with longitude)
        longitude (float): longitude for query (must be combined with latitude)
        radius (int): search radius, sent to the API as the 'radius' parameter
        api_key (str): Foursquare API key sent in the Authorization header.
            When None, the FOURSQUARE_API_KEY environment variable is used.
        categories (str): Foursquare category ids, separated by commas.
            When None, the default id list hard-coded below is used.

    Returns:
        pandas.DataFrame or None: a one-row frame with one count column per
        'closed_bucket' value plus 'Latitude' and 'Longitude'. None is returned
        when the response holds no results, or when the status code is not 200
        (the error is printed in that case), so the value can always be passed
        to pd.concat together with other frames.

    Network errors and timeouts raised by requests.get are not caught here.
    """
    default_categories = '10032, 13062, 13003,13004,13005,13006,13007,13008,13009,13010,13011,13012,13013,13014,13015,13016,13017,13018,13019,13020,13021,13022,13023,13024,13025,13059,13246,13389'
    if categories is None:
        categories = default_categories
    if api_key is None:
        api_key = os.getenv('FOURSQUARE_API_KEY')

    url = 'https://api.foursquare.com/v3/places/search'
    params = {
        'll': f"{latitude},{longitude}",
        'radius': f"{radius}",
        'categories': categories
    }
    headers = {"Accept": "application/json", "Authorization": api_key}

    # A timeout keeps one stalled request from hanging the whole station loop.
    result = requests.get(url, params=params, headers=headers, timeout=30)
    if result.status_code != 200:
        # Report the failure but return None: an error string cannot be concatenated by the caller.
        print(f"Error: {result.status_code}")
        return None

    result_df = pd.json_normalize(result.json()['results'])
    if result_df.shape[0] == 0:
        return None

    # Group by 'closed bucket' and calculate counts
    result_df = result_df.groupby('closed_bucket').agg(
        TotalCount=('fsq_id', 'count'),
    ).reset_index()

    # Reshape so that every bucket becomes its own column on a single row
    result_df = result_df.pivot_table(index=None, columns='closed_bucket', values='TotalCount', fill_value=0)
    result_df['Latitude'] = latitude
    result_df['Longitude'] = longitude
    print(result_df)
    return result_df





Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
# Query Foursquare once per unique station coordinate and collect the per-station rows.
# The frames are gathered in a list and concatenated once at the end; calling
# pd.concat inside the loop re-copies everything accumulated so far on every iteration.
fs_station_frames = []
for lat, lng in zip(bike_dataframe_lat_long['Latitude'], bike_dataframe_lat_long['Longitude']):
    api_result = get_venues_fs(lat,
                               lng,
                               radius=1000,
                               api_key=FOURSQUARE_KEY,
                               categories=None)  # the category ids are set inside get_venues_fs, hence None here
    # Skip anything that is not a DataFrame (no results or a failed request):
    # pd.concat only accepts pandas objects.
    if isinstance(api_result, pd.DataFrame):
        fs_station_frames.append(api_result)

# pd.concat raises on an empty list, so fall back to an empty frame when nothing was collected.
fs_df = pd.concat(fs_station_frames) if fs_station_frames else pd.DataFrame()


Put your parsed results into a DataFrame

In [None]:

# Report the dimensions of the combined Foursquare table, then write it to disk
# (absolute local path; the row index is left out of the file).
print(fs_df.shape)
fs_df.to_csv(
    r'C:\Users\aksha\Documents\LHL\LHL Assignments\Week 5\Python Statistical Modeling Project\Foursquare_Data_17th.csv',
    index=False,
)

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [None]:
# Yelp API setup

# The key is read from the environment; the lookup yields None when the variable is unset.
api_key = os.environ.get('Yelp_API_Key')

# Request headers used by the Yelp calls: the key is sent as a bearer token.
headers = dict(Authorization='Bearer %s' % api_key)

# Define a function to categorize price
def price_categories(rating):
    """
    Translate a Yelp price string into a readable price category.

    Args:
        rating: the value of a business's 'price' field ('$', '$$', '$$$' or
            '$$$$'); anything else, including missing values, is accepted.

    Returns:
        str: 'Cheap', 'Economical' or 'Expensive' for a known price string,
        otherwise 'No Price Details'.
    """
    labels = {
        '$': 'Cheap',
        '$$': 'Economical',
        '$$$': 'Expensive',
        # Yelp's top price level; without this entry the priciest venues were
        # reported as having no price information at all.
        '$$$$': 'Expensive',
    }
    # Missing prices arrive as NaN/None rather than strings, so only look up real strings.
    if isinstance(rating, str):
        return labels.get(rating, 'No Price Details')
    return 'No Price Details'


def get_guidelines_coord(lat, lng, radius=1000):
    """
    Summarise the Yelp businesses found around one coordinate.

    Args:
        lat (float): latitude of the query point
        lng (float): longitude of the query point
        radius (int): search radius in metres. It is sent to Yelp as the
            'radius' parameter and is also used to drop businesses whose
            reported 'distance' is larger. Defaults to 1000.

    Returns:
        pandas.DataFrame or None: for a 200 response with businesses, a frame
        with the columns 'id_dummy', 'Total_POI', 'Closed_POI', 'AvgRating',
        'AvgReviewCount', 'PriceCategory', 'Latitude' and 'Longitude' (it has
        no rows when no business lies within the radius). An empty frame is
        returned when the response lists no businesses. For any other status
        code the code is printed and None is returned.

    Network errors and timeouts raised by requests.get are not caught here.
    """
    def _most_common_price(labels):
        # Most frequent category; 'Economical' is the fallback when there is no mode.
        modes = labels.mode()
        return modes.iloc[0] if not modes.empty else 'Economical'

    url = 'https://api.yelp.com/v3/businesses/search'
    params = {'latitude': lat, 'longitude': lng, 'radius': radius}
    # A timeout keeps one stalled request from hanging the whole station loop.
    req = requests.get(url, params=params, headers=headers, timeout=30)
    if req.status_code != 200:
        print(req.status_code)
        return None

    data = pd.json_normalize(req.json()['businesses'])
    if data.shape[0] == 0:
        return pd.DataFrame({
            'Latitude': lat,
            'Longitude': lng,
            'Total_POI': [],
            'Closed_POI': [],
            'AvgRating': [],
            'AvgReviewCount': [],
            'PriceCategory': []})

    wanted = ['id', 'is_closed', 'review_count',
              'rating', 'price', 'coordinates.latitude', 'coordinates.longitude']
    # Keep only the rows within the radius. reindex() adds any column the response did
    # not contain (for example 'price' when no business reports one) as NaN instead of
    # failing with a KeyError.
    new_df = data.loc[data['distance'] <= radius].reindex(columns=wanted)
    new_df = new_df.assign(
        Price_Category=new_df['price'].apply(price_categories),
        id_dummy=1,  # constant key so that all rows fall into a single group
    )

    result_df = new_df.groupby(['id_dummy']).agg(
        Total_POI=('id', 'count'),
        Closed_POI=('is_closed', 'sum'),
        AvgRating=('rating', 'mean'),
        AvgReviewCount=('review_count', 'mean'),
        PriceCategory=('Price_Category', _most_common_price)
    ).reset_index()
    result_df['Latitude'] = lat
    result_df['Longitude'] = lng
    return result_df



Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
# Query Yelp once per unique station coordinate and collect the per-station summaries.
# The frames are gathered in a list and concatenated once at the end; calling
# pd.concat inside the loop re-copies everything accumulated so far on every iteration.
yelp_station_frames = []
for lat, lng in zip(bike_dataframe_lat_long['Latitude'], bike_dataframe_lat_long['Longitude']):
    api_result = get_guidelines_coord(lat, lng)
    # A failed request yields no frame; only DataFrames can be concatenated.
    if isinstance(api_result, pd.DataFrame):
        yelp_station_frames.append(api_result)

# pd.concat raises on an empty list, so fall back to an empty frame when nothing was collected.
result_df = pd.concat(yelp_station_frames) if yelp_station_frames else pd.DataFrame()


Put your parsed results into a DataFrame

In [None]:
# Report the dimensions of the combined Yelp table, then write it to disk
# (absolute local path; the row index is left out of the file).
print(result_df.shape)
result_df.to_csv(
    r'C:\Users\aksha\Documents\LHL\LHL Assignments\Week 5\Python Statistical Modeling Project\Yelp_Data_17th.csv',
    index=False,
)

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

Yelp provides more complete data than the Foursquare API:
1. Yelp returns many more details about each venue (ratings, review counts, etc.) for a given latitude/longitude pair.
2. The proportion of NaNs in the Foursquare API results was much higher than in the Yelp results.
3. In the EDA section, the histograms and frequency distributions show that Foursquare never returned more than 10 records per latitude/longitude pair. The Foursquare data is also very skewed, whereas the number of POIs returned by Yelp ranges from 5 to 20.

Get the top 10 restaurants according to their rating

In [None]:
# Yelp API setup for the ratings query

# Look the Yelp key up in the environment (None when the variable is not defined).
api_key = os.environ.get('Yelp_API_Key')

# Bearer-token header passed along with every Yelp request made in this cell.
headers = dict(Authorization='Bearer %s' % api_key)

def get_restaurant_ratings(lat, lng, radius=1000):
    """
    Fetch the individual Yelp businesses found around one coordinate.

    Args:
        lat (float): latitude of the query point
        lng (float): longitude of the query point
        radius (int): search radius in metres. It is sent to Yelp as the
            'radius' parameter and is also used to drop businesses whose
            reported 'distance' is larger. Defaults to 1000.

    Returns:
        pandas.DataFrame or None: one row per business within the radius, with
        the columns 'id', 'name', 'location.address1', 'is_closed',
        'review_count', 'rating', 'price', 'coordinates.latitude' and
        'coordinates.longitude'. None is returned when the response lists no
        businesses, or when the status code is not 200 (the code is printed).

    Network errors and timeouts raised by requests.get are not caught here.
    """
    url = 'https://api.yelp.com/v3/businesses/search'
    params = {'latitude': lat, 'longitude': lng, 'radius': radius}
    # A timeout keeps one stalled request from hanging the whole station loop.
    req = requests.get(url, params=params, headers=headers, timeout=30)
    if req.status_code != 200:
        print(req.status_code)
        return None

    data = pd.json_normalize(req.json()['businesses'])
    if data.shape[0] == 0:
        return None

    wanted = ['id', 'name', 'location.address1', 'is_closed', 'review_count',
              'rating', 'price', 'coordinates.latitude', 'coordinates.longitude']
    # reindex() adds any column the response did not contain (for example 'price'
    # when no business reports one) as NaN instead of failing with a KeyError.
    return data.loc[data['distance'] <= radius].reindex(columns=wanted)
        
# Query Yelp once per unique station coordinate and collect the business rows.
# The frames are gathered in a list and concatenated once at the end; calling
# pd.concat inside the loop re-copies everything accumulated so far on every iteration.
rating_frames = []
for lat, lng in zip(bike_dataframe_lat_long['Latitude'], bike_dataframe_lat_long['Longitude']):
    api_result = get_restaurant_ratings(lat, lng)
    # Stations with no businesses or a failed request yield no frame.
    if isinstance(api_result, pd.DataFrame):
        rating_frames.append(api_result)

# pd.concat raises on an empty list, so fall back to an empty frame when nothing was collected.
ratings_df = pd.concat(rating_frames) if rating_frames else pd.DataFrame()

In [None]:
# Top 10 restaurants as per the ratings

# ratings_df holds one row per (station, business) hit, so a restaurant close to
# several stations appears several times. Keep one row per Yelp id before ranking,
# otherwise the top 10 can be filled with repeats of the same place.
(
    ratings_df
    .drop_duplicates(subset='id')
    .sort_values('rating', ascending=False)
    .head(10)
)