# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [23]:
import requests
import pandas as pd
import os

# Retrieve the DataFrame from the IPython store
%store -r df_bike
#df_bikes = pd.read_csv("city_bikes_ll_2.csv") # reading locations for bike stations (from previous task) // was checking alternative 
api_key = os.environ["location"]

headers = {"Accept": "application/json"} # Create dictionary for headers

headers['Authorization'] = api_key # Add key with our API KEY

fqr_restaurant_data = [] # here will store data for restaurants
i=0
# Loop through each bike station in df_bike
for index, station in df_bike.iterrows():
    i+=1
    latitude = station["Latitude"]
    longitude = station["Longitude"]
    station_id=station["Station_id"] # will need in order to join together data from city_bikes,yelp and foursquare
    print(latitude,longitude,station_id)
    if i>10:
        break
    # Query Foursquare API for restaurants
    foursquare_url = "https://api.foursquare.com/v3/places/search"
    foursquare_params = {
        "ll": f"{latitude},{longitude}",
        "radius": 1000,  # Define the radius to search for venues around the bike station (in meters)
        "query": "restaurant",
        "limit":10 # limiting number of bussines results to 10
     }
    foursquare_response = requests.get(foursquare_url, headers=headers,params=foursquare_params)
    #print(foursquare_response)
    if foursquare_response.status_code == 200:
        foursquare_data = foursquare_response.json()
        #print(foursquare_data)
        for restaurant_fqr in foursquare_data["results"]:
             # Add the station ID to the restaurant data
            restaurant_fqr["Station_id"] = station_id
            restaurant_fqr["Rating_fqr"]=0
        fqr_restaurant_data.extend(foursquare_data["results"])
    else:
        print(f"Failed to fetch Foursquare data for station {index}.")

43.665269 -79.319796 fb337bbed72e2be090071e199899b2be
43.67142 -79.445947 4ff88d5880e71aa40d34cfe5d09b0ca7
43.666224 -79.317693 a09c67c0b419654d907c9134b108e328
43.653236 -79.376716 d6a9daee68070a8b106cfb598d81308c
43.663722 -79.380288 8f8af40d9388c8a3962559e8681d3db7
43.657763 -79.389165 5a441560e262305d1934fb3ada5a9517
43.656729 -79.382736 18ea490ab3ca4d10e2927308416edbef
43.664467 -79.414783 b7850948607474362d3728f19a77fdff
43.675492 -79.388858 7ffd06e0207f80fe1ff6b5ae0bb0544c
43.674991 -79.396273 17fadbdc5848036c8485099bbf517294
43.671944 -79.387778 843dcb87ce2ddb002d2b2900a5418355


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

Put your parsed results into a DataFrame

In [18]:

fqr_restaurant_df = pd.DataFrame(fqr_restaurant_data)

# Initialize lists to store the extracted data from foursquere
fqr_names = []
fqr_latitudes = []
fqr_longitudes = []
fqr_distances = []
fqr_station_ids=[]
fqr_ratings=[]

for index in range(len(fqr_restaurant_df)):
    fqr_longitude =fqr_restaurant_df['geocodes'].iloc[index]['main']['longitude']
    fqr_latitude =fqr_restaurant_df['geocodes'].iloc[index]['main']['latitude']
    fqr_name=fqr_restaurant_df.iloc[index]['name']
    fqr_distance=fqr_restaurant_df.iloc[index]['distance']
    fqr_station_id=fqr_restaurant_df.iloc[index]['Station_id']
    fqr_rating=fqr_restaurant_df.iloc[index]['Rating_fqr']
    
    # Append the data to the corresponding lists
    fqr_names.append(fqr_name)
    fqr_latitudes.append(fqr_latitude)
    fqr_longitudes.append(fqr_longitude)
    fqr_distances.append(fqr_distance)
    fqr_station_ids.append(fqr_station_id)
    fqr_ratings.append(fqr_rating)
    
# creating new dataframe with selected columns
fqr_restaurant_df_selected=pd.DataFrame({
    'Name': fqr_names,
    'Latitude': fqr_latitudes,
    'Longitude': fqr_longitudes,
    'Distance': fqr_distances,
    'Rating':fqr_ratings,
    'Station_id':fqr_station_ids
})

fqr_restaurant_df_selected
%store fqr_restaurant_df_selected

Stored 'fqr_restaurant_df_selected' (DataFrame)


In [6]:
fqr_restaurant_df_selected

Unnamed: 0,Name,Latitude,Longitude,Distance,Rating,Station_id


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [24]:
import requests
import pandas as pd
import os


#df_bikes = pd.read_csv("city_bikes_ll_2.csv") // was checking alternative 
yelp_api_key = os.environ["gm-yelp"]

yelp_restaurant_data = [] # restoaurants from yelp
i=0
# Loop through each bike station in df_bikes
   # FOR EACH LOCATION FROM BIKES WILL GET 20 RESTAURANT LOCATIONS
for index, station in df_bike.iterrows():
    i+=1
    latitude = station["Latitude"]
    longitude = station["Longitude"]
    station_id=station["Station_id"] # will need in order to join together data from city_bikes,yelp and foursquare
    print(latitude,longitude)
    if i>10:
        break
    
    yelp_url = "https://api.yelp.com/v3/businesses/search"
    yelp_headers = {
            "Authorization": f"Bearer {yelp_api_key}"
        }
    yelp_params = {
       "latitude": latitude,
       "longitude": longitude,
       "radius": 1000,
       "term": "Restaurants", 
        "limit":10 # limiting number of bussines results to 10
    }
    yelp_response = requests.get(yelp_url, headers=yelp_headers, params=yelp_params)
    if yelp_response.status_code == 200:
        yelp_data = yelp_response.json()
        for restaurant_yelp in yelp_data["businesses"]:
        # Add the station ID to the restaurant data
            restaurant_yelp["station_id"] = station_id
        yelp_restaurant_data.extend(yelp_data["businesses"])
    else:
        print(f"Failed to fetch Yelp data for station {index}.")


43.665269 -79.319796
43.67142 -79.445947
43.666224 -79.317693
43.653236 -79.376716
43.663722 -79.380288
43.657763 -79.389165
43.656729 -79.382736
43.664467 -79.414783
43.675492 -79.388858
43.674991 -79.396273
43.671944 -79.387778


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [8]:

# Create DataFrames _POI data (yelp restaurants)
yelp_restaurant_data_df= pd.DataFrame(yelp_restaurant_data)
print("\nYelp Restaurants:")
yelp_restaurant_data_df

# Initialize lists to store the extracted data from yelp
yelp_names = []
yelp_latitudes = []
yelp_longitudes = []
yelp_distances = []
yelp_ratings=[]
yelp_station_ids=[]

for index in range(len(yelp_restaurant_data_df)):
    yelp_name=yelp_restaurant_data_df.iloc[index]['name']
    yelp_latitude=yelp_restaurant_data_df.iloc[index]['coordinates']['latitude']
    yelp_longitude=yelp_restaurant_data_df.iloc[index]['coordinates']['longitude']   
    yelp_distance=yelp_restaurant_data_df.iloc[index]['distance']
    yelp_rating=yelp_restaurant_data_df.iloc[index]['rating']
    yelp_station_id=yelp_restaurant_data_df.iloc[index]['station_id']
    
    yelp_names.append(yelp_name)
    yelp_latitudes.append(yelp_latitude)
    yelp_longitudes.append(yelp_longitude)
    yelp_distances.append(yelp_distance)
    yelp_ratings.append(yelp_rating)
    yelp_station_ids.append(yelp_station_id)
    
#creating new dataframe with selected/filtered columns
yelp_restaurant_df_selected=pd.DataFrame({
    'Name': yelp_names,
    'Latitude': yelp_latitudes,
    'Longitude': yelp_longitudes,
    'Distance': yelp_distances,
    'Rating':yelp_ratings,
    'Station_id':yelp_station_ids
})
yelp_restaurant_df_selected
%store yelp_restaurant_df_selected


Yelp Restaurants:
Stored 'yelp_restaurant_df_selected' (DataFrame)


In [9]:
yelp_restaurant_df_selected

Unnamed: 0,Name,Latitude,Longitude,Distance,Rating,Station_id


Put your parsed results into a DataFrame

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

Yelp provide more complete and structured data compared with Foursquere. Important parameters such as: alias, is closed, categories (type of restaurant), ratings and reviews, phone numbers exist only in Yelp.
Besides Yelp's structure is more orginized and easy to work with.
Parsing from Yelp was faster 58secs vs 90 secs with limitations of 100 stations and 10 restaurants
Yelp return 994 rows, Foursquare:977 rows.

Get the top 10 restaurants according to their rating

In [232]:
yelp_restaurant_df_top10 = yelp_restaurant_df_selected.sort_values(by='Rating', ascending=False).head(10)

yelp_restaurant_df_top10

Unnamed: 0,Name,Latitude,Longitude,Distance,Rating,Station_id
835,Papamio's,43.67196,-79.41928,241.184451,5.0,37c32c09c594b83701b3b8f6e32db605
917,Masa Buka,43.686488,-79.347888,723.520019,5.0,a7ca61373064c6581461132f1ae050de
154,East End Vine,43.666996,-79.344665,55.437143,5.0,784016bbba7f7be920647e7f8ec00251
904,Papyrus,43.67709,-79.35324,418.200804,5.0,ddba9c631e11fbae48e2be60289f60c2
827,Morellina’s,43.66725,-79.419999,313.120208,5.0,0abdfc3177000757dcd71bd4c8b479ee
397,Gurume Sushi,43.661391,-79.380995,174.501781,5.0,b086f4ccca33b1b976b40abe69b78d42
825,Papamio's,43.67196,-79.41928,249.328387,5.0,0abdfc3177000757dcd71bd4c8b479ee
824,Maison T,43.67052,-79.42632,445.221433,5.0,0abdfc3177000757dcd71bd4c8b479ee
43,Som Tum Jinda,43.660199,-79.378211,425.92144,5.0,8f8af40d9388c8a3962559e8681d3db7
232,Som Tum Jinda,43.660199,-79.378211,517.132788,5.0,e565aef96d6d5fce5279ef6f24b9ce69


note: 43 and 232 both have the same locations, names and ratings, but different distance. 
    Need further investigation to understand the reason. Probably one to be dropped.
    