In [None]:
# imports

# Foursquare

## 1. Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [2]:
import requests
import os
import pandas as pd
from pprint import pprint

# Get locations (latitude, longitude) for each station in Queretaro, Mexico.
response = requests.get("http://api.citybik.es/v2/networks/qrobici")
data = response.json()

# Make an empty dictionary to save all the information retrieved from the JSON response when going through the loop. 
stations_coor = []

for station in data['network']['stations']:
        latitude = station['latitude']
        longitude = station['longitude']
        
        stations_coor.append({
            'Latitute': latitude,
            'Longitude': longitude
        })

stations_coor = pd.DataFrame(stations_coor) #Information to use to obtain restaurants or bars from Foursquare

In [3]:
# Api Key
api_key = os.environ["FOURSQUARE_API_KEY_ZSH"]

In [25]:
# Converting the stations_coor DataFrame to a list, then formatting to match the URL requirements from the Foursquare API: 
stations_coor_list = stations_coor.values.tolist()
stations_coor_list_rounded = [[round(x, 2) for x in coord] for coord in stations_coor_list]
geocodes = [f'{coord[0]},{coord[1]}' for coord in stations_coor_list_rounded]

# Defining a function to get the get request giving a coordinate from geocodes
def find_restaurants(geocodes):
    url = "https://api.foursquare.com/v3/places/search?query=restaurant&radius=1000&fields=name,hours,rating,popularity,price,distance,tips&ll="
    headers = {"accept": "application/json"}
    headers['Authorization'] = api_key
    response = requests.get(url=url+geocodes, headers = headers) 
    data = response.json()
    return data

# Create an empty list to append the information obtained from the find_restaurants function for each of the geocode pairs from geocodes. 
restaurants_list = []

for geocode in geocodes: 
    data = find_restaurants(geocode)
    restaurants_list.append(data)

In [30]:
# Saving and loading information into files with JSON
import json
with open('data2.json', 'w') as f:
    json.dump(restaurants_list, f)

with open('data2.json', 'r') as f:
    saved_restaurants_list = json.loads(f.read())

pprint(saved_restaurants_list)

[{'context': {'geo_bounds': {'circle': {'center': {'latitude': 20.58,
                                                   'longitude': -100.41},
                                        'radius': 1000}}},
  'results': [{'distance': 47,
               'hours': {'display': 'Tue 13:00-22:00; Wed 13:00-22:30; Thu-Sat '
                                    '13:00-23:00; Sun 13:00-20:30',
                         'is_local_holiday': False,
                         'open_now': True,
                         'regular': [{'close': '2200',
                                      'day': 2,
                                      'open': '1300'},
                                     {'close': '2230',
                                      'day': 3,
                                      'open': '1300'},
                                     {'close': '2300',
                                      'day': 4,
                                      'open': '1300'},
                                     {'close': '

## 2. Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
#Create a list of restaurant tips for all restaurants around a bike station, in this example station in the position 1 in the list. 
all_tips_per_bike_station = [(r['name'], tip['text']) for r in saved_restaurants_list[1]['results'] for tip in r['tips']]
pprint(all_tips_per_bike_station)

In [63]:
#Create a list of restaurant hours for all restaurants around a bike station, in this example station in the position 4 in the list. 
all_hoursdisplay_per_bike_station = [(r['name'], hours['display']) 
                                     for r in saved_restaurants_list[4]['results'] 
                                     for hours in [r['hours']]
                                     if isinstance(r['hours'], dict) and 'display' in r['hours']
                                    ]
pprint(all_hoursdisplay_per_bike_station)

[('Tortas Quality', 'Mon-Fri 7:30-15:00; Sat 7:30-13:30'),
 ('Cafe Tulipe', 'Mon-Thu 8:00-22:00; Fri 8:00-23:00; Sat-Sun 8:00-22:00'),
 ('Norteño Grill Restaurante',
  'Mon-Wed 13:00-22:00; Thu-Sat 13:00-23:59; Sun 13:00-18:00'),
 ('Sonora Grill - Puerta Victoria',
  'Mon 00:00-2:00; Tue-Thu 13:00-23:59; Fri-Sun 00:00-2:00'),
 ('Mirthala', 'Mon-Sat 7:00-23:00; Sun 8:00-17:00')]


Put your parsed results into a DataFrame


In [76]:
# Retrieve information of interest: name, distance, popularity, rating, price and hours (display):

extracted_data = []

for entry in saved_restaurants_list:
    for result in entry['results']:
        hours_display = result['hours'].get('display', None)  # Get the 'display' value if it exists, else None
        price = result.get('price', None) # Get the price if exists, otherwise None. 
        if hours_display is not None:
            extracted_data.append({
                'distance': result['distance'],
                'name': result['name'],
                'popularity': result['popularity'],
                'price': price,
                'rating': result['rating'],
                'hours_display': hours_display
            })

# Creating DataFrame
restaurant_info_df = pd.DataFrame(extracted_data)
restaurant_info_df

Unnamed: 0,distance,name,popularity,price,rating,hours_display
0,47,Monosabio Hamburguesería,0.979649,3.0,8.5,Tue 13:00-22:00; Wed 13:00-22:30; Thu-Sat 13:0...
1,149,Taqueria Jalisco,0.937276,1.0,7.9,Open Daily 0:00-4:00
2,158,Los Idolos,0.941187,1.0,7.7,Tue-Sat 12:30-22:00; Sun 12:30-20:00
3,243,Fogon do Brasil Queretaro,0.989483,2.0,8.2,Mon-Sat 13:00-22:00; Sun 13:00-21:00
4,598,Sushi Roll,0.987812,2.0,8.2,Mon-Thu 13:00-23:00; Fri-Sat 13:00-24:00; Sun ...
...,...,...,...,...,...,...
236,322,Mara & Co,0.990527,,7.8,Open Daily 8:00-22:30
237,340,Vivar Sushi,0.120228,2.0,8.0,Mon-Sat 13:00-20:00; Sun 13:00-19:00
238,732,Baja 5 Gastrobar,0.968391,3.0,8.0,Tue-Wed 14:00-23:30; Thu 14:00-23:59; Fri-Sun ...
239,710,Estación Central QRO,0.984338,,7.7,Tue-Sat 13:00-23:59; Sun 13:00-20:00


*Analysis of code: The condition if hours_display is not None: is used to filter out entries where the 'display' key is missing from the 'hours' dictionary. If 'display' is missing, the get() method returns None, indicating that the opening hours display information is not available for that entry.*

*By checking if hours_display is not None, the code ensures that only entries with the 'display' key present in the 'hours' dictionary are included in the extracted_data list. This way, the resulting DataFrame will only contain rows where the opening hours display information is available, and it avoids adding incomplete or missing data to the DataFrame.*

# YELP

## 1. Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

*The personal Yelp API Key was set up in a different Jupyter file to use as a variable in this file.*

In [95]:
yelp_api_key = os.environ["YELP_API_KEY"]

# Defining a function to get the get request giving a coordinate from geocodes
def find_restaurants_yelp(latitude, longitude):
    url = "https://api.yelp.com/v3/businesses/search?term=restaurants&radius=1000&categories=&price=1&price=2&price=3&price=4&attributes=parking&attributes=wifi&attributes=ambience&sort_by=best_match&limit=20"
    headers = {"accept": "application/json",
               'Authorization':'Bearer ' + yelp_api_key}
    response = requests.get(url=url+f'&latitude={latitude}'+f'&longitude={longitude}', headers = headers) 
    data_yelp = response.json()
    return data_yelp

# Create an empty list to append the information obtained from the find_restaurants function for each of the geocode pairs from geocodes. 
restaurants_list_yelp = []

for latitude, longitude in stations_coor_list: 
    data_yelp = find_restaurants_yelp(latitude, longitude)
    restaurants_list_yelp.append(data_yelp)



In [96]:
# Saving and loading information into files with JSON

with open('data3.json', 'w') as f:
    json.dump(restaurants_list_yelp, f)

with open('data3.json', 'r') as f:
    saved_restaurants_list_yelp = json.loads(f.read())

pprint(saved_restaurants_list_yelp)

[{'businesses': [{'alias': 'fogon-do-brasil-santiago-de-querétaro-2',
                  'attributes': {'business_temp_closed': None,
                                 'menu_url': None,
                                 'open24_hours': None,
                                 'waitlist_reservation': None},
                  'categories': [{'alias': 'brazilian', 'title': 'Brazilian'}],
                  'coordinates': {'latitude': 20.5804272,
                                  'longitude': -100.4117841},
                  'display_phone': '+52 442 218 3537',
                  'distance': 270.08098203444536,
                  'id': 'N5phZxUHfVIYEZlS9lkOsA',
                  'image_url': 'https://s3-media1.fl.yelpcdn.com/bphoto/-Y-XyEqDfGl42750mRdoMw/o.jpg',
                  'is_closed': False,
                  'location': {'address1': 'Prol. Av. Zaragoza 60',
                               'address2': 'Col. Ensueño',
                               'address3': None,
                         

## 2. Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [105]:
# Assuming saved_restaurants_list_yelp[1]['businesses'][0] is the dictionary you want to access
business = saved_restaurants_list_yelp[1]['businesses'][0]

categories = business['categories']
distance = business['distance']
id = business['id']
name = business['name']
price = business['price']
rating = business['rating']

print(categories)
print(distance)
print(id)
print(name)
print(price)
print(rating)

[{'alias': 'oaxacan', 'title': 'Oaxacan'}, {'alias': 'yucatan', 'title': 'Yucatan'}]
1169.5500257882732
Y2Iqqe13-n7_60q9ND0vMA
Tikua Sureste
$$
4.7


## 3. Put your parsed results into a DataFrame

In [106]:
# Retrieve information of interest: name, distance, categories, rating, price and review count.

extracted_data_yelp = []

for entry in saved_restaurants_list_yelp:
    for business in entry['businesses']:
        extracted_data_yelp.append({
            'distance': business['distance'],
            'name': business['name'],
            'categories': business['categories'],
            'price': business['price'],
            'rating': business['rating']
            })

# Creating DataFrame
restaurant_info_yelp_df = pd.DataFrame(extracted_data_yelp)
restaurant_info_yelp_df

Unnamed: 0,distance,name,categories,price,rating
0,270.080982,Fogon do Brasil,"[{'alias': 'brazilian', 'title': 'Brazilian'}]",$$,3.0
1,919.137726,Josecho,"[{'alias': 'wine_bars', 'title': 'Wine Bars'},...",$$$$,5.0
2,1001.499627,Karnes en Su Jugo,"[{'alias': 'comfortfood', 'title': 'Comfort Fo...",$$,4.8
3,989.584691,Ichiban,"[{'alias': 'japanese', 'title': 'Japanese'}]",$$,3.8
4,641.412281,Pawa,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",$$,0.0
...,...,...,...,...,...
714,150.863375,Wings Army,"[{'alias': 'tradamerican', 'title': 'American'}]",$$,1.5
715,1113.372263,Restaurant Bar el Palomar,"[{'alias': 'restaurants', 'title': 'Restaurant...",$,4.0
716,351.277964,Sushi Itto,"[{'alias': 'sushi', 'title': 'Sushi Bars'}]",$$,2.0
717,493.579839,La Barbacha de los Cuatro,"[{'alias': 'bbq', 'title': 'Barbeque'}]",$,0.0


# Comparing Results

## 1. Which API provided you with more complete data? Provide an explanation. 

In [122]:
restaurant_info_df['name'].value_counts()

name
Di Vino                            7
Chinicuil                          7
Hotel la Casa de la Marquesa       7
Restaurante 1810                   7
Breton                             7
                                  ..
El Sazón de Papá                   1
California Pizza Kitchen           1
Fogon do Brasil Queretaro          1
Los Idolos                         1
Angus Butcher House - Queretaro    1
Name: count, Length: 74, dtype: int64

In [121]:
restaurant_info_yelp_df['name'].value_counts()

name
María y Su Bici               18
Restaurante Bar 1810          17
El Mesón De Chucho El Roto    17
Chinicuil                     17
Café Breton                   17
                              ..
Mesón de la Corregidora        1
El Fogón del Pastor            1
Karnes en Su Jugo              1
El Comal Antojitos             1
Sushi Itto                     1
Name: count, Length: 100, dtype: int64

In [132]:
unique_fq_rest = restaurant_info_df.drop_duplicates(subset = ['name']).dropna()
unique_fq_rest

Unnamed: 0,distance,name,popularity,price,rating,hours_display
0,47,Monosabio Hamburguesería,0.979649,3.0,8.5,Tue 13:00-22:00; Wed 13:00-22:30; Thu-Sat 13:0...
1,149,Taqueria Jalisco,0.937276,1.0,7.9,Open Daily 0:00-4:00
2,158,Los Idolos,0.941187,1.0,7.7,Tue-Sat 12:30-22:00; Sun 12:30-20:00
3,243,Fogon do Brasil Queretaro,0.989483,2.0,8.2,Mon-Sat 13:00-22:00; Sun 13:00-21:00
4,598,Sushi Roll,0.987812,2.0,8.2,Mon-Thu 13:00-23:00; Fri-Sat 13:00-24:00; Sun ...
...,...,...,...,...,...,...
224,436,Cocono,0.981471,1.0,8.8,Mon-Sat 13:00-22:00; Sun 13:00-18:00
225,458,Fisher's,0.995444,3.0,8.6,Mon-Tue 9:30-21:00; Wed-Sat 9:30-22:00; Sun 9:...
226,498,La Bocha,0.990963,3.0,8.8,Mon-Wed 13:00-23:00; Thu-Sat 13:00-23:59; Sun ...
227,407,Arrachera's Steak House,0.975055,4.0,7.5,Open Daily 12:00-21:00


In [133]:
unique_yelp_rest = restaurant_info_yelp_df.drop_duplicates(subset = ['name']).dropna()
unique_yelp_rest

Unnamed: 0,distance,name,categories,price,rating
0,270.080982,Fogon do Brasil,"[{'alias': 'brazilian', 'title': 'Brazilian'}]",$$,3.0
1,919.137726,Josecho,"[{'alias': 'wine_bars', 'title': 'Wine Bars'},...",$$$$,5.0
2,1001.499627,Karnes en Su Jugo,"[{'alias': 'comfortfood', 'title': 'Comfort Fo...",$$,4.8
3,989.584691,Ichiban,"[{'alias': 'japanese', 'title': 'Japanese'}]",$$,3.8
4,641.412281,Pawa,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",$$,0.0
...,...,...,...,...,...
545,598.271366,Tonino's,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",$$,4.3
546,918.508493,Sante Restaurante,"[{'alias': 'mexican', 'title': 'Mexican'}]",$$$,4.0
547,442.146233,La Barbacha de los Cuatro,"[{'alias': 'bbq', 'title': 'Barbeque'}]",$,0.0
636,730.597618,El Comal Antojitos,"[{'alias': 'mexican', 'title': 'Mexican'}]",$,1.7


In [134]:
unique_fq_rest['name'].value_counts()

name
Monosabio Hamburguesería           1
La Crécola                         1
Bogati Pizza                       1
Maria y Su Bici                    1
Il Diavolo                         1
                                  ..
Sonora Grill - Puerta Victoria     1
Mirthala                           1
Fa yian                            1
La Huerta de Carlota               1
Angus Butcher House - Queretaro    1
Name: count, Length: 67, dtype: int64

In [135]:
unique_yelp_rest['name'].value_counts()

name
Fogon do Brasil            1
McDonald's                 1
Jochadora                  1
Fiume 37                   1
Wings Army                 1
                          ..
Diktatur                   1
Wingstop                   1
Las Brisas                 1
Trattoria da Pulcinella    1
Sushi Itto                 1
Name: count, Length: 100, dtype: int64

In [138]:
print(f'Yelp Restaurants: {restaurant_info_yelp_df.shape[0]}')
print(f'Foursquare Restaurants: {restaurant_info_df.shape[0]}')
print(f'Yelp Restaurants - No duplicates or NA: {unique_yelp_rest.shape[0]}')
print(f'Foursquare Restaurants - No duplicates or NA: {unique_fq_rest.shape[0]}')

Yelp Restaurants: 719
Foursquare Restaurants: 241
Yelp Restaurants - No duplicates or NA: 100
Foursquare Restaurants - No duplicates or NA: 67


*The Yelp API resulted in more results in total, almost 3 times more. There was also other information that could have been retrieved that was of more interest that Foursquare didn't have. However, some of that information, like businness hours, were only available with a second get request, that require the businesses 'id'. With the Foursquare, that information was obtained with a single get request.*

*Another thing to consider is that, after dropping all duplicates, the total number of restaurants dropped significantly, this due to several restaurants being included in the list for being near several bike stations. The number of restaurants also dropped because the rows with missing values were no included.*

## 2. Get the top 10 restaurants according to their rating

### *Top 10 restaurants from Yelp API*

In [141]:
unique_yelp_rest[['name', 'rating']].sort_values('rating', ascending = False).head(10)

Unnamed: 0,name,rating
1,Josecho,5.0
22,Portal de Santa Rosa,5.0
371,Jochadora,5.0
88,Taquería Jalisco II,5.0
90,Che Papusa,5.0
6,Las Delicias,5.0
370,Fiume 37,4.8
20,La Charamusca,4.8
2,Karnes en Su Jugo,4.8
456,Mochomos,4.7


### *Top 10 restaurants from Foursquare API*

In [142]:
unique_fq_rest[['name', 'rating']].sort_values('rating', ascending = False).head(10)

Unnamed: 0,name,rating
133,Alta Fibra Querétaro,9.2
22,Tikua Sur Este,9.1
31,Sonora Grill - Puerta Victoria,9.0
223,Habibi Restaurante,9.0
136,Il Duomo,9.0
18,La Dulce Compañía,8.9
131,Il Diavolo,8.9
175,Fiume 37,8.9
7,iL Diavolo,8.9
226,La Bocha,8.8
