# YELP

In [1]:
# Import all the packages to be used:

import requests
import json
import pandas as pd
import os

In [2]:
# Call my environment key, url, and pull the bikes.csv file to use its latitudes and longitudes:

api_key = os.environ['YELP_API_KEY']

base_url = 'https://api.yelp.com/v3/businesses/search'

df_bikes = pd.read_csv('bikes.csv')


# Create a list where to store the data:

results = []


# Loop through each latitude and longitude from the bikes dataframe:

for index, row in df_bikes.iterrows():
    latitude = row['latitude']  
    longitude = row['longitude'] 
    radius = 1000

    
# Specify headers and parameters according the API documentation:    
    
    params = {
                'latitude':latitude,
                'longitude':longitude,
                'radius': radius,
                }

    headers = {
                'Authorization': f'Bearer {api_key}',
                }

# Apply the get function to pull the data from the website:

    response = requests.get(base_url, params=params, headers=headers)

    
# 'Status code 200' means that the method is succesful, so only store the data if this worked:

    if response.status_code == 200:
        data = response.json()

# I also would like to add a column with the id of the bikes from the bikes.csv file in order to work with it later.
# This column will be named 'bikes_id':

        for i in data['businesses']:
            i['bikes_id'] = index
            results.extend(data['businesses'])
        

In [3]:
# Create a dataframe named 'df_yelp' with the data collected: 

df_yelp = pd.DataFrame(results)
df_yelp.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance,bikes_id
0,6iOAgzJ0DRZNSKA3FSrrOg,la-taqueria-pinche-taco-shop-vancouver,La Taqueria Pinche Taco Shop,https://s3-media1.fl.yelpcdn.com/bphoto/fjhIj3...,False,https://www.yelp.com/biz/la-taqueria-pinche-ta...,685,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.0,"{'latitude': 49.263559, 'longitude': -123.112736}",[],$$,"{'address1': '2450 Yukon Street', 'address2': ...",16045582549,+1 604-558-2549,169.517456,0
1,XAH2HpuUUtu7CUO26pbs4w,saku-vancouver-8,Saku,https://s3-media1.fl.yelpcdn.com/bphoto/mKOSej...,False,https://www.yelp.com/biz/saku-vancouver-8?adju...,234,"[{'alias': 'japanese', 'title': 'Japanese'}]",4.5,"{'latitude': 49.2631006, 'longitude': -123.116...",[],$$,"{'address1': '548 W Broadway', 'address2': '',...",16044234600,+1 604-423-4600,178.845344,0
2,nkDZY5xqihF3XtZMzzfqqg,hokkaido-ramen-santouka-vancouver-5,Hokkaido Ramen Santouka,https://s3-media2.fl.yelpcdn.com/bphoto/9Fe-Lq...,False,https://www.yelp.com/biz/hokkaido-ramen-santou...,226,"[{'alias': 'noodles', 'title': 'Noodles'}, {'a...",4.0,"{'latitude': 49.263127, 'longitude': -123.1168...",[],$$,"{'address1': '558 W Broadway', 'address2': '',...",16045651770,+1 604-565-1770,191.044234,0
3,NensKn1MSVU_rm-1Y6WlFA,marulilu-cafe-vancouver,Marulilu Cafe,https://s3-media2.fl.yelpcdn.com/bphoto/uwwarf...,False,https://www.yelp.com/biz/marulilu-cafe-vancouv...,288,"[{'alias': 'cafes', 'title': 'Cafes'}]",4.0,"{'latitude': 49.26338, 'longitude': -123.11415}",[],$$,"{'address1': '451 W Broadway', 'address2': '',...",16045684211,+1 604-568-4211,102.329817,0
4,oHYOa9l5aLOQpJ8dF6sDmA,menya-raizo-vancouver,Menya Raizo,https://s3-media2.fl.yelpcdn.com/bphoto/G0cHCt...,False,https://www.yelp.com/biz/menya-raizo-vancouver...,78,"[{'alias': 'ramen', 'title': 'Ramen'}]",4.0,"{'latitude': 49.26336, 'longitude': -123.11314}",[],$$,"{'address1': '401 W Broadway', 'address2': Non...",16048290697,+1 604-829-0697,133.678673,0


Get the top 10 restaurants according to their rating

In [10]:
df_yelp[['name','rating']].sort_values(by='rating', ascending=False).iloc[:10]

Unnamed: 0,name,rating
24719,NUTTEA,5.0
83397,Arike Restaurant,5.0
21709,Arike Restaurant,5.0
2257,Manoush'eh,5.0
61285,Number E Food,5.0
87176,Camion Cafe,5.0
66745,Number E Food,5.0
46336,Quizine Kitchen,5.0
72698,Urban Tadka,5.0
26248,Mazahr Lebanese Kitchen,5.0


In [68]:
# Create a dataframe with only the columns of interest:

columns_to_keep = ['id', 'bikes_id', 'review_count', 'rating', 'price', 'distance']

df_yelp = df_yelp[columns_to_keep]

df_yelp

Unnamed: 0,id,bikes_id,review_count,rating,price,distance
0,6iOAgzJ0DRZNSKA3FSrrOg,0,685,4.0,$$,169.517456
1,XAH2HpuUUtu7CUO26pbs4w,0,234,4.5,$$,178.845344
2,NensKn1MSVU_rm-1Y6WlFA,0,289,4.0,$$,102.329817
3,4118Aq9LbkvUr4s719uUkA,0,117,4.5,$$$,152.157897
4,F5wxgIiZE7LYQxgqhI483A,0,32,4.5,,168.615262
...,...,...,...,...,...,...
96997,irdrKokOvuxP_atEjtzkQA,243,397,4.0,$$,569.394966
96998,FmD4x9ClmSzSLKOTV7jfXw,243,177,4.0,$$,744.999190
96999,dZEUL3D32Ny6qQQqbFT0kQ,243,50,4.5,$,842.112588
97000,P78gUElTZe5HJGLRjtouig,243,61,4.0,$$,381.561636


In [69]:
# Save the results in csv:

df_yelp.to_csv('yelp.csv')

# FOURSQUARE

In [82]:
# Very similar to YELP's section, with the exception that parameters are called a little different:

# Call my environment key, url, and pull the bikes.csv file to use its latitudes and longitudes:

url = "https://api.foursquare.com/v3/places/search"

api_key = os.environ['FOURSQUARE_API_KEY']

df_bikes = pd.read_csv('bikes.csv')


# Create a list where to store the data:

results = []


# Loop through each latitude and longitude from the bikes dataframe:

for index, row in df_bikes.iterrows():
    latitude = row['latitude']
    longitude = row['longitude']
    radius = 1000

    
# Specify headers and parameters according the API documentation:

    params = {
        'll': f'{latitude},{longitude}',
        'radius': radius
    }

    headers = {'Accept': 'application/json'}

    headers['Authorization'] = api_key

    
# Apply the get function to pull the data from the website:

    response = requests.get(url, params=params, headers=headers)

    
# 'Status code 200' means that the method is succesful, so only store the data if this worked:

    if response.status_code == 200:
            data = response.json()

            
# Same as Yelp's, add a column with the id of the bikes from the bikes.csv file in order to work with it later.
# This column will be named 'bikes_id':

            for i in data['results']:
                i['bikes_id'] = index
                results.extend(data['results'])

In [83]:
# My data of interest is 'category_id' which is contained within the column 'categories'.
# 'categories' column is a list of dictionaries, so will have to work on it to pull the desired data:

df_4s = pd.DataFrame(results)
df_4s.head()

Unnamed: 0,fsq_id,categories,chains,closed_bucket,distance,geocodes,link,location,name,related_places,timezone,bikes_id
0,4b1db335f964a520211424e3,"[{'id': 13034, 'name': 'Café', 'short_name': '...",[],VeryLikelyOpen,96,"{'main': {'latitude': 49.263328, 'longitude': ...",/v3/places/4b1db335f964a520211424e3,"{'address': '451W Broadway W', 'country': 'CA'...",Marulilu Cafe,{},America/Vancouver,0
1,4e223f06d4c0d32590f80ff4,"[{'id': 13306, 'name': 'Taco Restaurant', 'sho...",[],VeryLikelyOpen,164,"{'drop_off': {'latitude': 49.263553, 'longitud...",/v3/places/4e223f06d4c0d32590f80ff4,"{'address': '2450 Yukon St', 'address_extended...",La Taqueria Pinche Taco Shop,{},America/Vancouver,0
2,4aa73d25f964a520704c20e3,"[{'id': 17069, 'name': 'Grocery Store', 'short...",[],VeryLikelyOpen,182,"{'main': {'latitude': 49.263957, 'longitude': ...",/v3/places/4aa73d25f964a520704c20e3,"{'address': '510 8th Ave W', 'country': 'CA', ...",Whole Foods,{'children': [{'fsq_id': '61a828687b754c29454a...,America/Vancouver,0
3,4aa98676f964a520105420e3,"[{'id': 13035, 'name': 'Coffee Shop', 'short_n...",[],VeryLikelyOpen,225,"{'drop_off': {'latitude': 49.263256, 'longitud...",/v3/places/4aa98676f964a520105420e3,"{'address': '590 Broadway W', 'country': 'CA',...",Elysian Coffee Broadway,{},America/Vancouver,0
4,4f60fc8be4b0a9d090a1f850,"[{'id': 13003, 'name': 'Bar', 'short_name': 'B...",[],VeryLikelyOpen,250,"{'drop_off': {'latitude': 49.263262, 'longitud...",/v3/places/4f60fc8be4b0a9d090a1f850,"{'address': '602W Broadway W', 'country': 'CA'...",Rogue Kitchen & Wetbar,{},America/Vancouver,0


In [115]:
# As we can see, each row may contain more than one value of interest for us:

df_4s['categories'][0]

[{'id': 13034,
  'name': 'Café',
  'short_name': 'Café',
  'plural_name': 'Cafés',
  'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/cafe_',
   'suffix': '.png'}},
 {'id': 13263,
  'name': 'Japanese Restaurant',
  'short_name': 'Japanese',
  'plural_name': 'Japanese Restaurants',
  'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/japanese_',
   'suffix': '.png'}}]

In [122]:
# Will apply a lambda function to pull all the category values and save them in a new column named 'category_ids':

df_4s['category_ids'] = df_4s['categories'].apply(lambda categories: [category['id'] for category in categories])


# The only columns of interest are 'category_ids' and 'bikes_id', the rest can be dropped:

df_4s = df_4s[['bikes_id', 'category_ids']].copy()

df_4s

       bikes_id           category_ids
0             0         [13034, 13263]
1             0                [13306]
2             0  [17069, 17071, 17077]
3             0         [13035, 13065]
4             0         [13003, 13065]
...         ...                    ...
24304       243                [13057]
24305       243                [13276]
24306       243  [13002, 13034, 13065]
24307       243                [17018]
24308       243  [13002, 13034, 13049]

[24309 rows x 2 columns]


In [128]:
# As we can see, we still have more than one category id value per row, we'll 'explode' it to solve this:
# Also, the row index is duplicated, so it'll be resetted:

df_4s = df_4s.explode('category_ids')
df_4s = df_4s.reset_index(drop=True)
df_4s

Unnamed: 0,bikes_id,category_ids
0,0,13034
1,0,13263
2,0,13306
3,0,17069
4,0,17071
...,...,...
41027,243,13065
41028,243,17018
41029,243,13002
41030,243,13034


In [130]:
# Lastly, export as csv file:

df_4s.to_csv('four_square.csv')

-----------------------------

# Comparing Results

#### Which API provided you with more complete data? Provide an explanation.

In my opinion Yelp has given me more useful data to work with such as "reviews, distance, price & rating", which I will most likely use in my study case. 

At the same time, it was easier to work with the information pulled from Yelp than with Foursquare's. 

The only backside is that the limit of data that you can retrive from Yelp is very low so I had to use a second email address.