In [76]:
# imports

import pandas as pd
import requests
import os

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [77]:
# Load csv file from local machine

data = pd.read_csv('C:\\Users\\affuy\\Documents\\Data_Sets\\df_2.csv')
# View make up and structure of the file
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 394 entries, 0 to 393
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Unnamed: 0    394 non-null    int64  
 1   station_name  394 non-null    object 
 2   Latitude      394 non-null    float64
 3   Longitude     394 non-null    float64
 4   Total Bikes   394 non-null    int64  
dtypes: float64(2), int64(2), object(1)
memory usage: 15.5+ KB


In [78]:
# Visualize the DataFrame
data.head(5)

Unnamed: 0.1,Unnamed: 0,station_name,Latitude,Longitude,Total Bikes
0,0,2 - Miguel Moya,40.420589,-3.705842,27
1,1,3 - Plaza Conde Suchil,40.430294,-3.706917,19
2,2,6 - Colegio Arquitectos,40.424148,-3.698447,19
3,3,7 - Hortaleza,40.425191,-3.697771,19
4,4,9 - Plaza de San Miguel,40.415606,-3.709508,24


In [79]:
# Create a new column concatenating latitude and longitude into a comma-separated string
data['ll'] = data['Latitude'].astype(str) + ',' + data['Longitude'].astype(str)
data.head()


Unnamed: 0.1,Unnamed: 0,station_name,Latitude,Longitude,Total Bikes,ll
0,0,2 - Miguel Moya,40.420589,-3.705842,27,"40.4205886,-3.7058415"
1,1,3 - Plaza Conde Suchil,40.430294,-3.706917,19,"40.4302937,-3.7069171"
2,2,6 - Colegio Arquitectos,40.424148,-3.698447,19,"40.424148,-3.698447"
3,3,7 - Hortaleza,40.425191,-3.697771,19,"40.4251906,-3.6977715"
4,4,9 - Plaza de San Miguel,40.415606,-3.709508,24,"40.4156057,-3.7095084"


In [80]:
# Drop unwanted columns
updated_data = data.drop(['Unnamed: 0', 'Latitude', 'Longitude'], axis=1)

updated_data

Unnamed: 0,station_name,Total Bikes,ll
0,2 - Miguel Moya,27,"40.4205886,-3.7058415"
1,3 - Plaza Conde Suchil,19,"40.4302937,-3.7069171"
2,6 - Colegio Arquitectos,19,"40.424148,-3.698447"
3,7 - Hortaleza,19,"40.4251906,-3.6977715"
4,9 - Plaza de San Miguel,24,"40.4156057,-3.7095084"
...,...,...,...
389,"598 - Calle San Modesto, 42",24,"40.4860555,-3.692627"
390,"494 - Arturo Soria, 330",23,"40.48038551,-3.66724784"
391,272 - Calle del tejo,23,"40.3980005,-3.6780275"
392,"559 - Calle Babilonia, 19",23,"40.4675299,-3.5882347"


In [81]:
# Make the requst on Foursquare QUERY PARAMS, inputing the ll=40.4205886,-3.7058415, radius 1000, categories 13000

import requests
import os

api_key = os.environ["FOURSQUARE_API_KEY"]

url = "https://api.foursquare.com/v3/places/search?ll=40.4205886%2C-3.7058415&radius=1000&categories=13000&sort=RATING&limit=50"

headers = {
    "accept": "application/json",
    "Authorization": api_key
}

response = requests.get(url, headers=headers)


response = response.json()
response


{'results': [{'fsq_id': '57756e14498e605edcad54fc',
   'categories': [{'id': 13009,
     'name': 'Cocktail Bar',
     'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/nightlife/cocktails_',
      'suffix': '.png'}},
    {'id': 13057,
     'name': 'Gastropub',
     'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/gastropub_',
      'suffix': '.png'}},
    {'id': 13347,
     'name': 'Tapas Restaurant',
     'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/tapas_',
      'suffix': '.png'}}],
   'chains': [],
   'distance': 830,
   'geocodes': {'drop_off': {'latitude': 40.414873, 'longitude': -3.699567},
    'main': {'latitude': 40.414875, 'longitude': -3.699518},
    'roof': {'latitude': 40.414875, 'longitude': -3.699518}},
   'link': '/v3/places/57756e14498e605edcad54fc',
   'location': {'address': 'Calle Echegaray, 21',
    'admin_region': 'Comunidad de Madrid',
    'country': 'ES',
    'cross_street': '',
    'formatted_address': 'Calle Echegaray

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [82]:
import pandas as pd

def categorize_venues(categories):
    # Define category groupings
    category_groups = {
        'Cocktail Bar': 'Bars',
        'Gastropub': 'Bars',
        'Nightclub': 'Nightclubs',
        'Restaurant': 'Restaurants'
    }
    
    for category in categories:
        category_name = category.get('name', 'Other')
        if category_name in category_groups:
            return category_groups[category_name]
    
    return 'Other'  # If no matching category found, assign to 'Other' category

def extract_data(response):
    data_list = []
    nearby_points_of_interest = []

    for result in response['results']:
        name = result.get('name', 'N/A')
        
        # Extracting location information
        location_info = result.get('location', {})
        address = location_info.get('address', 'N/A')
        city = location_info.get('locality', 'N/A')
        country = location_info.get('country', 'N/A')
        
        # Extracting ranking information (if available)
        ranking_info = result.get('ranking', {})
        ranking = ranking_info.get('rank', 'N/A')

        # Extracting categories
        categories = result.get('categories', [])
        
        # Categorize the venue based on categories
        category = categorize_venues(categories)

        data_list.append({'Name': name, 'Address': address, 'City': city, 'Country': country, 'Ranking': ranking, 'Categories': category})
        
        # Extracting nearby points of interest
        nearby_info = result.get('nearby_points_of_interest', [])
        nearby_points_of_interest.extend(nearby_info)

    # Create a DataFrame from the extracted data
    df_fs = pd.DataFrame(data_list)
    
    # Create a DataFrame for nearby points of interest
    poi_df = pd.DataFrame(nearby_points_of_interest, columns=['Point of Interest'])
    
    return df_fs, poi_df

# Assuming you have your response stored in a variable called 'response'
df_fs, poi_df = extract_data(response)

# Display the resulting DataFrames
print("Venues:")
print(df_fs)

print("\nNearby Points of Interest:")
print(poi_df)


Venues:
                                             Name  \
0                                     Salmon Guru   
1                                      La Primera   
2                                      HanSo Café   
3                                   La Pescadería   
4                                         Amorino   
5                                        La Mayor   
6                             Bodega de la Ardosa   
7                                    Rosi la Loca   
8                  Azotea Círculo de Bellas Artes   
9                                       Valdemeso   
10                       Arrocería Marina Ventura   
11                               360° Rooftop Bar   
12                                         Lupita   
13                      Zenith Brunch & Cocktails   
14                       Honest Greens Gran Via 7   
15                                     Casa Labra   
16                                        Faborit   
17                                    

Put your parsed results into a DataFrame

In [83]:
# Create a DataFrame from the extracted data
df_fs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Name        50 non-null     object
 1   Address     50 non-null     object
 2   City        50 non-null     object
 3   Country     50 non-null     object
 4   Ranking     50 non-null     object
 5   Categories  50 non-null     object
dtypes: object(6)
memory usage: 2.5+ KB


In [84]:
# Save this as a csv file

import pandas as pd

# Save the DataFrame as a CSV file
df_fs.to_csv('C:\\Users\\affuy\\Documents\\Data_Sets\\df_foursquare.csv', index=False)


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [85]:
import requests
import os

yelp_api_key = os.environ["YELP_API_KEY"]
#url = "https://api.yelp.com/v3/businesses/search?latitude=40.4205886&longitude=-3.7058415&radius=1000&categories=restaurants%2Cbars%2Clandmarks&sort_by=rating&limit=50"
url = "https://api.yelp.com/v3/businesses/search?latitude=40.4205886&longitude=-3.7058415&radius=1000&categories=Restaurant%2CBars%2CNightclub&sort_by=rating&limit=50"

headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {yelp_api_key}"  # Replace with your actual Yelp API access token
}

response = requests.get(url, headers=headers)

response = response.json()

In [86]:
response

{'businesses': [{'id': 'TxjCXZF4b3paXN6n145bhA',
   'alias': 'sabe-a-gloria-madrid',
   'name': 'Sabe a Gloria',
   'image_url': 'https://s3-media4.fl.yelpcdn.com/bphoto/qnBtnVPA-2URE9a5U8x3aA/o.jpg',
   'is_closed': False,
   'url': 'https://www.yelp.com/biz/sabe-a-gloria-madrid?adjust_creative=HtGBEqBv3zF4YDHhPbDfHg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=HtGBEqBv3zF4YDHhPbDfHg',
   'review_count': 8,
   'categories': [{'alias': 'tabernas', 'title': 'Tabernas'},
    {'alias': 'arroceria_paella', 'title': 'Arroceria / Paella'},
    {'alias': 'tapas', 'title': 'Tapas Bars'}],
   'rating': 5.0,
   'coordinates': {'latitude': 40.4132689307325,
    'longitude': -3.69666307238731},
   'transactions': [],
   'location': {'address1': 'Calle de las Huertas, 53',
    'address2': '',
    'address3': None,
    'city': 'Madrid',
    'zip_code': '28014',
    'country': 'ES',
    'state': 'M',
    'display_address': ['Calle de las Huertas, 53', '28014 Madrid', 'Spain']

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [87]:
def extract_business_info(yelp_response):
    yelp_data = yelp_response.get('businesses', [])

    extracted_info = []

    for business in yelp_data:
        name = business['name']
        categories = [category['title'] for category in business['categories']]
        address = ", ".join(business['location']['display_address'])
        ranking = business['rating']

        # Determine the type of establishment (restaurant, bar, nightclub, etc.)
        establishment_type = "Unknown"
        for category in categories:
            if "restaurant" in category.lower():
                establishment_type = "Restaurant"
            elif "bar" in category.lower():
                establishment_type = "Bar"
            elif "nightclub" in category.lower():
                establishment_type = "Nightclub"
            # Add more categories as needed

        # Create a dictionary with the extracted information
        business_info = {
            "name": name,
            "type": establishment_type,
            "address": address,
            "ranking": ranking
        }

        extracted_info.append(business_info)
    df_yelp = pd.DataFrame(extracted_info)    

    return df_yelp


Put your parsed results into a DataFrame

In [88]:

# Assuming you have a Yelp response stored in a variable called 'yelp_response'
business_info_list = extract_business_info(response)

# Create a DataFrame from the list of dictionaries
df = pd.DataFrame(business_info_list)

# Display the DataFrame
df.head(4)


Unnamed: 0,name,type,address,ranking
0,Sabe a Gloria,Bar,"Calle de las Huertas, 53, 28014 Madrid, Spain",5.0
1,Blow Dry Bar,Unknown,"Calle de Pelayo, 76, 28004 Madrid, Spain",5.0
2,Oh Galo!,Bar,"Calle Pelayo, 72, 28004 Madrid, Spain",5.0
3,Jardines del Príncipe Anglona,Unknown,"Plaza de la Paja, 28005 Madrid, Spain",5.0


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

##### I have found that both the Foursquare and Yelp APIs provide valuable data for various purposes. The choice of which one to use depends on your specific needs and the type of data you require. 

##### The Yelp API has been particularly helpful for me. It provided me with information primarily related to businesses and places, with a strong focus on user-generated reviews and ratings. This includes detailed information about businesses such as their names, addresses, contact details, ratings, reviews, and categories. It has proven to be very useful when I needed to analyze and compare user opinions, ratings, and reviews about restaurants, bars, and other businesses. I've found it to be more suitable for applications related to the hospitality and food service industry.

##### On the other hand, the Foursquare API has also been quite valuable in my work. It offers a broader range of location-based data and insights beyond just businesses and places. In addition to business details, Foursquare has provided me with data related to check-ins, user-generated tips, user profiles, and location trends. This extensive dataset has been instrumental in exploring user behavior, tracking trends in check-in activity, and understanding user preferences in a more general sense. It's a versatile choice for applications that require a deeper understanding of user engagement and location-based behavior.

##### In summary, if you primarily need detailed information about businesses, their ratings, and reviews, the Yelp API may be more suitable for your needs. On the other hand, if you're interested in a broader range of location-based data, including user activity and trends, the Foursquare API can provide valuable insights. Your choice should align with your specific analytical goals and the type of data that best serves your analysis."

Get the top 10 restaurants according to their rating

In [89]:

# Assuming you already have the DataFrame 'df' with the given structure
# Sort the DataFrame by 'ranking' column in ascending order
df.sort_values(by='ranking', ascending=True, inplace=True)

# Add a 'top 10' column
df['top 10'] = df['ranking'].rank(method='min') <= 10

# Display the sorted DataFrame
df.head(4)


Unnamed: 0,name,type,address,ranking,top 10
0,Sabe a Gloria,Bar,"Calle de las Huertas, 53, 28014 Madrid, Spain",5.0,True
27,Mexcalista,Unknown,"Calle del León, 5, 28014 Madrid, Spain",5.0,True
28,Plenti,Unknown,"Costanilla de los Desamparados, 13, 28014 Madr...",5.0,True
29,Asiana,Unknown,"Travesía de San Mateo, 4, 28004 Madrid, Spain",5.0,True


In [90]:

# Sort the DataFrame by 'ranking' column in ascending order
df.sort_values(by='ranking', ascending=True, inplace=True)

# Reset the index starting from 1
df.reset_index(drop=True, inplace=True)
df.index = df.index + 1

# Display the sorted DataFrame
df.head(4)


Unnamed: 0,name,type,address,ranking,top 10
1,Sabe a Gloria,Bar,"Calle de las Huertas, 53, 28014 Madrid, Spain",5.0,True
2,Oh Galo!,Bar,"Calle Pelayo, 72, 28004 Madrid, Spain",5.0,True
3,Jardines del Príncipe Anglona,Unknown,"Plaza de la Paja, 28005 Madrid, Spain",5.0,True
4,Hunan,Unknown,"Calle de la Ballesta, 4, 28002 Madrid, Spain",5.0,True


In [91]:
# Save this as a csv file

import pandas as pd

# Save the DataFrame as a CSV file
df.to_csv('C:\\Users\\affuy\\Documents\\Data_Sets\\df_Yelp.csv', index=False)
