In [14]:
import requests
import json 
import pandas as pd
import requests 
import requests
import os

In [2]:
filtered_df= pd.read_csv('../data/Bikes.csv')

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [3]:
def get_venues_fs(latitude, longitude, radius, api_key, categories, station_name):
    url = "https://api.foursquare.com/v3/places/search"
    headers = {"Authorization": api_key}
    params = {
        'll': f"{latitude},{longitude}",
        'radius': radius,
        'categories': categories,
        'limit': 50  # Adjust based on your needs
    }

    response = requests.get(url, headers=headers, params=params)
    results = []
    if response.status_code == 200:
        places = response.json().get('results', [])
        for place in places:
            name = place.get('name')
            categories_list = [category.get('name') for category in place.get('categories', [])]
            category_names = ', '.join(categories_list)  # Concatenating all category names
            results.append({
                "name": name,
                "categories": category_names,  # Adjusted to match the Yelp structure
                "station_name": station_name  # Include station name in the result
            })
    else:
        print(f"Failed to fetch data. Status code: {response.status_code}")
    return results

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [4]:
name_list_fs = []
categories_list_fs = []
station_list_fs = []


categories = "13003,16032,16020"  
radius = 500  # Radius in meters
api_key = os.getenv("LHL_project") 


for index, row in filtered_df.iterrows():
    venues = get_venues_fs(row['Latitude'], row['Longitude'], radius, api_key, categories, row['Station Name'])
    for venue in venues:
        name_list_fs.append(venue['name'])
        categories_list_fs.append(venue['categories'])
        station_list_fs.append(venue['station_name'])

df_foursquare = pd.DataFrame({
    'name': name_list_fs,
    'station_name': station_list_fs,
    'categories': categories_list_fs
})


df_foursquare.to_csv('foursquare_data.csv', index=False)


print(df_foursquare.head())

                   name   station_name                categories
0  Parc de la Citadelle  Place Islande                      Park
1          Pub la Belle  Place Islande                       Pub
2          Le BarÔmètre  Place Islande  Bar, Brewery, Restaurant
3              La Bamba  Place Islande                       Bar
4      Le Quinze-Quinze  Place Islande           Bar, Restaurant


Put your parsed results into a DataFrame

In [5]:
df_foursquare.to_csv('yelp_data.csv', index=False)
df_foursquare.head(30)

Unnamed: 0,name,station_name,categories
0,Parc de la Citadelle,Place Islande,Park
1,Pub la Belle,Place Islande,Pub
2,Le BarÔmètre,Place Islande,"Bar, Brewery, Restaurant"
3,La Bamba,Place Islande,Bar
4,Le Quinze-Quinze,Place Islande,"Bar, Restaurant"
5,SPYL,Place Islande,"Night Club, Gay Bar"
6,Académie de la Bière Cathédrale,Parking Broglie,"Sports Bar, Brewery, Restaurant"
7,Code Bar,Parking Broglie,"Cocktail Bar, Lounge"
8,L'Epicerie,Parking Broglie,"Bar, Brasserie"
9,La Nouvelle Poste,Parking Broglie,"Bar, Pizzeria, Brasserie"


In [6]:
df_foursquare.describe()

Unnamed: 0,name,station_name,categories
count,302,302,302
unique,171,22,87
top,Naoko,Centre,Bar
freq,4,45,42


In [7]:
duplicates_all = df_foursquare.duplicated()
print(f"Are there any duplicates (considering all columns)? {duplicates_all.any()}")

Are there any duplicates (considering all columns)? False


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [8]:
def query_yelp_categories(latitude, longitude, categories, radius=500):
    
    api_key = os.getenv('Yelp_key')  
    url = "https://api.yelp.com/v3/businesses/search"
    headers = {"Authorization": f"Bearer {api_key}"}
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "categories": categories,
        "radius": radius,
        "limit": 50 
    }

    response = requests.get(url, headers=headers, params=params)
    if response.status_code == 200:
        businesses = response.json()['businesses']
        results = []
        for business in businesses:
            name = business['name']
            categories_list = [category['title'] for category in business['categories']]
            results.append({"name": name, "categories": ', '.join(categories_list)})
        return results
    else:
        print(f"Failed to fetch data: {response.status_code}")
        return []


In [10]:
df_yelp.describe()

Unnamed: 0,name,station_name,categories
count,272,272,272
unique,141,20,81
top,So Divine,Centre,Bars
freq,4,50,39


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [9]:
# Initialize lists for storing data
name_list = []
cat_list = []
loc_list = []

# Iterating over each row in filtered_df(the table for city bike to use lat and long to find places near by) to query Yelp

for index, row in filtered_df.iterrows():
    station_name = row['Station Name']
    latitude = row['Latitude']
    longitude = row['Longitude']
    categories = "bars,parks,landmarks"
    
    # Query Yelp for the categories based on the station's location
    yelp_results = query_yelp_categories(latitude, longitude, categories, radius=500)
    
    for result in yelp_results:
        name_list.append(result['name'])
        cat_list.append(result['categories'])
        loc_list.append(station_name)

# Create a DataFrame with the collected data
df_yelp = pd.DataFrame({
    'name': name_list,
    'station_name': loc_list,
    'categories': cat_list
})
df_yelp.to_csv('yelp_data.csv', index=False)
df_yelp

Unnamed: 0,name,station_name,categories
0,The Irish Pub,Place Islande,Pubs
1,Mezzanine of London,Place Islande,"Bars, Dance Clubs, Tapas Bars"
2,Spyl,Place Islande,"Dance Clubs, Bars"
3,Le Barômètre,Place Islande,"Bars, Brasseries, Bistros"
4,imada,Place Islande,Landmarks & Historical Buildings
...,...,...,...
267,Fiesta Makassi Bar,Gare Nord,"Sports Bars, Cocktail Bars"
268,La Solidarité,Gare Nord,"French, Bars"
269,Le Garde Fou,Gare Nord,Pubs
270,Brasserie WOW,Gare Nord,"Bars, Brasseries"


In [15]:
df=df_yelp.to_csv('../data/Yelp.csv')

In [18]:
df_yelp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 272 entries, 0 to 271
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   name          272 non-null    object
 1   station_name  272 non-null    object
 2   categories    272 non-null    object
dtypes: object(3)
memory usage: 6.5+ KB


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

The forsqure have returned with more data compeared to yelp.

Get the top 10 restaurants according to their rating

In [19]:
import pandas as pd
import requests
import os

def get_top_rated_venues(latitude, longitude, radius, api_key, categories, station_name, limit=50):
    url = "https://api.foursquare.com/v3/places/search"
    headers = {"Authorization": api_key}
    params = {
        'll': f"{latitude},{longitude}",
        'radius': radius,
        'categories': categories,
        'limit': limit,
    }

    response = requests.get(url, headers=headers, params=params)
    venues = []
    if response.status_code == 200:
        places = response.json().get('results', [])
        for place in places:
            name = place.get('name')
            rating = place.get('rating', 0)  
            categories_list = [category.get('name') for category in place.get('categories', [])]
            category_names = ', '.join(categories_list)
            venues.append({
                "name": name,
                "rating": rating,
                "categories": category_names,
                "station_name": station_name
            })
       
        return sorted(venues, key=lambda x: x['rating'], reverse=True)[:10]
    else:
        print(f"Failed to fetch data. Status code: {response.status_code}")
        return []


api_key = os.getenv("LHL_project")
categories = "13026"  
radius = 1000  


name_list_fs = []
categories_list_fs = []
station_list_fs = []
rating_list_fs = []


for index, row in filtered_df.iterrows():
    venues = get_top_rated_venues(row['Latitude'], row['Longitude'], radius, api_key, categories, row['Station Name'])
    for venue in venues:
        name_list_fs.append(venue['name'])
        categories_list_fs.append(venue['categories'])
        station_list_fs.append(venue['station_name'])
        rating_list_fs.append(venue['rating'])


df_foursquare = pd.DataFrame({
    'name': name_list_fs,
    'station_name': station_list_fs,
    'categories': categories_list_fs,
    'rating': rating_list_fs
})


df_foursquare_sorted = df_foursquare.sort_values(by='rating', ascending=False).reset_index(drop=True)

print(df_foursquare_sorted.head(10))



                        name         station_name  \
0        Steakhouse District      Parking Broglie   
1      Restaurant au Tilleul        Hoenheim Gare   
2        Steakhouse District   Parking Austerlitz   
3        Steakhouse District   Gare de Strasbourg   
4        Steakhouse District               Centre   
5        Steakhouse District             Gare Sud   
6        Steakhouse District           Les Halles   
7        Steakhouse District            Gare Nord   
8  Villa Schmidt Gastro GmbH  Auberge de jeunesse   

                               categories  rating  
0                   BBQ Joint, Steakhouse       0  
1  BBQ Joint, Pizzeria, French Restaurant       0  
2                   BBQ Joint, Steakhouse       0  
3                   BBQ Joint, Steakhouse       0  
4                   BBQ Joint, Steakhouse       0  
5                   BBQ Joint, Steakhouse       0  
6                   BBQ Joint, Steakhouse       0  
7                   BBQ Joint, Steakhouse       0  
8