In [None]:
# imports

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [44]:

# # Foursquare API Test (initial test request before batching stations)
# # This cell was used to test the API response format.
# # Can be skipped or deleted for final version.

# response = requests.get(...)
# print(response.status_code)
# print(response.json())


import requests

# Single-location smoke test of the Foursquare Places search endpoint, used to
# inspect the response format before querying every bike station.
url = "https://places-api.foursquare.com/places/search"

# Read the API key from a local file so the secret never appears in the notebook.
with open("api_keys/foursquare_api.txt") as f:
    fsq_api_key = f.read().strip()

headers = {
    "accept": "application/json",
    "X-Places-Api-Version": "2025-06-17",
    "authorization": f"Bearer {fsq_api_key}",
}

params = {
    "ll": "45.4628,-73.5659",  # Montreal
    "radius": 1000,
    "limit": 10,
    "query": "restaurant",
}
response = requests.get(url, headers=headers, params=params)
data = response.json()  # parsed payload, reused by the cells that follow

print(response.text)



{"results":[{"fsq_place_id":"51d6f3dc498ea03f2ce3379a","latitude":45.46393651914137,"longitude":-73.56671059636794,"categories":[{"fsq_category_id":"4bf58dd8d48988d16a941735","name":"Bakery","short_name":"Bakery","plural_name":"Bakeries","icon":{"prefix":"https://ss3.4sqi.net/img/categories_v2/food/bakery_","suffix":".png"}}],"date_created":"2013-07-05","date_refreshed":"2025-04-02","distance":141,"extended_location":{},"link":"/places/51d6f3dc498ea03f2ce3379a","location":{"address":"3990 Wellington","locality":"Verdun","region":"QC","postcode":"H4G 1V3","country":"CA","formatted_address":"3990 Wellington (entre De l'Église & Hickson), Verdun QC H4G 1V3"},"name":"Boulangerie Pâtisserie Wellington","placemaker_url":"https://foursquare.com/placemakers/review-place/51d6f3dc498ea03f2ce3379a","related_places":{},"social_media":{"facebook_id":"260791237603649","twitter":""},"tel":"(514) 658-3646","website":"http://www.promenadewellington.com/fr/commerces/alimentation"},{"fsq_place_id":"4c812

In [51]:
# Optional sanity check (safe to skip): pretty-print the Foursquare JSON held in
# `data` so the nested structure is easy to read.
# This API test took the longest to print successfully.
import json

formatted_response = json.dumps(data, indent=2)
print(formatted_response)


{
  "results": [
    {
      "fsq_place_id": "51d6f3dc498ea03f2ce3379a",
      "latitude": 45.46393651914137,
      "longitude": -73.56671059636794,
      "categories": [
        {
          "fsq_category_id": "4bf58dd8d48988d16a941735",
          "name": "Bakery",
          "short_name": "Bakery",
          "plural_name": "Bakeries",
          "icon": {
            "prefix": "https://ss3.4sqi.net/img/categories_v2/food/bakery_",
            "suffix": ".png"
          }
        }
      ],
      "date_created": "2013-07-05",
      "date_refreshed": "2025-04-02",
      "distance": 141,
      "extended_location": {},
      "link": "/places/51d6f3dc498ea03f2ce3379a",
      "location": {
        "address": "3990 Wellington",
        "locality": "Verdun",
        "region": "QC",
        "postcode": "H4G 1V3",
        "country": "CA",
        "formatted_address": "3990 Wellington (entre De l'\u00c9glise & Hickson), Verdun QC H4G 1V3"
      },
      "name": "Boulangerie P\u00e2tisserie Welling

In [104]:
# Loop through all 1,004 bike stations to collect nearby restaurants (Foursquare).
# The initial single request did not yield enough POIs for sufficient results.
# Estimated runtime: ~16-21 minutes due to rate limiting (1-second delay per request)
# - perfect time to grab a snack or a beer.
# This cell queries the Foursquare API for each station's latitude/longitude and
# saves the results.

import pandas as pd
import requests
import time
from pandas import json_normalize

# Load your full bike station list
stations_df = pd.read_csv("../data/processed/stations_df.csv")
stations_df = stations_df.rename(columns={"id": "station_id"})

# Read the API key from a local file so the secret never appears in the notebook.
with open("api_keys/foursquare_api.txt") as f:
    fsq_api_key = f.read().strip()

headers = {
    "Accept": "application/json",
    "Authorization": f"Bearer {fsq_api_key}",
    "X-Places-API-Version": "2025-06-17"
}

# Fields kept from each flattened Foursquare result.
poi_columns = [
    "fsq_place_id",
    "name",
    "location.formatted_address",
    "latitude",
    "longitude",
    "distance"
]

all_pois = []

# Loop through all stations
for i, row in stations_df.iterrows():
    lat, lon = row['latitude'], row['longitude']
    station_id = row['station_id']

    params = {
        "ll": f"{lat},{lon}",
        "radius": 500,
        "limit": 20,
        "query": "restaurant"
    }

    response = requests.get("https://places-api.foursquare.com/places/search", headers=headers, params=params)

    if response.status_code == 200:
        results = response.json().get("results", [])
        pois = json_normalize(results, sep=".")

        if not pois.empty:
            # reindex returns a new frame and fills a field with NaN when no
            # result in this batch carried it, instead of raising KeyError.
            pois = pois.reindex(columns=poi_columns)
            pois["station_id"] = station_id
            all_pois.append(pois)

        print(f"Collected {len(pois)} POIs for station {station_id}")
    else:
        print(f"Error {response.status_code} at station {station_id}")

    # Pause after every request, including failed ones, so an error response
    # is not followed immediately by another call.
    time.sleep(1)  # Respect API rate limits

# Save all results
if all_pois:
    fsq_df = pd.concat(all_pois, ignore_index=True)
    fsq_df.to_csv("../data_collection_and_API/foursquare_restaurants.csv", index=False)
    print("✅ Saved all Foursquare results to CSV.")
else:
    print("⚠️ No POIs collected.")


Collected 20 POIs for station 0013e5d100f68121835052a381ab2f23
Collected 20 POIs for station 0040082e616edec293c0c74fcbf825c4
Collected 3 POIs for station 00b7595b843f69c9041fca9e4e84700b
Collected 20 POIs for station 00c210cb99cf9d1b923c1548938aee56
Collected 5 POIs for station 00c84f03ca5970eaa144ed6867d1e2b9
Collected 20 POIs for station 00d9a9cfbe247f789b2354e2f4251c14
Collected 20 POIs for station 00e29ec2eb64a7c141edf1729b02cfd0
Collected 20 POIs for station 014e10dba2d92bd20c826b88864dc6b6
Collected 9 POIs for station 01eb2ebf1d0478ffdc9ea49c43a4b7ae
Collected 13 POIs for station 01f9b7e63833ad61e80a7963e2ad9b25
Collected 20 POIs for station 02044f52405851c50980c20964349a5d
Collected 11 POIs for station 0217b73736addb107209816f03000a18
Collected 12 POIs for station 0243c740e1d4fe272e0c414cdd900c7d
Collected 20 POIs for station 0274ec7d6cd70a5f7e5fc21edc47b801
Collected 0 POIs for station 02e0f09887bab2491c3cf2e6d0b477e4
Collected 20 POIs for station 03306a21cafcf901fa4a45504a834

In [111]:
# Per-station summary of the Foursquare POIs: how many venues were returned
# and their mean `distance` value.
fsq_summary = (
    fsq_df
    .groupby("station_id")
    .agg(
        venue_count=("name", "count"),
        avg_distance=("distance", "mean"),
    )
    .reset_index()
)

# Write the summary out so it can be joined with the station data later.
fsq_summary.to_csv("../data/processed/foursquare_summary.csv", index=False)


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [109]:
# With enough stations finally collected, parse a response into POI details.
# Parsing Foursquare results — time to dig into Montreal's culinary scene.
# This cell reuses `response`, `station_id` and `all_pois` from the station loop.

results = response.json().get("results", [])
pois = json_normalize(results, sep=".")

if pois.empty:
    # An empty result set has no columns, so selecting fields would raise KeyError.
    print(f"No POIs returned for station {station_id}")
else:
    # Select available fields; reindex fills a missing field with NaN
    # instead of raising, and returns a new frame that is safe to modify.
    pois = pois.reindex(columns=[
        "fsq_place_id",
        "name",
        "location.formatted_address",
        "distance"
    ])

    # Add the station_id
    pois["station_id"] = station_id
    all_pois.append(pois)





Put your parsed results into a DataFrame

In [52]:
import pandas as pd
from pandas import json_normalize

# Fields kept for the sample table built from the single test request.
sample_columns = [
    "name",
    "location.formatted_address",
    "latitude",
    "longitude",
    "distance",
]

# Flatten the nested JSON (dot-separated column names), then keep only the
# fields listed above.
fsq_df = json_normalize(data["results"], sep=".").loc[:, sample_columns]

# Persist the sample and preview the first rows.
fsq_df.to_csv("../data/processed/foursquare_sample.csv", index=False)
print("✅ Saved to ../data/processed/foursquare_sample.csv")
fsq_df.head()



✅ Saved to ../data/processed/foursquare_sample.csv


Unnamed: 0,name,location.formatted_address,latitude,longitude,distance
0,Boulangerie Pâtisserie Wellington,"3990 Wellington (entre De l'Église & Hickson),...",45.463937,-73.566711,141
1,Café la Tazza,"3922, rue Wellington (entre de l'Église & Hick...",45.464619,-73.566636,210
2,Garage Café,"275 rue Hickson (coin Wellington), Verdun QC H...",45.464982,-73.567522,273
3,Bossa Prêt-À-Manger,"4354 Wellington, Verdun QC H4G 1W4",45.460468,-73.566939,271
4,Nouveau Delhi Express,"3876 Rue Wellington, Verdun QC H4G 1V2",45.465245,-73.566607,277


In [75]:
import pandas as pd

# Reload the raw Foursquare POIs written by the station loop.
fsq_df = pd.read_csv("../data_collection_and_API/foursquare_restaurants.csv")

# One row per station: number of venues and mean `distance` value.
station_groups = fsq_df.groupby("station_id")
fsq_summary = (
    station_groups
    .agg({"name": "count", "distance": "mean"})
    .rename(columns={"name": "venue_count", "distance": "avg_distance"})
    .reset_index()
)

# Write the summary out for the later join.
fsq_summary.to_csv("../data/processed/foursquare_summary.csv", index=False)
print("Saved to ../data/processed/foursquare_summary.csv")


Saved to ../data/processed/foursquare_summary.csv


# Yelp

In [73]:
# Write the collected Yelp venues to CSV. This relies on `yelp_df`, which is
# built by the Yelp collection cell further down in this notebook.
yelp_csv_path = "../data_collection_and_API/yelp_restaurants.csv"
yelp_df.to_csv(yelp_csv_path, index=False)
print("Saved Yelp data to yelp_restaurants.csv")


✅ Saved Yelp data to yelp_restaurants.csv


In [74]:
# Save Yelp API key (do this only once).
# The key is read from a local file rather than pasted into the notebook, then
# copied next to the data-collection code. Keep both key files out of version control.
with open("api_keys/yelp_api.txt") as f:
    YELP_API_KEY = f.read().strip()

with open("../data_collection_and_API/yelp_api_key.txt", "w") as f:
    f.write(YELP_API_KEY)

print("Yelp API key saved to yelp_api_key.txt")


✅ Yelp API key saved to yelp_api_key.txt


Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [157]:
# Test the Yelp API on a single location first, because the credits are very
# limited, before looping over multiple stations.

# `requests` is already imported and used by the Foursquare cells above, so no
# in-notebook install is needed; run `%pip install requests` in a setup cell if
# the environment lacks it.
import requests
from pandas import json_normalize

print("Starting Yelp API test...")

# Read the API key from a local file so the secret never appears in the notebook.
with open("api_keys/yelp_api.txt") as f:
    YELP_API_KEY = f.read().strip()

headers = {
    "Authorization": f"Bearer {YELP_API_KEY}"
}

params = {
    "term": "restaurants",
    "latitude": 45.50884,
    "longitude": -73.58781,
    "radius": 1000,
    "limit": 10
}

url = "https://api.yelp.com/v3/businesses/search"

# Start from an empty table so `businesses` exists even if the request fails.
businesses = json_normalize([])

try:
    response = requests.get(url, headers=headers, params=params)
    print("Status:", response.status_code)

    if response.status_code == 200:
        data = response.json()
        found = data.get("businesses", [])
        businesses = json_normalize(found)
        print(f"Businesses found: {len(found)}")
        print("First business:", found[0]["name"] if found else "None")
    else:
        print("Error:", response.text)

except (requests.RequestException, ValueError) as e:
    # Network failures and undecodable JSON bodies are reported, not raised,
    # so the test cell always finishes.
    print("❌ Something went wrong:", e)

print("Yelp API test completed.")

Starting Yelp API test...
Status: 200
Businesses found: 10
First business: Le Majestique
Yelp API test completed.


In [3]:
# Parse through the response to get the POI (such as restaurants, bars, etc)
# details you want (ratings, name, location, etc)

# Imports

import pandas as pd
import requests
import time
from pandas import json_normalize

# Load your bike stations
stations_df = pd.read_csv("../data/processed/stations_df.csv")
stations_df = stations_df.rename(columns={"id": "station_id"})
stations_df["station_id"] = stations_df["station_id"].astype(str)

# Read the Yelp API key from a local file so the secret never appears in the notebook.
with open("api_keys/yelp_api.txt") as f:
    YELP_API_KEY = f.read().strip()

headers = {
    "Authorization": f"Bearer {YELP_API_KEY}"
}

# Only the first 100 stations are queried: a decent sample without the risk of
# surpassing the allowed Yelp credits. Raise this if the limit allows.
YELP_STATION_SAMPLE = 100

# Start collecting Yelp results
all_results = []

for i, row in stations_df.head(YELP_STATION_SAMPLE).iterrows():
    lat = row["latitude"]
    lon = row["longitude"]
    station_id = row["station_id"]

    print(f"Requesting Yelp for station {station_id} at lat={lat}, lon={lon}")

    params = {
        "term": "restaurants",
        "latitude": lat,
        "longitude": lon,
        "radius": 1000,
        "limit": 20
    }

    response = requests.get("https://api.yelp.com/v3/businesses/search", headers=headers, params=params)

    if response.status_code == 200:
        data = response.json()
        businesses = json_normalize(data.get("businesses", []))

        print(f"  ➤ Found {len(businesses)} businesses")

        if not businesses.empty:
            pois = pd.DataFrame({
                "name": businesses["name"],
                "rating": businesses["rating"],
                "reviews": businesses["review_count"],
                # First listed category title, or None when the list is empty.
                "category": businesses["categories"].apply(lambda x: x[0]["title"] if x else None),
                # display_address is a list of address lines; join it into one string.
                "address": businesses["location.display_address"].apply(lambda x: ", ".join(x) if isinstance(x, list) else None),
                "lat": businesses["coordinates.latitude"],
                "lon": businesses["coordinates.longitude"],
                "station_id": station_id
            })
            all_results.append(pois)
    else:
        print(f"Error {response.status_code} for station {station_id}")

    time.sleep(1)  # one-second pause between requests

# Combine and save
if all_results:
    yelp_df = pd.concat(all_results, ignore_index=True)
    yelp_df.to_csv("../data_collection_and_API/yelp_restaurants.csv", index=False)
    print("Yelp data saved to ../data_collection_and_API/yelp_restaurants.csv")
else:
    print("No data was collected.")



Requesting Yelp for station 0013e5d100f68121835052a381ab2f23 at lat=45.4628312022419, lon=-73.56593772768973
  ➤ Found 20 businesses
Requesting Yelp for station 0040082e616edec293c0c74fcbf825c4 at lat=45.50208503935104, lon=-73.56294138360681
  ➤ Found 20 businesses
Requesting Yelp for station 00b7595b843f69c9041fca9e4e84700b at lat=45.507437367855864, lon=-73.63265757772751
  ➤ Found 20 businesses
Requesting Yelp for station 00c210cb99cf9d1b923c1548938aee56 at lat=45.53519006163501, lon=-73.61548215150833
  ➤ Found 20 businesses
Requesting Yelp for station 00c84f03ca5970eaa144ed6867d1e2b9 at lat=45.613330089384625, lon=-73.45154017210007
  ➤ Found 16 businesses
Requesting Yelp for station 00d9a9cfbe247f789b2354e2f4251c14 at lat=45.550393662727416, lon=-73.5737616321785
  ➤ Found 20 businesses
Requesting Yelp for station 00e29ec2eb64a7c141edf1729b02cfd0 at lat=45.49947991442336, lon=-73.5759771368248
  ➤ Found 20 businesses
Requesting Yelp for station 014e10dba2d92bd20c826b88864dc6b6 a

Put your parsed results in a DataFrame 

In [4]:
# Stack the per-station Yelp tables into one DataFrame and preview it.
if not all_results:
    print("No data collected — check earlier cells.")
else:
    yelp_df = pd.concat(all_results, ignore_index=True)
    print(f"Combined Yelp POI DataFrame shape: {yelp_df.shape}")
    display(yelp_df.head())



Combined Yelp POI DataFrame shape: (1970, 8)


Unnamed: 0,name,rating,reviews,category,address,lat,lon,station_id
0,Les Street Monkeys,4.4,84,Cambodian,"3625 Rue Wellington, Montreal, QC H4G 1T9, Canada",45.46767,-73.56688,0013e5d100f68121835052a381ab2f23
1,Beba,4.7,32,Argentine,"3900 Rue Éthel, Montreal, QC H4G 2J6, Canada",45.464869,-73.567814,0013e5d100f68121835052a381ab2f23
2,Bistro Entre Ciel et Terre,4.1,34,French,"750 Rue de l'Eglise, Verdun, QC H4G 2M8, Canada",45.462962,-73.573034,0013e5d100f68121835052a381ab2f23
3,Janine Café,4.6,72,Breakfast & Brunch,"3900 Rue Wellington, Montreal, QC H4G 1V3, Canada",45.46475,-73.56657,0013e5d100f68121835052a381ab2f23
4,Les Délices de l'Île Maurice,4.3,20,Mauritius,"272 Rue Hickson, Verdun, QC H4G 2J6, Canada",45.46479,-73.56747,0013e5d100f68121835052a381ab2f23


In [6]:
# Persist the combined Yelp venues (row index omitted) for later steps.
yelp_df.to_csv(
    path_or_buf="../data_collection_and_API/yelp_restaurants.csv",
    index=False,
)
print("✅ Yelp data saved to ../data_collection_and_API/yelp_restaurants.csv")


✅ Yelp data saved to ../data_collection_and_API/yelp_restaurants.csv


In [7]:
# Per-station Yelp summary: number of venues, mean rating and mean review count.
yelp_summary = (
    yelp_df
    .groupby("station_id")
    .agg(
        venue_count=("name", "count"),
        avg_rating=("rating", "mean"),
        avg_reviews=("reviews", "mean"),
    )
    .reset_index()
)

# Store the summary in the processed-data folder.
yelp_summary.to_csv("../data/processed/yelp_summary.csv", index=False)
print("✅ Yelp summary saved to ../data/processed/yelp_summary.csv")


✅ Yelp summary saved to ../data/processed/yelp_summary.csv


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

At this time, Foursquare provided data for more stations because of its higher request allowance; however, vital information such as ratings was not available with the free Foursquare account, whereas Yelp did provide it.

Get the top 10 restaurants according to their rating

In [9]:
# Top 10 unique restaurants by rating, taken across all stations with duplicates
# removed - FYI Caifan came up 7 times on the list when duplicates were allowed.
# All based on the collective wisdom (and strong opinions) of internet strangers.

# Load the full Yelp venues file (if not already loaded)
yelp_df = pd.read_csv("../data_collection_and_API/yelp_restaurants.csv")

# Convert station_id to string if needed
yelp_df["station_id"] = yelp_df["station_id"].astype(str)

# Ranking with duplicates allowed, kept for comparison:
# drop rows missing ratings, then sort by rating and review count (descending).
filtered = yelp_df.dropna(subset=["rating", "reviews"])
top_10 = filtered.sort_values(by=["rating", "reviews"], ascending=[False, False]).head(10)

# Remove duplicates by name + address (you could also use Yelp ID if available)
deduped = yelp_df.drop_duplicates(subset=["name", "address"])

# Drop rows missing rating or review info
deduped = deduped.dropna(subset=["rating", "reviews"])

# Sort by rating and review count
top_10_unique = deduped.sort_values(by=["rating", "reviews"], ascending=[False, False]).head(10)

# Save only after the table has been computed, so the cell runs on a fresh kernel.
top_10_unique.to_csv("../data/processed/top_10_yelp_restaurants.csv", index=False)
print("✅ Top 10 unique restaurants saved to ../data/processed/top_10_yelp_restaurants.csv")

# Show results
top_10_unique[["name", "rating", "reviews", "category", "address"]]


Unnamed: 0,name,rating,reviews,category,address
1515,Ohana,5.0,22,Sushi Bars,"330 Ave Mont-Royal E, Montreal, QC H2T 1P8, Ca..."
511,Caifan,5.0,9,Mexican,"4542 Rue Saint-Denis, Montreal, QC H2J 2L3, Ca..."
35,Nama Omakase,5.0,8,Japanese,"425 Viger Ouest, Montreal, QC H2Z 1W5, Canada"
875,Rendez-vous Bistro Indian Cuisine Redefined,5.0,8,Indian,"3443 Saint Denis Street, Montreal, QC H2X 3L1,..."
1641,Cabane St-Louis,5.0,8,Pizza,"85 Rue Saint-Louis, Saint-Eustache, QC J7R 1X8..."
950,NDG Luncheonette,5.0,6,Breakfast & Brunch,"6800 Fielding Avenue, Montreal, QC H4V 1N9, Ca..."
1056,Le Chou de Bruxelles,5.0,6,Belgian,"1461 Rue Galt O, Sherbrooke, QC J1H 2A9, Canada"
938,Seoul Dosirak,5.0,4,Korean,"3281 Boulevard Cavendish, Montreal, QC H4B 2M5..."
1210,30 Juin,5.0,4,African,"1694 Rue Sainte-Catherine E, Montreal, QC H2L ..."
202,Rotisserie La Lune,5.0,3,French,"391 Rue Saint-Zotique E, Montreal, QC H2S 1L8,..."
