In [None]:
# imports
import getpass
import json
import os

import pandas
import requests
from pandas import json_normalize

# Set Environment Variables (i.e. API Keys)
def _prompt_api_key(service, env_var):
    """Prompt for one API key without echoing it and export it if provided.

    The key is read with getpass so that it is not echoed back and does not
    end up stored in the notebook's saved cell output.

    Args:
        service: Human-readable API name used in the prompt and messages.
        env_var: Name of the environment variable to set when a key is given.

    Returns:
        The entered key with surrounding whitespace removed, or an empty
        string when the prompt was left blank.
    """
    key = getpass.getpass(
        f"Enter your {service} API key (or leave blank to skip): "
    ).strip()

    if key:
        os.environ[env_var] = key
        print(f"{service} API key set as environment variable.")
    else:
        print(f"No {service} API key provided. Skipped.")

    return key


# The later request cells read these two variables directly.
api_key_4sq = _prompt_api_key("Foursquare", "MY_4SQ_API_KEY")
api_key_yelp = _prompt_api_key("Yelp", "MY_YELP_API_KEY")

# Read the station data saved in Part 1 (Osaka, via the Docomo Bike Share network)
with open("osaka_docomo_stations.json", "r", encoding="utf-8") as f:
    data = json.load(f)
stations_data = data

# Only the first 5 stations are kept: a small sample is easier to analyse and
# keeps the number of calls to the rate-limited APIs low.
wanted_fields = ("name", "latitude", "longitude")
stations = [
    {field: station[field] for field in wanted_fields}
    for station in stations_data[:5]
]


For the sake of security and user flexibility, the above script requests manual entry of the corresponding API key(s) for the APIs being used (Foursquare & Yelp) at the beginning of the script. This sets the local environment variable(s) for the API keys moving forward, and allows each user of this script to provide their own API key. In case no API key is available (as is the case in the upcoming Foursquare section), an "else" clause has been included to allow skipping of API key entry.

To search for the bike stations found in Part 1 of this project, the "osaka_docomo_stations.json" file from Part 1 is also "loaded" into this notebook as a prerequisite for the upcoming script to run.

Additionally, for the purpose of this project, the script intentionally limits to only the first 5 stations in the data. This makes for ease of analysis in a manageable sample size and mitigates the danger of exceeding API call limits.

# Foursquare

**PLEASE NOTE: FOR THE FOLLOWING FOURSQUARE SECTION**

Unfortunately - despite having signed up and submitted an account and request for access to the Foursquare Developer platform - my API application has been stuck in "processing and review". As such I was unable to generate an API key as the Foursquare platform prevents any such requests until the application has been approved. I had sent a support request detailing the need and time-sensitive urgency but to no avail.

Therefore, I have still included the intended Python script that would have been used had the Foursquare API key been accessible. Please note, however, that no calls were made and no data could be retrieved, as no API key was available. This has been communicated to a mentor via an Assistance Request and has been noted with the Student Success Coordinator team.

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice.

In [None]:
# The Foursquare Places API v3 (api.foursquare.com/v3) expects the raw API key
# in the Authorization header, without a "Bearer " prefix.
headers = {
    "Authorization": api_key_4sq,
    "Accept": "application/json"
}

url = "https://api.foursquare.com/v3/places/search"

# One entry per (station, venue) pair: {"station_name": ..., "venue": <raw API dict>}
venue_data = []

if api_key_4sq:
    for station in stations:
        lat = station["latitude"]
        lon = station["longitude"]
        station_name = station["name"]

        # Passing the query as `params` lets requests handle URL encoding.
        params = {
            "ll": f"{lat},{lon}",
            "radius": 1000,
            "categories": "13065,13003",  # Per Foursquare category IDs for restaurants and bars
            "limit": 10,  # Limited to the first 10 venues per station
        }

        try:
            # A timeout keeps one unresponsive request from hanging the notebook.
            response = requests.get(url, headers=headers, params=params, timeout=10)
        except requests.RequestException as exc:
            print(f"Foursquare request failed for station {station_name!r}: {exc}")
            continue

        if response.status_code != 200:
            # Report failures instead of silently ending up with fewer venues.
            print(
                f"Foursquare returned HTTP {response.status_code} "
                f"for station {station_name!r}; skipping."
            )
            continue

        results = response.json().get("results", [])
        for venue in results:
            venue_data.append({
                "station_name": station_name,
                "venue": venue
            })
else:
    # Without a key every call would be rejected, so none are sent.
    print("No Foursquare API key provided. Skipping Foursquare requests.")


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
# Flatten each raw Foursquare venue into one row of the fields used for analysis.
parsed_venue_data = []

for item in venue_data:
    venue = item["venue"]
    venue_id = venue.get("fsq_id")

    venue_name = venue.get("name", "N/A")
    venue_category = venue["categories"][0]["name"] if venue.get("categories") else "N/A"
    location = venue.get("location", {})
    # Prefer the pre-formatted address; otherwise build one from its parts.
    venue_address = location.get("formatted_address") or f"{location.get('address', '')} {location.get('locality', '')}".strip()
    main_geocode = venue.get("geocodes", {}).get("main", {})
    lat = main_geocode.get("latitude")
    lon = main_geocode.get("longitude")

    # The rating comes from a per-venue details call; "N/A" is kept whenever
    # that call is skipped, fails, or returns no rating.
    venue_rating = "N/A"
    if venue_id:
        detail_url = f"https://api.foursquare.com/v3/places/{venue_id}"
        try:
            detail_response = requests.get(
                detail_url,
                headers=headers,
                # v3 only returns core fields by default, so the rating has to
                # be requested explicitly.
                params={"fields": "rating"},
                timeout=10,
            )
        except requests.RequestException as exc:
            print(f"Foursquare details request failed for venue {venue_id}: {exc}")
        else:
            if detail_response.status_code == 200:
                venue_details = detail_response.json()
                venue_rating = venue_details.get("rating", "N/A")

    parsed_venue_data.append({
        "station_name": item["station_name"],
        "venue_name": venue_name,
        "venue_category": venue_category,
        "venue_latitude": lat,
        "venue_longitude": lon,
        "venue_address": venue_address,
        "venue_rating": venue_rating
    })


Put your parsed results into a DataFrame

In [None]:
# Build one DataFrame row per parsed Foursquare venue
df_4sq = json_normalize(data=parsed_venue_data)

# Preview the first rows
df_4sq.head()

Optional - Save JSON to local directory

In [None]:
# Write the Foursquare venue table to disk as a JSON array of records
output_json_path = "4sq_venues_osaka.json"

df_4sq.to_json(
    output_json_path,
    orient="records",
    indent=2,
    force_ascii=False,  # write non-ASCII characters as-is instead of escaping them
)

print("JSON file saved.")

 Optional - Save as CSV to local directory

In [None]:
# Write the Foursquare venue table to disk as CSV, without the index column
output_csv_path = "4sq_venues_osaka.csv"

df_4sq.to_csv(
    output_csv_path,
    index=False,
    encoding="utf-8-sig",  # UTF-8 with a leading byte-order mark
)

print("CSV file saved.")


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice.

In [None]:
headers_yelp = {
    "Authorization": f"Bearer {api_key_yelp}"
}

yelp_search_url = "https://api.yelp.com/v3/businesses/search"

# One entry per (station, business) pair: {"station_name": ..., "venue": <raw API dict>}
yelp_venue_data = []

if api_key_yelp:
    for station in stations:
        lat = station["latitude"]
        lon = station["longitude"]
        station_name = station["name"]

        # Passing the query as `params` lets requests handle URL encoding.
        params = {
            "latitude": lat,
            "longitude": lon,
            "radius": 1000,
            "categories": "restaurants,bars",
            "limit": 10,  # Limited to the first 10 venues per station
        }

        try:
            # A timeout keeps one unresponsive request from hanging the notebook.
            response = requests.get(
                yelp_search_url, headers=headers_yelp, params=params, timeout=10
            )
        except requests.RequestException as exc:
            print(f"Yelp request failed for station {station_name!r}: {exc}")
            continue

        if response.status_code != 200:
            # Report failures instead of silently ending up with fewer venues.
            print(
                f"Yelp returned HTTP {response.status_code} "
                f"for station {station_name!r}; skipping."
            )
            continue

        results = response.json().get("businesses", [])
        for business in results:
            yelp_venue_data.append({
                "station_name": station_name,
                "venue": business
            })
else:
    # Without a key every call would be rejected, so none are sent.
    print("No Yelp API key provided. Skipping Yelp requests.")


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
# Flatten each raw Yelp business into one row of the fields used for analysis.
parsed_yelp_data = []

for item in yelp_venue_data:
    business = item["venue"]

    # Pull the nested pieces out once, then read individual fields from them.
    categories = business.get("categories")
    coordinates = business.get("coordinates", {})
    address_lines = business.get("location", {}).get("display_address", [])

    venue_name = business.get("name", "N/A")
    # Unlike Foursquare, Yelp's v3/businesses/search response already includes the rating
    venue_rating = business.get("rating", "N/A")
    if categories:
        venue_category = categories[0]["title"]
    else:
        venue_category = "N/A"
    venue_latitude = coordinates.get("latitude")
    venue_longitude = coordinates.get("longitude")
    venue_address = ", ".join(address_lines)

    row = {
        "station_name": item["station_name"],
        "venue_name": venue_name,
        "venue_category": venue_category,
        "venue_latitude": venue_latitude,
        "venue_longitude": venue_longitude,
        "venue_address": venue_address,
        "venue_rating": venue_rating
    }
    parsed_yelp_data.append(row)


Put your parsed results into a DataFrame

In [None]:
# Build one DataFrame row per parsed Yelp business
df_yelp = json_normalize(data=parsed_yelp_data)

# Preview the first rows
df_yelp.head()

Unnamed: 0,station_name,venue_name,venue_category,venue_latitude,venue_longitude,venue_address,venue_rating
0,208.名鉄協商パーキング太閤第12,Maruya Honten Meieki,Unagi,35.169632,136.883985,"中村区名駅1-2-1, 名鉄百貨店本館 9F, Nagoya, 愛知県 〒450-0002,...",4.7
1,208.名鉄協商パーキング太閤第12,Midtown BBQ Nagoya,Barbeque,35.168346,136.891907,"中村区名駅5丁目24−3, Nagoya, 愛知県 〒450-0002, Japan",4.6
2,208.名鉄協商パーキング太閤第12,Hitsumabushi Nagoya Bincho Esca,Unagi,35.170302,136.87956,"中村区椿町6-9, 地下街内, Nagoya, 愛知県 〒453-0015, Japan",4.6
3,208.名鉄協商パーキング太閤第12,Misen JR Nagoya Station,Ramen,35.16869,136.882478,"中村区名駅1-1-4, Nagoya, 愛知県 〒450-0002, Japan",4.2
4,208.名鉄協商パーキング太閤第12,Ippudō Nagoyaekimendōri,Ramen,35.169868,136.881301,"中村区名駅1-1-4, JR名古屋駅 名古屋うまいもん通り内, Nagoya, 愛知県 〒4...",4.8


Optional - Save JSON to local directory

In [None]:
# Write the Yelp venue table to disk as a JSON array of records
output_json_path = "yelp_venues_osaka.json"

df_yelp.to_json(
    output_json_path,
    orient="records",
    indent=2,
    force_ascii=False,  # write non-ASCII characters as-is instead of escaping them
)

print("JSON file saved.")

JSON file saved.


Optional - Save as CSV to local directory

In [None]:
# Write the Yelp venue table to disk as CSV, without the index column
output_csv_path = "yelp_venues_osaka.csv"

df_yelp.to_csv(
    output_csv_path,
    index=False,
    encoding="utf-8-sig",  # UTF-8 with a leading byte-order mark
)

print("CSV file saved.")

CSV file saved.


# Comparing Results

Which API provided you with more complete data? Provide an explanation.

It would seem that the Yelp API provided the more complete data set, at least when it comes to coverage for restaurants and bars. "Coverage" here means readily available information for said establishments, such as ratings and categories.

Despite not having been able to pull the Foursquare data (please see the "PLEASE NOTE" at the top of the Foursquare section of this notebook), my conclusion came from exploring the structures of both APIs.

Yelp has all the required information within its "v3/businesses/search" endpoint. Most notably, the ratings per restaurant, which is a key piece of data that is intended to be used for the upcoming data analysis.

However, Foursquare required an additional call beyond its equivalent of the standard search endpoint (v3/places/search). To acquire the rating data of each restaurant, each specific "place" (restaurant) had to be requested using its unique ID within the Foursquare database ("fsq_id").

This may be a result of different focuses and purposes for each platform. While Yelp is often known for its community-sourced ratings and recommendations for restaurants, Foursquare is mainly known for its city-wide guide. As a result, it may be better for POIs as a whole, but not as optimized for solely restaurants.

Get the top 10 restaurants according to their rating

In [None]:
# Convert ratings to numbers; values that cannot be parsed become NaN
numeric_ratings = pandas.to_numeric(df_yelp["venue_rating"], errors="coerce")
df_yelp["venue_rating"] = numeric_ratings

# Order by rating, highest first, and show the first 10 rows
ranked_by_rating = df_yelp.sort_values(by="venue_rating", ascending=False)
ranked_by_rating.head(10)


Unnamed: 0,station_name,venue_name,venue_category,venue_latitude,venue_longitude,venue_address,venue_rating
13,058.金山駅南駅前広場ステーション,Kenzan,Sushi Bars,35.142677,136.899998,"中区金山町1丁目1-1, ANAクラウンプラザホテル29F, Nagoya, 愛知県 〒46...",5.0
7,208.名鉄協商パーキング太閤第12,Sengokusushihonten,Sushi Bars,35.166128,136.878131,"太閤4丁目4-3, 名古屋市 中村区, 愛知県 〒453-0801, Japan",5.0
12,058.金山駅南駅前広場ステーション,Keishinndou,Japanese,35.141882,136.900603,"金山町1丁目5-4, 名古屋市 熱田区, 愛知県 〒456-0002, Japan",5.0
39,104.ファミリーマート葵店,Budoutei,French,35.172026,136.918505,"東区東桜2丁目17-46, Nagoya, 愛知県 〒461-0005, Japan",5.0
26,429.TENAS泉,Daikoumochi,Dumplings,35.174994,136.915611,"東区泉2丁目25-20, Nagoya, 愛知県 〒461-0001, Japan",5.0
22,429.TENAS泉,Kadomaru,Udon,35.175045,136.911667,"東区泉1-18-33, Nagoya, 愛知県 〒461-0001, Japan",5.0
29,429.TENAS泉,DESPERADOS,Bars,35.16832,136.918247,"新栄1丁目8-11, 名古屋市 中区, 愛知県 〒460-0007, Japan",4.8
5,208.名鉄協商パーキング太閤第12,Yanagibashi Kitarou,Sushi Bars,35.170334,136.887488,"中村区名駅4-16-23, Nagoya, 愛知県, Japan",4.8
4,208.名鉄協商パーキング太閤第12,Ippudō Nagoyaekimendōri,Ramen,35.169868,136.881301,"中村区名駅1-1-4, JR名古屋駅 名古屋うまいもん通り内, Nagoya, 愛知県 〒4...",4.8
40,190.名鉄協商パーキング熱田神宮北,Atsuta Hōraiken Honten,Unagi,35.120207,136.906893,"熱田区神戸町503, Nagoya, 愛知県 〒456-0043, Japan",4.8


As seen above, the top 10 restaurants from our DataFrame (according to rating) have been sorted and output for visual comparison.

Applying some DATA CLEANING techniques, the first line ensures that all values in the "venue_rating" column are numeric and therefore ready to be sorted. Additionally, the "errors" parameter is set to "coerce" so that any values that are *NOT* numeric are converted to "NaN" (not a number) instead of raising an error and breaking the script.

The second line then sorts and outputs the requested top 10 restaurants based on our CityBikes data around Osaka (via the Docomo Bike Share network), within a 1000m radius around our bike stations.