In [None]:
# imports

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [1]:
# Foursquare is queried once per bike station, so this cell first fetches the list of bike stations (from the CityBikes API).
# The Foursquare requests themselves are sent in the following two code cells.
import requests
import pandas as pd
import os

# Fetch the Citi Bike NYC network description from the CityBikes API.
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() surfaces an HTTP error here rather than as a
# confusing KeyError on the lookup below.
response = requests.get("http://api.citybik.es/v2/networks/citi-bike-nyc", timeout=30)
response.raise_for_status()
data = response.json()

# Flatten the list of station records into a DataFrame (one row per record).
bike_stations = pd.json_normalize(data['network']['stations'])

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [2]:
# Count the places the Foursquare Places search returns around a coordinate for one category.
def get_venues_fs(latitude, longitude, radius, api_key, category_id, limit=50, timeout=10):
    """Return how many Foursquare places are found near a coordinate.

    Args:
        latitude: Latitude of the search centre.
        longitude: Longitude of the search centre.
        radius: Value for the API's ``radius`` query parameter.
        api_key: Sent verbatim as the ``Authorization`` header.
        category_id: Value for the API's ``categories`` query parameter.
        limit: Value for the API's ``limit`` query parameter; the count
            returned here is presumably capped at this value.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        int: Length of the ``results`` list in the JSON response, or 0 when
        the API answers with anything other than HTTP 200 (the status code
        is printed in that case).
    """
    url = 'https://api.foursquare.com/v3/places/search'
    # Let requests build and escape the query string instead of formatting it by hand.
    params = {
        'll': f'{latitude},{longitude}',
        'radius': radius,
        'categories': category_id,
        'limit': limit,
    }
    headers = {'Authorization': api_key}
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    if response.status_code != 200:
        # Keep the best-effort "0 on failure" contract, but make the failure
        # visible so it is not mistaken for "no venues nearby".
        print(f"Error with Foursquare API call: {response.status_code}")
        return 0
    return len(response.json().get('results', []))

Put your parsed results into a DataFrame

In [5]:
# Main collection step: for every selected bike station, ask Foursquare how many
# POIs of each category lie within the search radius and store one row per station.

API_KEY = os.getenv('FOURSQUARE_KEY')  # read from the environment so the key never lives in the notebook
if not API_KEY:
    # get_venues_fs() returns 0 for any non-200 response, so running without a
    # key would quietly produce a dataset of zeros instead of an error.
    raise RuntimeError("Environment variable FOURSQUARE_KEY is not set")

# Foursquare category IDs for the POI types of interest.
# NOTE(review): confirm these IDs against the current Foursquare category taxonomy.
CATEGORY_PUBLIC_TRANSPORT = 19051
CATEGORY_CHINESE_RESTAURANT = 13099
CATEGORY_SPORT_RECREATION = 18021

SEARCH_RADIUS_M = 1000  # search radius around each station (1000 m, per the task statement)
MAX_STATIONS = 500      # only the first 500 stations are queried, to limit run time and API usage

all_data = []  # one dict of counts per station

for index, row in bike_stations.head(MAX_STATIONS).iterrows():
    public_transport = get_venues_fs(row['latitude'], row['longitude'], SEARCH_RADIUS_M, API_KEY, CATEGORY_PUBLIC_TRANSPORT)
    chinese_restaurants = get_venues_fs(row['latitude'], row['longitude'], SEARCH_RADIUS_M, API_KEY, CATEGORY_CHINESE_RESTAURANT)
    sport_recreation = get_venues_fs(row['latitude'], row['longitude'], SEARCH_RADIUS_M, API_KEY, CATEGORY_SPORT_RECREATION)
    all_data.append({
        'bike_station_id': row['id'],
        'number_of_public_transportation': public_transport,
        'number_of_chinese_restaurants': chinese_restaurants,
        'number_of_sport_recreation': sport_recreation
    })

# Build the DataFrame once from the collected records.
poi_data = pd.DataFrame(all_data)

# Display the final DataFrame
poi_data

Unnamed: 0,bike_station_id,number_of_public_transportation,number_of_chinese_restaurants,number_of_sport_recreation
0,46a983722ee1f51813a6a3eb6534a6e4,14,22,50
1,81872dcff5f0e5f1e042aafcc0bab6fd,37,25,42
2,fdb4831d3335a9aea380117ea4f4b81a,12,21,43
3,678fea3bb3afe05788a35750358db726,18,19,32
4,7ca4ed0b4fba2b2fb259ba84bd43e2ad,45,35,50
...,...,...,...,...
495,832bea3175d9d41c33795a52a1e74647,3,9,38
496,0243a8e61a86e32894867c42ebd883e6,6,9,30
497,e5a29af3b9346d4415f00946adafbd18,3,11,48
498,b5c4ad1cbac234b67785a1ab920eb044,4,6,16


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [13]:
# Same libraries as in the Foursquare section above.
import requests
import pandas as pd

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [14]:
# Count the businesses the Yelp business search returns around a coordinate.
def get_yelp_data_count(latitude, longitude, api_key, categories, radius=1000, limit=50, timeout=10):
    """Return how many Yelp businesses are found near a coordinate.

    Args:
        latitude: Latitude of the search centre.
        longitude: Longitude of the search centre.
        api_key: Yelp API key; sent as ``Authorization: Bearer <api_key>``.
        categories: Value for the API's ``categories`` query parameter.
        radius: Value for the API's ``radius`` query parameter.
        limit: Value for the API's ``limit`` query parameter; the count
            returned here is presumably capped at this value.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        int: Length of the ``businesses`` list in the JSON response, or 0
        when the API answers with anything other than HTTP 200 (the status
        code is printed in that case).
    """
    url = 'https://api.yelp.com/v3/businesses/search'
    headers = {'Authorization': f'Bearer {api_key}'}
    params = {
        'latitude': latitude,
        'longitude': longitude,
        'radius': radius,
        'categories': categories,
        'limit': limit
    }
    # The timeout prevents one stalled request from blocking the whole station loop.
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    if response.status_code != 200:
        print(f"Error with Yelp API call: {response.status_code}")
        return 0
    # Tolerate a 200 response without a 'businesses' key by counting it as empty.
    return len(response.json().get('businesses', []))

Put your parsed results into a DataFrame

In [17]:
# Main collection step for Yelp: count Chinese restaurants around each selected bike station.

# The key is read from the environment instead of being embedded in the notebook.
# NOTE(review): a literal key was previously committed in this cell; treat it as
# leaked and revoke/regenerate it in the Yelp developer console.
API_KEY_YELP = os.getenv('YELP_KEY')
if not API_KEY_YELP:
    # get_yelp_data_count() returns 0 for any non-200 response, so running
    # without a key would quietly produce a dataset of zeros.
    raise RuntimeError("Environment variable YELP_KEY is not set")

# List of per-station records; converted to a DataFrame after the loop.
yelp_data_counts = []

# Only the first 500 stations are queried: this stays within the author's Yelp
# call allowance and matches the sample used for Foursquare.
for index, row in bike_stations.head(500).iterrows():
    chinese_count = get_yelp_data_count(row['latitude'], row['longitude'], API_KEY_YELP, 'chinese')
    yelp_data_counts.append({
        'bike_station_id': row['id'],
        'chinese_count': chinese_count
    })

# Create a DataFrame from the collected counts
yelp_counts_df = pd.DataFrame(yelp_data_counts)

# Display the DataFrame
yelp_counts_df


Unnamed: 0,bike_station_id,chinese_count
0,46a983722ee1f51813a6a3eb6534a6e4,32
1,81872dcff5f0e5f1e042aafcc0bab6fd,46
2,fdb4831d3335a9aea380117ea4f4b81a,30
3,678fea3bb3afe05788a35750358db726,36
4,7ca4ed0b4fba2b2fb259ba84bd43e2ad,50
...,...,...
495,832bea3175d9d41c33795a52a1e74647,16
496,0243a8e61a86e32894867c42ebd883e6,17
497,e5a29af3b9346d4415f00946adafbd18,15
498,b5c4ad1cbac234b67785a1ab920eb044,13


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

In evaluating the Foursquare and Yelp APIs for retrieving Points of Interest (POIs), particularly Chinese restaurants, I found notable differences in data coverage and specificity. My analysis was guided by the goal of obtaining detailed information on Chinese restaurants around specific bike stations. The Foursquare API demonstrated a high degree of versatility in data retrieval. It allowed for layered querying, ranging from broad categories to highly specific ones: for example, it is possible to search for any restaurant, then narrow down to Asian restaurants, further to Chinese restaurants specifically, and even to individual restaurant names if desired. The Yelp API, in contrast, appeared to offer two distinct types of data retrieval: broad overviews or detailed information on specific venues. The middle ground, which is often useful for analysis, seemed less accessible. To compare the two APIs directly, I focused on the number of Chinese restaurants, since this category is available on both platforms. The counts differ: the average number of Chinese restaurants per bike station was 18 from Foursquare and 25 from Yelp. This was surprising, and it shows that Foursquare does not always return more information than Yelp. (Both APIs were queried with a limit of 50 results per request, so a count of 50 is a cap rather than an exact total, and the true averages may be higher.) For the categories unique to each API, Foursquare's data on public transportation and sports/recreation venues around bike stations offers valuable insights for users looking for connectivity and leisure activities. In general, Foursquare allowed for detailed exploration of local amenities, which is essential for users requiring in-depth analysis of nearby POI offerings. Yelp, while offering valuable insights and returning more Chinese restaurants in my case, was less flexible in how the data could be queried, which could hinder detailed analyses.

Get the top 10 restaurants according to their rating

### As you can see from my data, ratings were not collected, so I cannot list the top 10 restaurants according to their rating. Instead, below are the top 10 bike stations with the largest number of Chinese restaurants nearby.

In [25]:
# Ten stations with the highest Foursquare Chinese-restaurant counts.
# Counts of 50 equal the request limit used above, so ties at 50 are not ranked further.
(
    poi_data
    .loc[:, ['bike_station_id', 'number_of_chinese_restaurants']]
    .sort_values('number_of_chinese_restaurants', ascending=False)
    .head(10)
)

Unnamed: 0,bike_station_id,number_of_chinese_restaurants
393,fdf85ee78d2b08da8702caf317743a25,50
379,d011df899229420210406a4c7382ae43,50
243,12aa7924b713843d73c4432e4e35fa39,50
61,506ef3941933df4d38b00a3ae3fae8f2,50
468,aece94aba5628d18ff397efd44cd1b83,50
373,88baf784aeeca189b3e6db060603a440,50
273,902236f7c08be690cda3a4e4a48ea005,50
476,dc49cf8ba087c4d478091d789e37f632,50
450,324b7111c8f0114b7fe0e5bed4f56a2a,50
342,337518644c49baaccf74a42d3e1ce0b7,50


In [28]:
# Save the Foursquare counts as a CSV file (without the index column) so they can be
# reloaded in the joining and model-building parts of the assignment.
poi_data.to_csv('Foursquare.csv', index=False)

In [29]:
# Save the Yelp counts as a CSV file (without the index column) for the later joining and modelling steps.
yelp_counts_df.to_csv('yelp.csv', index=False)