In [1]:
# Import libaries
import pandas as pd
import numpy as np
import requests
import os
import pprint

# Set option to view max 500 columns
pd.set_option('display.max_columns', 500)

In [2]:
# Recall the bike stations latitude and logitude from our saved 'montreal_stations_coordinate.csv' file
montreal_stations_coordinate = pd.read_csv('../data/montreal_stations_coordinate.csv')
montreal_stations_coordinate

Unnamed: 0,cb_latitude,cb_longitude
0,45.617500,-73.606011
1,45.516926,-73.564257
2,45.506176,-73.711186
3,45.512994,-73.682498
4,45.514734,-73.691449
...,...,...
793,45.497605,-73.555350
794,45.501188,-73.570455
795,45.492913,-73.564688
796,45.545188,-73.576443


Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [3]:
FOURSQUARE_KEY = os.getenv('fsq_key')

In [4]:
# Create a function to get places from Foursquare API
def get_fsq_places(latitude, longitude, radius, api_key, categories):

    """
    Get venues from Foursquare API based on latitude, longitude, radius, and categories.

    Parameters:
        latitude (float): The latitude coordinate of the location.
        longitude (float): The longitude coordinate of the location.
        radius (int): The search radius in meters.
        api_key (str): Your Foursquare API key for authentication.
        categories (str): A comma-separated list of venue categories.

    Returns:
        json: JSON data containing the response data from the Foursquare API.

    Example:
        response_data = get_fsq_places(atitude=40.7128, longitude=-74.0060, radius=1000, "YOUR_API_KEY", categories="13003,13004")
        # Returns JSON data with places information based on the provided parameters.
    """
    
    # Form the url
    fsq_url = 'https://api.foursquare.com/v3/places/search?' + f"ll={latitude},{longitude}" + f"&radius={radius}" + f"&categories={categories}"
    
    # Create dictionary for headers
    headers = {"Accept": "application/json"}
    
    # Add key with our API KEY
    headers['Authorization'] = api_key
    
    # Get the response, check status
    fsq_result = requests.get(fsq_url, headers=headers)
    
    return fsq_result.json()

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [5]:
# Create an empty DataFrame to store the results from Foursquare for bars with 1000m radius from Montreal bike stations
fsq_bars_df = pd.DataFrame()

# Prep a string including all the list of desired categories, ranging from 13003 to 13025 seperated by ',' for bars
cat_lst = list(range(13003, 13025))
cat_str_lst = [str(n) for n in cat_lst]
cat_str = ",".join(cat_str_lst)

for index, row in montreal_stations_coordinate.iterrows():
    latitude = row['cb_latitude']
    longitude = row['cb_longitude']
    fsq_bar_data = get_fsq_places(latitude=latitude, longitude=longitude, radius=1000, api_key=FOURSQUARE_KEY, categories=cat_str)
    
    # Convert the 'fsq_bar_data' dictionary to a DataFrame
    fsq_bar_details = pd.DataFrame.from_dict(fsq_bar_data['results'])
    
    # Add latitude and longitude to fsq_bar_details DataFrame
    fsq_bar_details['fsq_latitude'] = latitude
    fsq_bar_details['fsq_longitude'] = longitude

    # Append it to 'fsq_bars_df'
    fsq_bars_df = pd.concat([fsq_bars_df, fsq_bar_details], ignore_index=True)

In [6]:
fsq_bars_df.head(5)

Unnamed: 0,fsq_id,categories,chains,distance,geocodes,link,location,name,related_places,fsq_latitude,fsq_longitude,timezone
0,ec53d9eccf6a445dc5ad4526,"[{'id': 13022, 'name': 'Sports Bar', 'icon': {...",[],246.0,"{'drop_off': {'latitude': 45.61913, 'longitude...",/v3/places/ec53d9eccf6a445dc5ad4526,"{'address': '7000 Maurice-Duplessis Blvd', 'co...",Resto-bar Capucine - Nord-Est de Montréal,{},45.6175,-73.606011,
1,598f0623747e41848ab1057c,"[{'id': 13009, 'name': 'Cocktail Bar', 'icon':...",[],661.0,"{'main': {'latitude': 45.611645, 'longitude': ...",/v3/places/598f0623747e41848ab1057c,"{'address': '6711 Henri Bourassa Blvd Est N', ...",Piano Bar la Belle Epoque,{},45.6175,-73.606011,
2,eec429fe03c84972cf87ffa7,"[{'id': 13003, 'name': 'Bar', 'icon': {'prefix...",[],809.0,"{'drop_off': {'latitude': 45.624169, 'longitud...",/v3/places/eec429fe03c84972cf87ffa7,"{'address': '7310 Maurice-Duplessis Blvd', 'co...",Cafe liana bar & grill,{},45.6175,-73.606011,
3,401e519268b849bfa9d2bc9b,"[{'id': 13003, 'name': 'Bar', 'icon': {'prefix...",[],960.0,"{'main': {'latitude': 45.609744, 'longitude': ...",/v3/places/401e519268b849bfa9d2bc9b,"{'address': '6363 Henri-Bourassa Blvd E', 'cou...",La Veranda,{},45.6175,-73.606011,
4,4b0c1ff8f964a520603723e3,"[{'id': 13003, 'name': 'Bar', 'icon': {'prefix...",[],132.0,"{'main': {'latitude': 45.515893, 'longitude': ...",/v3/places/4b0c1ff8f964a520603723e3,"{'address': '1749 Saint-Denis Rue', 'country':...",Le Saint Bock,{},45.516926,-73.564257,America/Toronto


In [7]:
fsq_bars_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5907 entries, 0 to 5906
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   fsq_id          5907 non-null   object 
 1   categories      5907 non-null   object 
 2   chains          5907 non-null   object 
 3   distance        5907 non-null   float64
 4   geocodes        5907 non-null   object 
 5   link            5907 non-null   object 
 6   location        5907 non-null   object 
 7   name            5907 non-null   object 
 8   related_places  5907 non-null   object 
 9   fsq_latitude    5907 non-null   float64
 10  fsq_longitude   5907 non-null   float64
 11  timezone        5314 non-null   object 
dtypes: float64(3), object(9)
memory usage: 553.9+ KB


In [8]:
# Check if 'rating' is provided from 'fsq_bars_df'
'rating' in fsq_bars_df.columns

False

In [9]:
# Convert into 'fsq_bars_df.csv' file for archival of the Foursquare API call dataframe
fsq_bars_df.to_csv('../data/fsq_bars_df.csv')

In [10]:
# Check if bar 'fsq_id' is duplicated
fsq_dup = fsq_bars_df[fsq_bars_df[['fsq_id']].duplicated() == True].sort_values('fsq_id')
fsq_dup

Unnamed: 0,fsq_id,categories,chains,distance,geocodes,link,location,name,related_places,fsq_latitude,fsq_longitude,timezone
3856,04370f71f5954b42f7db79fb,"[{'id': 13015, 'name': 'Karaoke Bar', 'icon': ...",[],863.0,"{'drop_off': {'latitude': 45.553447, 'longitud...",/v3/places/04370f71f5954b42f7db79fb,"{'address': '2480 Jean-Talon Rue E', 'country'...",Au Ptit Buck,{},45.552289,-73.613006,
5108,04370f71f5954b42f7db79fb,"[{'id': 13015, 'name': 'Karaoke Bar', 'icon': ...",[],465.0,"{'drop_off': {'latitude': 45.553447, 'longitud...",/v3/places/04370f71f5954b42f7db79fb,"{'address': '2480 Jean-Talon Rue E', 'country'...",Au Ptit Buck,{},45.554583,-73.596319,
3272,04370f71f5954b42f7db79fb,"[{'id': 13015, 'name': 'Karaoke Bar', 'icon': ...",[],201.0,"{'drop_off': {'latitude': 45.553447, 'longitud...",/v3/places/04370f71f5954b42f7db79fb,"{'address': '2480 Jean-Talon Rue E', 'country'...",Au Ptit Buck,{},45.551774,-73.603478,
3264,04370f71f5954b42f7db79fb,"[{'id': 13015, 'name': 'Karaoke Bar', 'icon': ...",[],478.0,"{'drop_off': {'latitude': 45.553447, 'longitud...",/v3/places/04370f71f5954b42f7db79fb,"{'address': '2480 Jean-Talon Rue E', 'country'...",Au Ptit Buck,{},45.549755,-73.605566,
429,04370f71f5954b42f7db79fb,"[{'id': 13015, 'name': 'Karaoke Bar', 'icon': ...",[],835.0,"{'drop_off': {'latitude': 45.553447, 'longitud...",/v3/places/04370f71f5954b42f7db79fb,"{'address': '2480 Jean-Talon Rue E', 'country'...",Au Ptit Buck,{},45.554637,-73.612552,
...,...,...,...,...,...,...,...,...,...,...,...,...
2347,f7f61a67218b481a5dabe5b0,"[{'id': 13003, 'name': 'Bar', 'icon': {'prefix...",[],141.0,"{'drop_off': {'latitude': 45.4349, 'longitude'...",/v3/places/f7f61a67218b481a5dabe5b0,"{'address': '580 Notre-Dame Rue', 'country': '...",Bar Liqueur,{},45.434987,-73.670799,
2280,f7f61a67218b481a5dabe5b0,"[{'id': 13003, 'name': 'Bar', 'icon': {'prefix...",[],808.0,"{'drop_off': {'latitude': 45.4349, 'longitude'...",/v3/places/f7f61a67218b481a5dabe5b0,"{'address': '580 Notre-Dame Rue', 'country': '...",Bar Liqueur,{},45.429167,-73.662810,
192,f7f61a67218b481a5dabe5b0,"[{'id': 13003, 'name': 'Bar', 'icon': {'prefix...",[],878.0,"{'drop_off': {'latitude': 45.4349, 'longitude'...",/v3/places/f7f61a67218b481a5dabe5b0,"{'address': '580 Notre-Dame Rue', 'country': '...",Bar Liqueur,{},45.435794,-73.680190,
2548,fca85dc09277444177373453,"[{'id': 13003, 'name': 'Bar', 'icon': {'prefix...",[],912.0,"{'main': {'latitude': 45.611667, 'longitude': ...",/v3/places/fca85dc09277444177373453,"{'address': '9751 Notre-Dame Rue E', 'country'...",Le Relais du Voyageur,{},45.604441,-73.516118,


Most duplication happens in the 'distance' column as we looked up the POI bar info via different bike station coordinates. This is something we can check in part 3 when we merge data from Citybikes, Foursquare and Yelp. There are possibilities of impacts on duplication from other columns as well with different values and missing values. We'll revisit that again in part 3.

Put your parsed results into a DataFrame

In [11]:
# View all columns of 'fsq_bars_df' whether we should use them all
fsq_bars_df.columns

Index(['fsq_id', 'categories', 'chains', 'distance', 'geocodes', 'link',
       'location', 'name', 'related_places', 'fsq_latitude', 'fsq_longitude',
       'timezone'],
      dtype='object')

Explore the categories of bars:

Here's an example code snippet of the JSON result from Foursquare. A bar can be listed different categories, mixed with other types. However, the bar category id and name shows up first before other types of categories, probably because we made the API call for bars.

In [12]:
{'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/nightlife/cocktails_',
                                       'suffix': '.png'},
                              'id': 13009,
                              'name': 'Cocktail Bar'},
                             {'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/nightlife/default_',
                                       'suffix': '.png'},
                              'id': 13016,
                              'name': 'Lounge'}]}

{'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/nightlife/cocktails_',
    'suffix': '.png'},
   'id': 13009,
   'name': 'Cocktail Bar'},
  {'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/nightlife/default_',
    'suffix': '.png'},
   'id': 13016,
   'name': 'Lounge'}]}

In [14]:
# Initialize lists to only select data from some meaningful columns: name, postcode, distance
name = []
category = []
postcode =[]
distance = []
latitude = []
longitude = []

# Extract data from 'result_df'
for index, record in fsq_bars_df.iterrows():
    name.append(record['name'])

    categories = record['categories'][0]
    category.append(categories.get('name', ''))

    location = record['location']
    postcode.append(location.get('postcode', ''))

    distance.append(record['distance'])
    latitude.append(record['fsq_latitude'])
    longitude.append(record['fsq_longitude'])

# Create a DataFrame from the extracted data
fsq_bar_features_df = pd.DataFrame({
    'name': name,
    'postcode': postcode,
    'category': category,
    'distance': distance,
    'latitude': latitude,
    'latitude': longitude,
})

# Print the head of the DataFrame
fsq_bar_features_df.head(5)

Unnamed: 0,name,postcode,category,distance,latitude
0,Resto-bar Capucine - Nord-Est de Montréal,,Sports Bar,246.0,-73.606011
1,Piano Bar la Belle Epoque,H1G 2V6,Cocktail Bar,661.0,-73.606011
2,Cafe liana bar & grill,H1E 1M4,Bar,809.0,-73.606011
3,La Veranda,H1G 2V5,Bar,960.0,-73.606011
4,Le Saint Bock,H2X 3K4,Bar,132.0,-73.564257


In [15]:
fsq_bar_features_df.shape

(5907, 5)

In [16]:
# Save the Foursquare's Montreal bar results to csv
fsq_bar_features_df.to_csv('../data/fsq_bar_features.csv')