In [1]:
# Import libraries
import pandas as pd
import numpy as np
import requests
import os
import pprint

# Show up to 500 columns when a DataFrame is displayed
pd.set_option('display.max_columns', 500)

In [2]:
# Load the bike stations' latitude and longitude from our saved 'montreal_stations_coordinate.csv' file
montreal_stations_coordinate = pd.read_csv('../data/montreal_stations_coordinate.csv')
montreal_stations_coordinate

Unnamed: 0,station_latitude,station_longitude
0,45.617500,-73.606011
1,45.516926,-73.564257
2,45.506176,-73.711186
3,45.512994,-73.682498
4,45.514734,-73.691449
...,...,...
793,45.533815,-73.578905
794,45.509911,-73.563806
795,45.497605,-73.555350
796,45.501188,-73.570455


Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [3]:
# Read the Foursquare API key from the 'fsq_key' environment variable
# (os.getenv returns None when the variable is not set)
FOURSQUARE_KEY = os.getenv('fsq_key')

In [4]:
# Create a function to get places from Foursquare API
def get_fsq_places(latitude, longitude, radius, api_key, categories, timeout=10):

    """
    Get venues from the Foursquare Places search endpoint around a coordinate.

    Parameters:
        latitude (float): The latitude coordinate of the location.
        longitude (float): The longitude coordinate of the location.
        radius (int): The search radius in meters.
        api_key (str): Your Foursquare API key, sent as the 'Authorization' header.
        categories (str): A comma-separated list of venue category ids.
        timeout (float): Seconds to wait for the server before giving up (default 10).

    Returns:
        dict: The decoded JSON body of the Foursquare API response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status code.
        requests.RequestException: On connection problems or when the timeout expires.

    Example:
        response_data = get_fsq_places(latitude=40.7128, longitude=-74.0060, radius=1000,
                                       api_key="YOUR_API_KEY", categories="13003,13004")
        # Returns JSON data with places information based on the provided parameters.
    """

    fsq_url = 'https://api.foursquare.com/v3/places/search'

    # Let requests build and escape the query string instead of concatenating it by hand
    params = {
        "ll": f"{latitude},{longitude}",
        "radius": radius,
        "categories": categories,
    }

    # Ask for JSON and authenticate with our API key
    headers = {
        "Accept": "application/json",
        "Authorization": api_key,
    }

    # Without a timeout a single stalled connection would block the caller indefinitely
    fsq_result = requests.get(fsq_url, params=params, headers=headers, timeout=timeout)

    # Fail loudly on an error status rather than handing an error payload back to the caller
    fsq_result.raise_for_status()

    return fsq_result.json()

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [5]:
# Collect the Foursquare results for bars within a 1000m radius of every Montreal bike station

# Prep a comma-separated string of the desired bar category ids: 13003 through 13024
# (range() excludes its end value, so 13025 is not part of the filter).
# NOTE(review): the earlier comment said "13003 to 13025" - confirm whether 13025 should be queried too.
cat_lst = list(range(13003, 13025))
cat_str_lst = [str(n) for n in cat_lst]
cat_str = ",".join(cat_str_lst)

# Gather the raw place records in a plain list and build the DataFrame once at the end;
# calling pd.concat inside the loop copies every row collected so far on each iteration.
fsq_bar_records = []

for latitude, longitude in zip(
    montreal_stations_coordinate['station_latitude'],
    montreal_stations_coordinate['station_longitude'],
):
    fsq_bar_data = get_fsq_places(latitude=latitude, longitude=longitude, radius=1000, api_key=FOURSQUARE_KEY, categories=cat_str)

    # Each response carries its places as a list of dicts under 'results'
    fsq_bar_records.extend(fsq_bar_data['results'])

# One row per returned place; keys missing from a record become NaN in that row
fsq_bars_df = pd.DataFrame(fsq_bar_records)

In [6]:
# Preview the first 5 rows of the collected Foursquare results
fsq_bars_df.head(5)

Unnamed: 0,fsq_id,categories,chains,distance,geocodes,link,location,name,related_places,timezone
0,ec53d9eccf6a445dc5ad4526,"[{'id': 13022, 'name': 'Sports Bar', 'icon': {...",[],246,"{'drop_off': {'latitude': 45.61913, 'longitude...",/v3/places/ec53d9eccf6a445dc5ad4526,"{'address': '7000 Maurice-Duplessis Blvd', 'co...",Resto-bar Capucine - Nord-Est de Montréal,{},
1,598f0623747e41848ab1057c,"[{'id': 13009, 'name': 'Cocktail Bar', 'icon':...",[],661,"{'main': {'latitude': 45.611645, 'longitude': ...",/v3/places/598f0623747e41848ab1057c,"{'address': '6711 Henri Bourassa Blvd Est N', ...",Piano Bar la Belle Epoque,{},
2,eec429fe03c84972cf87ffa7,"[{'id': 13003, 'name': 'Bar', 'icon': {'prefix...",[],809,"{'drop_off': {'latitude': 45.624169, 'longitud...",/v3/places/eec429fe03c84972cf87ffa7,"{'address': '7310 Maurice-Duplessis Blvd', 'co...",Cafe liana bar & grill,{},
3,401e519268b849bfa9d2bc9b,"[{'id': 13003, 'name': 'Bar', 'icon': {'prefix...",[],960,"{'main': {'latitude': 45.609744, 'longitude': ...",/v3/places/401e519268b849bfa9d2bc9b,"{'address': '6363 Henri-Bourassa Blvd E', 'cou...",La Veranda,{},
4,4b0c1ff8f964a520603723e3,"[{'id': 13003, 'name': 'Bar', 'icon': {'prefix...",[],132,"{'main': {'latitude': 45.515893, 'longitude': ...",/v3/places/4b0c1ff8f964a520603723e3,"{'address': '1749 Saint-Denis Rue', 'country':...",Le Saint Bock,{},America/Toronto


In [7]:
# Summarize the column dtypes and non-null counts of the collected results
fsq_bars_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5907 entries, 0 to 5906
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   fsq_id          5907 non-null   object
 1   categories      5907 non-null   object
 2   chains          5907 non-null   object
 3   distance        5907 non-null   int64 
 4   geocodes        5907 non-null   object
 5   link            5907 non-null   object
 6   location        5907 non-null   object
 7   name            5907 non-null   object
 8   related_places  5907 non-null   object
 9   timezone        5314 non-null   object
dtypes: int64(1), object(9)
memory usage: 461.6+ KB


In [8]:
# Check whether the Foursquare results include a 'rating' column
'rating' in fsq_bars_df.columns

False

In [9]:
# Archive the raw Foursquare API results by writing the dataframe to 'fsq_bars_df.csv'
fsq_bars_df.to_csv('../data/fsq_bars_df.csv')

In [10]:
# List every row whose 'fsq_id' already appeared earlier in the results, ordered by id
fsq_dup = fsq_bars_df.loc[fsq_bars_df.duplicated(subset='fsq_id')].sort_values(by='fsq_id')
fsq_dup

Unnamed: 0,fsq_id,categories,chains,distance,geocodes,link,location,name,related_places,timezone
3247,04370f71f5954b42f7db79fb,"[{'id': 13015, 'name': 'Karaoke Bar', 'icon': ...",[],288,"{'drop_off': {'latitude': 45.553447, 'longitud...",/v3/places/04370f71f5954b42f7db79fb,"{'address': '2480 Jean-Talon Rue E', 'country'...",Au Ptit Buck,{},
2200,04370f71f5954b42f7db79fb,"[{'id': 13015, 'name': 'Karaoke Bar', 'icon': ...",[],680,"{'drop_off': {'latitude': 45.553447, 'longitud...",/v3/places/04370f71f5954b42f7db79fb,"{'address': '2480 Jean-Talon Rue E', 'country'...",Au Ptit Buck,{},
5108,04370f71f5954b42f7db79fb,"[{'id': 13015, 'name': 'Karaoke Bar', 'icon': ...",[],465,"{'drop_off': {'latitude': 45.553447, 'longitud...",/v3/places/04370f71f5954b42f7db79fb,"{'address': '2480 Jean-Talon Rue E', 'country'...",Au Ptit Buck,{},
3205,04370f71f5954b42f7db79fb,"[{'id': 13015, 'name': 'Karaoke Bar', 'icon': ...",[],501,"{'drop_off': {'latitude': 45.553447, 'longitud...",/v3/places/04370f71f5954b42f7db79fb,"{'address': '2480 Jean-Talon Rue E', 'country'...",Au Ptit Buck,{},
3264,04370f71f5954b42f7db79fb,"[{'id': 13015, 'name': 'Karaoke Bar', 'icon': ...",[],478,"{'drop_off': {'latitude': 45.553447, 'longitud...",/v3/places/04370f71f5954b42f7db79fb,"{'address': '2480 Jean-Talon Rue E', 'country'...",Au Ptit Buck,{},
...,...,...,...,...,...,...,...,...,...,...
2347,f7f61a67218b481a5dabe5b0,"[{'id': 13003, 'name': 'Bar', 'icon': {'prefix...",[],141,"{'drop_off': {'latitude': 45.4349, 'longitude'...",/v3/places/f7f61a67218b481a5dabe5b0,"{'address': '580 Notre-Dame Rue', 'country': '...",Bar Liqueur,{},
4334,f7f61a67218b481a5dabe5b0,"[{'id': 13003, 'name': 'Bar', 'icon': {'prefix...",[],592,"{'drop_off': {'latitude': 45.4349, 'longitude'...",/v3/places/f7f61a67218b481a5dabe5b0,"{'address': '580 Notre-Dame Rue', 'country': '...",Bar Liqueur,{},
2280,f7f61a67218b481a5dabe5b0,"[{'id': 13003, 'name': 'Bar', 'icon': {'prefix...",[],808,"{'drop_off': {'latitude': 45.4349, 'longitude'...",/v3/places/f7f61a67218b481a5dabe5b0,"{'address': '580 Notre-Dame Rue', 'country': '...",Bar Liqueur,{},
4300,fca85dc09277444177373453,"[{'id': 13003, 'name': 'Bar', 'icon': {'prefix...",[],174,"{'main': {'latitude': 45.611667, 'longitude': ...",/v3/places/fca85dc09277444177373453,"{'address': '9751 Notre-Dame Rue E', 'country'...",Le Relais du Voyageur,{},


The same bar ('fsq_id') is returned once for every bike station that has it within its 1000m search radius, so the duplicated rows differ mainly in the 'distance' column, which is measured from the station that was queried. The 'timezone' column, which has missing values, may also differ between duplicates. We can drop the unimportant 'timezone' column and keep the first row for each 'fsq_id' when removing the duplicated records.

In [18]:
# Build a new dataframe 'fsq_bars_df2' for cleaning: the raw 'fsq_bars_df' minus the 'timezone' column
# (drop returns a new frame, so the raw results stay untouched)
fsq_bars_df2 = fsq_bars_df.drop(columns=['timezone'])

In [19]:
# For each 'fsq_id' retain only its first occurrence and discard the later repeats
fsq_bars_df2 = fsq_bars_df2.drop_duplicates(subset='fsq_id', keep='first')

In [20]:
# Check the shape after removing duplicated rows (578 rows remain in the recorded run)
fsq_bars_df2.shape

(578, 9)

In [21]:
# Save this cleaned-up Foursquare bar data to the 'fsq_bars_df2.csv' file
fsq_bars_df2.to_csv('../data/fsq_bars_df2.csv')

Put your parsed results into a DataFrame

In [22]:
# List the columns of the cleaned-up 'fsq_bars_df2' to decide which of them to keep
fsq_bars_df2.columns

Index(['fsq_id', 'categories', 'chains', 'distance', 'geocodes', 'link',
       'location', 'name', 'related_places'],
      dtype='object')

In [23]:
# Keep only the meaningful fields from 'fsq_bars_df2': name, postcode, and distance.
# The frame is built directly from the columns instead of appending to module-level
# lists row by row.
fsq_bar_features_df = pd.DataFrame({
    'name': fsq_bars_df2['name'].tolist(),
    # 'location' holds one dict per place; fall back to '' when it has no 'postcode' key
    'postcode': [place_location.get('postcode', '') for place_location in fsq_bars_df2['location']],
    'distance': fsq_bars_df2['distance'].tolist(),
})

# Show the first rows of the feature table
fsq_bar_features_df.head(5)

Unnamed: 0,name,postcode,distance
0,Resto-bar Capucine - Nord-Est de Montréal,,246
1,Piano Bar la Belle Epoque,H1G 2V6,661
2,Cafe liana bar & grill,H1E 1M4,809
3,La Veranda,H1G 2V5,960
4,Le Saint Bock,H2X 3K4,132


In [24]:
# Confirm the feature table's dimensions (rows, columns)
fsq_bar_features_df.shape

(578, 3)

In [25]:
# Save the Foursquare Montreal bar features to 'fsq_bar_features.csv'
fsq_bar_features_df.to_csv('../data/fsq_bar_features.csv')