In [None]:
# imports
import requests
import os
import json
import pandas as pd

In [None]:
#retrieve station data from city bikes notebook
with open('tokyo_bike_stations.json') as f:
    stations_data = json.load(f)

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [None]:
api_key = os.environ["FOURSQUARE_API_KEY"]
headers = {"Accept": "application/json"}
headers['Authorization'] = api_key

# get all lat and lon coordinates in a list
# category ids: 16032 -> park, 13034 -> cafe, 13003 -> bar, 17018 ->bookstore

def foursquare_call(stations_data, radius =1000, limit=50):
    data = []
    url ="https://api.foursquare.com/v3/places/search"
    for i in range(len(stations_data)):
        params = {
            'll':f'{stations_data[i]['lat']},{stations_data[i]['lon']}',
            'radius': radius,
            'limit': 50
        }
        response_fs = requests.get(url,params=params,headers=headers)
        data_point= response_fs.json()
        data.append(data_point)
    return data

call_response = foursquare_call(stations_data)

In [None]:
#Execute API call Request
call_response

In [None]:
# export foursquare call json as file in local directory to avoid making multiple calls
with open('foursquare_call.json', 'w') as f:
    json.dump(call_response, f)

In [85]:
# Load foursquare call json file
with open('foursquare_call.json') as f:
    did_it_work = json.load(f)


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [86]:
# check JSON file entry count matches bike station coordinates
print(len(did_it_work)) # 1582 entries
print(len(stations_data)) # 1582 entries
# View structure layout of each entry
print(did_it_work[0].keys())
# check the 'results' format
print(did_it_work[0]['results']) # list of dictionaries
print(len(did_it_work[0]['results'])) #first entry has 50 results reflecting the call limit output
print(did_it_work[0]['results'][0].keys()) # key of interest 'categories'
print(did_it_work[0]['results'][0]['categories'])# categories is a list of dictionaries

1582
1582
dict_keys(['results', 'context'])
[{'fsq_id': '4b79404ff964a520d2f02ee3', 'categories': [{'id': 16020, 'name': 'Historic and Protected Site', 'short_name': 'Historic and Protected Site', 'plural_name': 'Historic and Protected Sites', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/arts_entertainment/historicsite_', 'suffix': '.png'}}], 'chains': [], 'closed_bucket': 'LikelyOpen', 'distance': 135, 'geocodes': {'main': {'latitude': 35.692382, 'longitude': 139.749509}, 'roof': {'latitude': 35.692382, 'longitude': 139.749509}}, 'link': '/v3/places/4b79404ff964a520d2f02ee3', 'location': {'address': '北の丸公園1-1', 'country': 'JP', 'cross_street': '', 'formatted_address': '北の丸公園1-1, 千代田区, 東京都, 102-0091', 'locality': '東京', 'postcode': '102-0091', 'region': '東京都'}, 'name': 'Shimizumon Gate (清水門)', 'related_places': {'parent': {'fsq_id': '4b7237fdf964a52064742de3', 'categories': [{'id': 16032, 'name': 'Park', 'short_name': 'Park', 'plural_name': 'Parks', 'icon': {'prefix': 'htt

In [94]:
[r['categories'] for r in did_it_work[0]['results'][0:3]]

[[{'id': 16020,
   'name': 'Historic and Protected Site',
   'short_name': 'Historic and Protected Site',
   'plural_name': 'Historic and Protected Sites',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/arts_entertainment/historicsite_',
    'suffix': '.png'}}],
 [{'id': 13315,
   'name': 'Noodle Restaurant',
   'short_name': 'Noodles',
   'plural_name': 'Noodle Restaurants',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/ramen_',
    'suffix': '.png'}}],
 [{'id': 16032,
   'name': 'Park',
   'short_name': 'Park',
   'plural_name': 'Parks',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/park_',
    'suffix': '.png'}}]]

Put your parsed results into a DataFrame

In [None]:
# normalize the first entry into a test dataframe
df_test = pd.json_normalize(did_it_work[0]['results'])
print(df_test.keys()) # only need the first 10 columns
df_test_1 = df_test.iloc[:,:10]
# view category info location
df_test_1['categories'][0] # we'll pull id and name

# pull category information of interest and place in seperate columns 
category_id = []
category_type = []
for i in range(len(df_test)):
    category_id.append(df_test_1['categories'][i][0]['id'])
    category_type.append(df_test_1['categories'][i][0]['name'])
df_test_1['category_id'] = category_id
df_test_1['category_type'] = category_type
df_test_1
# drop categories column
df_test_1.drop(columns=['categories','chains','link'])

In [None]:
# Make a for loop to normalize each coordinate call into a dataframe and check if each call pulled 50 results
did_it_work[0]['results']
def normalized_dataframe_discrepancy(did_it_work):
    # create empty data_set list
    df_set = []
    count = 0
    # For loop cycles through the each station call for individual normalization & call number vetting
    for i in range(len(did_it_work)):
        df = pd.json_normalize(did_it_work[i]['results'])
        df = df.iloc[:,:10]
        if len(df['categories']) < 50:
            print(f'dataframe{i} only has {len(df['categories'])} results')
            count += 1
    print(count)
# call function
views = normalized_dataframe_discrepancy(did_it_work)
print(views)

In [None]:
# Make a refined foursquare call for POI(park, cafe, and bar)
api_key = os.environ["FOURSQUARE_API_KEY"]
headers = {"Accept": "application/json"}
headers['Authorization'] = api_key

# get all lat and lon coordinates in a list
# category ids: 16032 -> park, 13034 -> cafe, 13003 -> bar

def foursquare_call(stations_data, radius =1000, limit=50):
    data = []
    url ="https://api.foursquare.com/v3/places/search"
    for i in range(len(stations_data)):
        params = {
            'll':f'{stations_data[i]['lat']},{stations_data[i]['lon']}',
            # adding categories of interest
            'categories': '16032,13034,13003',
            'radius': radius,
            'limit': 50
        }
        response_fs = requests.get(url,params=params,headers=headers)
        data_point= response_fs.json()
        data.append(data_point)
    return data

refined_call_response = foursquare_call(stations_data)

In [None]:
# export refined foursquare call json as file in local directory to avoid making multiple calls
with open('refined_fs_call.json', 'w') as f:
    json.dump(refined_call_response, f)

In [None]:
# Load refined foursquare call json file
with open('refined_fs_call.json') as f:
    refined_json = json.load(f)

In [None]:
# Take category info extraction logic and make into a function
def category_extract(df):
    # pull category information of interest and place in seperate columns 
    category_id = []
    category_type = []
    for j in range(len(df)):
        category_id.append(df['categories'][j][0]['id'])
        category_type.append(df['categories'][j][0]['name'])
    df['category_id'] = category_id
    df['category_type'] = category_type
    # drop categories column
    df.drop(columns=['categories','chains','link'])
    return df

# Create function to cycle through each result
def normalized_dataframe(refined_json):
    # create empty data_set list
    df_set = []
    # first loop cycles through the each station call for individual normalization
    for i in range(len(refined_json)):
        df = pd.json_normalize(refined_json[i]['results'])
        df = df.iloc[:,:10]
        df['station_number'] = i + 1
        # use extraction function on each dataframe and added to set
        df_set.append(category_extract(df))
    return df_set

foursquare_data_ = normalized_dataframe(refined_json)

In [81]:
# all individual dataframe calls combined and droped redundant columns
foursquare_data_clean = pd.concat(foursquare_data_)
foursquare_data_clean = foursquare_data_clean.drop(columns=['categories','chains','link','timezone','geocodes.drop_off.latitude','geocodes.drop_off.longitude'])
foursquare_data_clean

Unnamed: 0,fsq_id,closed_bucket,distance,name,geocodes.main.latitude,geocodes.main.longitude,station_number,category_id,category_type
0,4b7237fdf964a52064742de3,LikelyOpen,303,Kitanomaru Park (北の丸公園),35.691684,139.751192,1,16032,Park
1,4bdc0b433904a59398b34c9e,LikelyOpen,605,Kanda Brazil (神田伯剌西爾),35.695820,139.759360,1,13034,Café
2,4f509953e4b044218ffe9b9f,LikelyOpen,360,CRAFT BEER MARKET,35.695378,139.756771,1,13006,Beer Bar
3,532ace8c498ef90c9a5b3b35,LikelyOpen,266,Mori no Butchers (森のブッチャーズ),35.694734,139.756006,1,13003,Bar
4,51a98ec1498ec63158095450,LikelyOpen,424,九段坂公園,35.694711,139.748608,1,16032,Park
...,...,...,...,...,...,...,...,...,...
34,a63ccbd3eb9746bbb15260ea,LikelyOpen,754,BAR GOD,35.523504,139.721890,1582,13009,Cocktail Bar
35,d9f68abed2c34551a19354db,LikelyOpen,767,スナックマリーン,35.525654,139.722429,1582,13003,Bar
36,eb3a92a7a5f04318bce9b135,LikelyOpen,854,桜堀緑地,35.515283,139.728640,1582,16032,Park
37,2bfd19b4743e40a8d9288ca2,LikelyOpen,907,塩浜橋公園,35.523112,139.740218,1582,16032,Park


In [82]:
#store foursquare data for use in joining_data notebook
%store foursquare_data_clean

Stored 'foursquare_data_clean' (DataFrame)


# Yelp

In [None]:
import requests
import os
import json
import pandas

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [36]:
yelp_api = os.environ["YELP_API_KEY"]
headers = {
    'Authorization': f'Bearer {yelp_api}',
}

def yelp_call(stations_data):
    yelp_data = []
    url = 'https://api.yelp.com/v3/businesses/search'
    for i in range(3):
        params = {
            'term':'bars',
            'latitude':stations_data[i]['lat'],
            'longitude': stations_data[i]['lon'],
            'radius': 1000,
            'limit':50
        }
        response_yelp = requests.get(url,headers=headers,params=params)
        yelp_point = response_yelp.json()
        yelp_data.append(yelp_point)
    return yelp_data

yelp_answer = yelp_call(stations_data)

In [37]:
with open('yelp_call.json', 'w') as f:
    json.dump(yelp_answer, f)

In [38]:
# Load foursquare call json file
with open('yelp_call.json') as f:
    test_file = json.load(f)

In [None]:
print(test_file)
print(len(test_file))
print(len(test_file[0]['businesses']))
print(test_file[0]['businesses'][0])

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
yelp_df_set = []
for i in range(len(test_file)):
    yelp_df = pd.json_normalize(test_file[0]['businesses'])
    yelp_df_set.append(yelp_df)
yelp_df_set = pd.concat(yelp_df_set)
yelp_df_set.drop(columns=['image_url','url','transactions','attributes.business_temp_closed','attributes.menu_url','attributes.open24_hours','attributes.waitlist_reservation'])

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,transactions,...,location.address3,location.city,location.zip_code,location.country,location.state,location.display_address,attributes.business_temp_closed,attributes.menu_url,attributes.open24_hours,attributes.waitlist_reservation
0,5Nz-xEnQ-zhcS-NxVN_vKA,mr-mayer-千代田区,Mr.Mayer,,False,https://www.yelp.com/biz/mr-mayer-%E5%8D%83%E4...,2,"[{'alias': 'lounges', 'title': 'Lounges'}]",4.0,[],...,,Chiyoda,102-0073,JP,13,"[九段北1-3-10, Chiyoda, 東京都 〒102-0073, Japan]",,,,
1,UP6Uj73rkXzKMSSPzt99xA,pool-と-darts-jack-水道橋店-千代田区,Pool & Darts Jack Suidobashi,https://s3-media4.fl.yelpcdn.com/bphoto/RCxAD_...,False,https://www.yelp.com/biz/pool-%E3%81%A8-darts-...,1,"[{'alias': 'bars', 'title': 'Bars'}, {'alias':...",4.0,[],...,,Chiyoda,101-0061,JP,13,"[三崎町1丁目4-8, 愛光舎ビル 6F, Chiyoda, 東京都 〒101-0061, ...",,,,
2,m5v3L0BNxXXJzeuZlpjGHg,神田フラックス-千代田区,Kanda Flux,https://s3-media2.fl.yelpcdn.com/bphoto/cu3WiZ...,False,https://www.yelp.com/biz/%E7%A5%9E%E7%94%B0%E3...,3,"[{'alias': 'bars', 'title': 'Bars'}]",5.0,[],...,,Chiyoda,101-0047,JP,13,"[内神田3丁目16-10, Chiyoda, 東京都 〒101-0047, Japan]",,,,
3,wXqMtQ8bCzaufWyv3vnOxQ,big-boy-千代田区,Big Boy,https://s3-media3.fl.yelpcdn.com/bphoto/nHYaBR...,False,https://www.yelp.com/biz/big-boy-%E5%8D%83%E4%...,1,"[{'alias': 'jazzandblues', 'title': 'Jazz & Bl...",5.0,[],...,,Chiyoda,101-0051,JP,13,"[神田神保町1丁目11, Chiyoda, 東京都 〒101-0051, Japan]",,,,
4,7HwPMQbp6Djkz_VYuFySmA,明治屋セカンド-千代田区,Meijiya Second,https://s3-media1.fl.yelpcdn.com/bphoto/7-jtYp...,False,https://www.yelp.com/biz/%E6%98%8E%E6%B2%BB%E5...,2,"[{'alias': 'bars', 'title': 'Bars'}]",4.0,[],...,,Chiyoda,,JP,13,"[神田神保町2-12, 石井ビル 1F, Chiyoda, 東京都, Japan]",,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45,iZVwblXOFoJiTShpVuMBaw,ラウンジピット-千代田区,JMB Lounge Pit,https://s3-media4.fl.yelpcdn.com/bphoto/vCgFz-...,False,https://www.yelp.com/biz/%E3%83%A9%E3%82%A6%E3...,1,"[{'alias': 'lounges', 'title': 'Lounges'}]",5.0,[],...,,Chiyoda,,JP,13,"[神田神保町一丁目105, 1F, Chiyoda, 東京都, Japan]",,,,
46,VRaU01Ts8Wv2l2qW3gDpYA,オーク-千代田区-2,Oak,https://s3-media2.fl.yelpcdn.com/bphoto/FyC-d3...,False,https://www.yelp.com/biz/%E3%82%AA%E3%83%BC%E3...,5,"[{'alias': 'bars', 'title': 'Bars'}]",4.2,[],...,,Chiyoda,100-0005,JP,13,"[丸ノ内1-9-1, 東京ステーションホテル2F, Chiyoda, 東京都 〒100-00...",,,,
47,z18u1seInAXz2nisM4X0lA,jazz-と-bar-concert-千代田区,Jazz & Bar Concert,https://s3-media3.fl.yelpcdn.com/bphoto/rWJ-HL...,False,https://www.yelp.com/biz/jazz-%E3%81%A8-bar-co...,0,"[{'alias': 'bars', 'title': 'Bars'}]",0.0,[],...,,Chiyoda,,JP,13,"[神田神保町1-62-4, 和光ビルジング 1F, Chiyoda, 東京都, Japan]",,,,
48,4koAjBER4L8pcojfecvVZQ,クラフトビアサーバーランド-新宿区,Craft Beer Server Land,https://s3-media1.fl.yelpcdn.com/bphoto/6CwhhN...,False,https://www.yelp.com/biz/%E3%82%AF%E3%83%A9%E3...,5,"[{'alias': 'beerbar', 'title': 'Beer Bar'}]",4.4,[],...,,Shinjuku,162-0825,JP,13,"[神楽坂2-9, 大川ビル B1F, Shinjuku, 東京都 〒162-0825, Ja...",,,,


Put your parsed results into a DataFrame

In [54]:
yelp_df_set_refined = yelp_df_set.drop(columns=['image_url','url','transactions','attributes.business_temp_closed','attributes.menu_url','attributes.open24_hours','attributes.waitlist_reservation'])
yelp_df_set_refined

Unnamed: 0,id,alias,name,is_closed,review_count,categories,rating,price,phone,display_phone,...,coordinates.latitude,coordinates.longitude,location.address1,location.address2,location.address3,location.city,location.zip_code,location.country,location.state,location.display_address
0,5Nz-xEnQ-zhcS-NxVN_vKA,mr-mayer-千代田区,Mr.Mayer,False,2,"[{'alias': 'lounges', 'title': 'Lounges'}]",4.0,￥￥,+81362723309,+81 3-6272-3309,...,35.696033,139.751663,九段北1-3-10,,,Chiyoda,102-0073,JP,13,"[九段北1-3-10, Chiyoda, 東京都 〒102-0073, Japan]"
1,UP6Uj73rkXzKMSSPzt99xA,pool-と-darts-jack-水道橋店-千代田区,Pool & Darts Jack Suidobashi,False,1,"[{'alias': 'bars', 'title': 'Bars'}, {'alias':...",4.0,,+81352808282,+81 3-5280-8282,...,35.701127,139.755672,三崎町1丁目4-8,愛光舎ビル 6F,,Chiyoda,101-0061,JP,13,"[三崎町1丁目4-8, 愛光舎ビル 6F, Chiyoda, 東京都 〒101-0061, ..."
2,m5v3L0BNxXXJzeuZlpjGHg,神田フラックス-千代田区,Kanda Flux,False,3,"[{'alias': 'bars', 'title': 'Bars'}]",5.0,,+81335263383,+81 3-3526-3383,...,35.691866,139.769532,内神田3丁目16-10,,,Chiyoda,101-0047,JP,13,"[内神田3丁目16-10, Chiyoda, 東京都 〒101-0047, Japan]"
3,wXqMtQ8bCzaufWyv3vnOxQ,big-boy-千代田区,Big Boy,False,1,"[{'alias': 'jazzandblues', 'title': 'Jazz & Bl...",5.0,￥￥,+81332334343,+81 3-3233-4343,...,35.695562,139.758619,神田神保町1丁目11,,,Chiyoda,101-0051,JP,13,"[神田神保町1丁目11, Chiyoda, 東京都 〒101-0051, Japan]"
4,7HwPMQbp6Djkz_VYuFySmA,明治屋セカンド-千代田区,Meijiya Second,False,2,"[{'alias': 'bars', 'title': 'Bars'}]",4.0,,+81332398086,+81 3-3239-8086,...,35.696508,139.756611,神田神保町2-12,石井ビル 1F,,Chiyoda,,JP,13,"[神田神保町2-12, 石井ビル 1F, Chiyoda, 東京都, Japan]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45,iZVwblXOFoJiTShpVuMBaw,ラウンジピット-千代田区,JMB Lounge Pit,False,1,"[{'alias': 'lounges', 'title': 'Lounges'}]",5.0,￥,,,...,35.694277,139.760426,神田神保町一丁目105,1F,,Chiyoda,,JP,13,"[神田神保町一丁目105, 1F, Chiyoda, 東京都, Japan]"
46,VRaU01Ts8Wv2l2qW3gDpYA,オーク-千代田区-2,Oak,False,5,"[{'alias': 'bars', 'title': 'Bars'}]",4.2,￥￥￥,+81352201111,+81 3-5220-1111,...,35.680681,139.766080,丸ノ内1-9-1,東京ステーションホテル2F,,Chiyoda,100-0005,JP,13,"[丸ノ内1-9-1, 東京ステーションホテル2F, Chiyoda, 東京都 〒100-00..."
47,z18u1seInAXz2nisM4X0lA,jazz-と-bar-concert-千代田区,Jazz & Bar Concert,False,0,"[{'alias': 'bars', 'title': 'Bars'}]",0.0,,+81352837117,+81 3-5283-7117,...,35.698639,139.757940,神田神保町1-62-4,和光ビルジング 1F,,Chiyoda,,JP,13,"[神田神保町1-62-4, 和光ビルジング 1F, Chiyoda, 東京都, Japan]"
48,4koAjBER4L8pcojfecvVZQ,クラフトビアサーバーランド-新宿区,Craft Beer Server Land,False,5,"[{'alias': 'beerbar', 'title': 'Beer Bar'}]",4.4,￥￥,+81362281891,+81 3-6228-1891,...,35.700898,139.741257,神楽坂2-9,大川ビル B1F,,Shinjuku,162-0825,JP,13,"[神楽坂2-9, 大川ビル B1F, Shinjuku, 東京都 〒162-0825, Ja..."


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

The yelp API provides are more detailed information such as reviews, ratings,phone numbers about each result compared to foursquare. However, yelp results categorization system has discrepancies in how certain establishments are classified. Results for coffee and tea shops were provided when only "bars" were selected in the initial call. The other problem with yelp is in its call request limitations: 1.only able to make 300 per day 2. unable to do multiple categories in a single request.

Foursquare has more value for our larger dataset due to the API: 1. having the ability to reselect multiple categories in a single request, 2. having a more robust category classification system that automatically pulls subcategory results that are relevent to the main POI category and 3. able to make larger calls in 1 day. These 3 features make the geospacial data in foursquare more useful for statistically significant insights. 

Get the top 10 restaurants according to their rating

In [None]:
yelp_df_set_refined.sort_values(by=['review_count','rating'], ascending=False).head(10)