In [160]:
# imports pandas
import pandas as pd
import os
import requests
import numpy as np


<IPython.core.display.Javascript object>

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

**City: Vancouver**

In [129]:
# Foursquare credentials are read from the environment so that no secret is
# stored in the notebook; a missing variable raises KeyError right away.
fs_client_id, fs_client_secret, fsq_api_key = (
    os.environ[variable]
    for variable in ('FSQ_CLIENT_ID', 'FSQ_CLIENT_SECRET', 'FSQ_API_KEY')
)

In [132]:
# Read the bike-station table stored in ../data/citybikes_vancouver.csv
citybikes_vancouver = pd.read_csv(filepath_or_buffer='../data/citybikes_vancouver.csv')

# How many rows carry a (non-null) station name
citybikes_vancouver.loc[:, 'name'].count()

241

### Sample Query for 1 Station

In [None]:
# Query Foursquare around ONE station before looping over all of them.
# The first station of the Vancouver table is used so the sample matches the
# city under study: the point that used to be hard-coded here
# (48.865983, 2.275725) lies far outside Vancouver, whose stations sit at a
# longitude of roughly -123.
sample_station = citybikes_vancouver.iloc[0]
lati = sample_station['latitude']
long = sample_station['longitude']
radius = 1000  # search radius in metres

url = 'https://api.foursquare.com/v3/places/search?ll={},{}&radius={}'.format(lati, long, radius)

headers = {
    "accept": "application/json",
    "Authorization": fsq_api_key
}
# A timeout keeps a stalled connection from hanging the kernel indefinitely.
response = requests.get(url, headers=headers, timeout=30)
# Fail with the real HTTP error (bad key, rate limit, ...) instead of a
# confusing KeyError on 'results' further down.
response.raise_for_status()
json_data = response.json()

# json_data['results'] is a list of dictionaries, one per place; each becomes a
# row. The nested fields (location, geocodes, categories) are flattened in the
# next cell.
df = pd.DataFrame(json_data['results'])


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
# Pull the fields of interest out of the raw Foursquare response frame `df`.
site = 'Foursquare'

# columns that are already flat
name = df['name']
fsq_id = df['fsq_id']
distance = df['distance']

# the formatted address sits inside the nested 'location' dictionary
location = pd.json_normalize(df['location'])
address = location['formatted_address']

# coordinates sit under geocodes -> main
geocodes = pd.json_normalize(df['geocodes'])
lat = geocodes['main.latitude']
long = geocodes['main.longitude']

# 'categories' is a list per place: expand the lists into columns, keep the
# first entry of each place and read its name
category = pd.json_normalize(pd.json_normalize(df['categories'])[0])['name']


Put your parsed results into a DataFrame

In [None]:
# Assemble the parsed fields into one table: id, category, name, address,
# coordinates and the source site (one row per point of interest)
bikestation_poi = pd.DataFrame(
    dict(
        id=fsq_id,
        category=category,
        name=name,
        address=address,
        lat=lat,
        long=long,
        site=site,
    )
)

# show the resulting table
bikestation_poi

### Fetching Foursquare records for every CityBikes station, using each station's (lat, long)

In [None]:
radius = 1000  # search radius in metres
site = 'Foursquare'
request_timeout = 30  # seconds; keeps one stalled request from hanging the whole run

# The headers are the same for every request, so build them once.
headers = {
    "accept": "application/json",
    "Authorization": fsq_api_key
}


def _first_category_name(categories):
    """Return the name of the first category of a place, or None if it lists none."""
    if isinstance(categories, list) and categories:
        return categories[0].get('name')
    return None


# One frame per station, concatenated once after the loop. DataFrame.append was
# removed in pandas 2.0, and growing a frame inside the loop re-copies all
# previous rows on every iteration.
foursquare_frames = []

for index, row in citybikes_vancouver.iterrows():
    station_name = row['name']
    cb_id = row['cb_id']

    # build the url from the station's own coordinates
    url = 'https://api.foursquare.com/v3/places/search?ll={},{}&radius={}'.format(
        row['latitude'], row['longitude'], radius)
    response = requests.get(url, headers=headers, timeout=request_timeout)
    # Surface HTTP errors (bad key, rate limit, ...) as such rather than as a
    # KeyError on 'results'.
    response.raise_for_status()
    json_data = response.json()

    df = pd.DataFrame(json_data.get('results', []))
    if df.empty:
        # nothing found around this station; the nested columns would be missing
        continue

    # flatten the nested address / coordinate dictionaries
    location = pd.json_normalize(df['location'])
    geocodes = pd.json_normalize(df['geocodes'])

    # one row per point of interest, tagged with the station it was found near
    bikestation_poi = pd.DataFrame({
        'fsq_id': df['fsq_id'],
        'station': station_name,
        'category': df['categories'].map(_first_category_name),
        'name': df['name'],
        'address': location['formatted_address'],
        'lat': geocodes['main.latitude'],
        'long': geocodes['main.longitude'],
        'site': site,
    })

    # add cb_id to bikestation_poi
    bikestation_poi['cb_id'] = cb_id

    foursquare_frames.append(bikestation_poi)

# pd.concat raises on an empty list, so fall back to an empty frame
citybike_foursquare = pd.concat(foursquare_frames) if foursquare_frames else pd.DataFrame()
        


Save the combined results to a CSV file so they can be reused later without making further API calls.

In [137]:
# save the dataframe to a csv file
citybike_foursquare.to_csv('../data/cb_fsq_vancouver.csv', index=False)

# The preview goes last: a notebook cell only renders the value of its final
# expression, so a head() call placed before to_csv() is never displayed.
citybike_foursquare.head()

Fetch the tips for each point of interest from the Foursquare API and collect them in a separate DataFrame.

In [138]:
# One DataFrame of tips per point of interest; they are concatenated afterwards.
tips = []

headers = {
    "accept": "application/json",
    "Authorization": fsq_api_key
}

# The same place can show up near several stations. Each distinct fsq_id is
# requested once, which saves API calls and avoids duplicated tip rows.
for fsq_id in citybike_foursquare['fsq_id'].dropna().unique():
    tips_url = 'https://api.foursquare.com/v3/places/{}/tips'.format(fsq_id)
    response = requests.get(tips_url, headers=headers, timeout=30)

    # Best effort: skip a place whose request failed. Without this check the
    # JSON body of an error response would be normalized into a bogus tip row.
    if not response.ok:
        continue

    try:
        json_data = response.json()
    except ValueError:
        # body is not valid JSON (requests' JSON decode errors subclass ValueError)
        continue

    # only keep non-empty payloads
    if isinstance(json_data, (list, dict)) and len(json_data) > 0:
        # json_normalize already returns a DataFrame
        tip_df = pd.json_normalize(json_data)
        # remember which place the tips belong to
        tip_df['fsq_id'] = fsq_id
        tips.append(tip_df)




In [141]:
# Combine the per-place tip frames into a single table.
# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when no tips were collected.
tips_df = pd.concat(tips) if tips else pd.DataFrame()
#save to csv
#tips_df.to_csv('../data/cb_fsq_tips_vancouver.csv', index=False)


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [165]:
# Credentials come from the environment so that none is stored in the notebook.
# The client id is optional here: the requests in the visible cells authenticate
# with the API key only, so a missing YELP_CLIENT_ID simply yields None.
yelp_client_id = os.environ.get('YELP_CLIENT_ID')
yelp_api_key = os.environ['YELP_API_KEY']


# Headers shared by every Yelp request below
headers = {
    "accept": "application/json",
    "Authorization": f'Bearer {yelp_api_key}'
}

#### Single-station API call

In [110]:
# Query Yelp around ONE station before looping over all of them.
# The first station of the Vancouver table is used so the sample matches the
# city under study: the point that used to be hard-coded here
# (48.865983, 2.275725) lies far outside Vancouver, whose stations sit at a
# longitude of roughly -123.
sample_station = citybikes_vancouver.iloc[0]
lati = sample_station['latitude']
long = sample_station['longitude']
radius = 1000  # search radius in metres

# format this url with latitude and longitude
url = 'https://api.yelp.com/v3/businesses/search?latitude={}&longitude={}&radius={}&sort_by=best_match&limit=10'.format(
    lati, long, radius)

# A timeout keeps a stalled connection from hanging the kernel indefinitely.
response = requests.get(url, headers=headers, timeout=30)
# Fail with the real HTTP error instead of a KeyError on 'businesses' below.
response.raise_for_status()
json_data = response.json()

# flatten the list of businesses into a table (nested keys become dotted columns)
df = pd.json_normalize(json_data['businesses'])


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [112]:
# Pick one category title per business: the last of its first three listed
# categories (third if present, otherwise second, otherwise first).
# Reading it straight from each list avoids the KeyError that positional columns
# (cate_df[2], cate_df[1]) raise when no business in the response has that many
# categories. A business without categories gets None.
category = df['categories'].map(
    lambda categories: categories[:3][-1].get('title')
    if isinstance(categories, list) and categories
    else None
).rename('title')

name = pd.DataFrame(df['name'])

# rename the dotted json_normalize columns to short names
address = pd.DataFrame(df['location.display_address'])
address = address.rename(columns={'location.display_address':'address'})

lat = pd.DataFrame(df['coordinates.latitude'])
lat = lat.rename(columns={'coordinates.latitude':'lat'})

long = pd.DataFrame(df['coordinates.longitude'])
long = long.rename(columns={'coordinates.longitude':'long'})

rating = df['rating']

review_count = df['review_count']

site = "Yelp"


Put your parsed results into a DataFrame

In [113]:
# Line the parsed pieces up side by side (category, name, address, lat, long,
# rating, review_count), then tag every row with the source site
yelp = pd.concat(
    [category, name, address, lat, long, rating, review_count],
    axis='columns',
).assign(site=site)

In [None]:
# preview the first five parsed Yelp rows
yelp.head(n=5)

#### Fetching Yelp records for every CityBikes station, using each station's (lat, long)

In [None]:
radius = 1000  # search radius in metres
site = 'Yelp'
request_timeout = 30  # seconds; keeps one stalled request from hanging the whole run


def _last_listed_category_title(categories):
    """Return the title of the last of a business's first three categories.

    That is the third category if present, otherwise the second, otherwise the
    first; None when the business lists no category.
    """
    if isinstance(categories, list) and categories:
        return categories[:3][-1].get('title')
    return None


# One frame per station, concatenated once after the loop. DataFrame.append was
# removed in pandas 2.0, and growing a frame inside the loop re-copies all
# previous rows on every iteration.
yelp_frames = []

for index, row in citybikes_vancouver.iterrows():
    cb_id = row['cb_id']

    url = 'https://api.yelp.com/v3/businesses/search?latitude={}&longitude={}&radius={}&sort_by=best_match&limit=20'.format(
        row['latitude'], row['longitude'], radius)

    response = requests.get(url, headers=headers, timeout=request_timeout)
    # Surface HTTP errors (bad key, rate limit, ...) as such rather than as a
    # KeyError on 'businesses'.
    response.raise_for_status()
    json_data = response.json()

    # flatten the list of businesses into a table (nested keys become dotted columns)
    df = pd.json_normalize(json_data.get('businesses', []))
    if df.empty:
        # nothing found around this station; the expected columns would be missing
        continue

    # Category titles are read directly from each business's list, so no
    # try/except ladder is needed for responses with fewer than three
    # category columns.
    yelp = pd.DataFrame({
        'title': df['categories'].map(_last_listed_category_title),
        'name': df['name'],
        'address': df['location.display_address'],
        'lat': df['coordinates.latitude'],
        'long': df['coordinates.longitude'],
        'rating': df['rating'],
        'review_count': df['review_count'],
    })
    yelp['site'] = site
    yelp['cb_id'] = cb_id

    yelp_frames.append(yelp)

# pd.concat raises on an empty list, so fall back to an empty frame
citybike_yelp = pd.concat(yelp_frames) if yelp_frames else pd.DataFrame()

    
        




In [175]:
#save citybike_yelp to csv
#citybike_yelp.to_csv('../data/cb_yelp_vancouver.csv', index=False)

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

```
In my view, Yelp provided the more complete data regarding the ratings of the points of interest near the bike stations.

With Foursquare, review counts can only be obtained through a separate API call that requires passing the fsq_id of each POI. With Yelp, by contrast, a single API call returns both ratings and review counts.
```

Get the top 10 restaurants according to their rating

In [181]:
# Top 10 businesses whose name contains "Restaurant", ranked by Yelp rating.
# The same business can be returned for several stations, so only its first
# (highest-rated) row per name is kept.
# na=False treats a missing name as "no match"; without it a NaN in the mask
# makes the boolean indexing fail.
is_restaurant = citybike_yelp['name'].str.contains('Restaurant', na=False)
(
    citybike_yelp[is_restaurant]
    .sort_values(by='rating', ascending=False)
    .drop_duplicates(subset='name')
    .head(10)
)


Unnamed: 0,title,name,address,lat,long,rating,review_count,site,cb_id
16,Canadian (New),Arike Restaurant,"[1725 Davie St, Vancouver, BC V6G 1W5, Canada]",49.286805,-123.140856,5.0,25,Yelp,cb_55
13,Thai,Unchai Thai Restaurant,"[2351 Burrard Street, Vancouver, BC V6J 3J2, C...",49.265112,-123.145913,4.5,79,Yelp,cb_240
14,French,Le Crocodile Restaurant,"[909 Burrard Street, Vancouver, BC V6Z 2N2, Ca...",49.282282,-123.124911,4.5,428,Yelp,cb_171
18,French,St Lawrence Restaurant,"[269 Powell Street, Vancouver, BC V6A 1G3, Can...",49.283258,-123.097993,4.5,132,Yelp,cb_155
18,Ethiopian,Fassil Ethiopian Restaurant,"[736 Broadway E, Vancouver, BC V5T 1X9, Canada]",49.262317,-123.088331,4.5,145,Yelp,cb_157
15,Ethiopian,Axum Restaurant,"[1279 E Hastings Street, Vancouver, BC V6A 1S4...",49.281494,-123.077796,4.5,67,Yelp,cb_169
14,Indian,Ashiana Tandoori Restaurant,"[1440 Kingsway Street, Vancouver, BC V5N 2R5, ...",49.249472,-123.074921,4.5,81,Yelp,cb_143
19,French,Five Sails Restaurant,"[401-999 Canada Place, Vancouver, BC V6C 3E1, ...",49.288031,-123.113292,4.5,174,Yelp,cb_8
19,Breakfast & Brunch,The Tipper Restaurant,"[2066 Kingsway, Vancouver, BC V5N 2T3, Canada]",49.244523,-123.06414,4.0,122,Yelp,cb_238
13,African,Harambe Restaurant,"[2149 Commercial Drive, Vancouver, BC V5N 4B3,...",49.265519,-123.069867,4.0,133,Yelp,cb_141
