# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [12]:
# imports
import requests
import pandas as pd
import time

citybikes_df = pd.read_csv("../data/manchester_city_bikes.csv")

results = []

for index, row in citybikes_df.iterrows():
    latitude = row['Latitude']
    longitude = row['Longitude']

    url = 'https://api.foursquare.com/v3/places/search'
    params = {
        'll': f'{latitude},{longitude}',
        'categoryId': '12058,19014,13065',    # for restaurants and bars
        'radius': 1000,
        'fields': 'name,location,rating,categories,distance,description,popularity,price',
        'sort': 'RATING'
    }
    headers = {
        'Accept': 'application/json',
        'Authorization': "fsq3TOxk4HaeuDdUc/VJxQfiZF6UlwLSQ9WeOXHRg9WZPBQ="
    }

    # Sleep 1 second between API calls to avoid restriction
    time.sleep(1.0)

    response = requests.get(url, params=params, headers=headers)
    if response.status_code != 200:
        print("Request failed. Status code:", response.status_code)
    else:
        data = response.json()

    result_field = data.get('results')
    frame = pd.DataFrame(result_field)
    results.append({
        'Latitude': latitude,
        'Longitude': longitude,
        'Foursquare Name': frame['name'],
        'Distance': frame['distance'],
        'Popularity': frame['popularity'],
        'Rating': frame['rating'],
        #'Price': frame['price']
        })

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

{'name': 'The Jackalope', 'category': 'Restaurant', 'distance': 97, 'latitude': 51.519982, 'longitude': -0.146264}
{'name': 'The Harley Street Clinic', 'category': 'Hospital', 'distance': 198, 'latitude': 51.520074, 'longitude': -0.147765}
{'name': 'Honey & Smoke', 'category': 'Middle Eastern Restaurant', 'distance': 224, 'latitude': 51.52259, 'longitude': -0.143671}
{'name': 'Sushi Atelier', 'category': 'Sushi Restaurant', 'distance': 240, 'latitude': 51.519519, 'longitude': -0.14224}
{'name': 'Artesian', 'category': 'Restaurant', 'distance': 339, 'latitude': 51.517801, 'longitude': -0.143733}
{'name': 'Pizzeria Mozza', 'category': 'Pizzeria', 'distance': 360, 'latitude': 51.517552, 'longitude': -0.142845}
{'name': 'Passyunk Avenue', 'category': 'American Restaurant', 'distance': 353, 'latitude': 51.522097, 'longitude': -0.140307}
{'name': 'Brazilian Gourmet', 'category': 'Brazilian Restaurant', 'distance': 368, 'latitude': 51.521836, 'longitude': -0.140099}
{'name': 'The Wigmore', 'c

Put your parsed results into a DataFrame

In [13]:
# Creating DataFrame
fs_poi_df = pd.DataFrame(results)

# Displaying the DataFrame
fs_poi_df

Unnamed: 0,Latitude,Longitude,Foursquare Name,Distance,Popularity,Rating
0,53.462542,-2.216341,0 Mr Knafah 1 ...,0 1004 1 851 2 950 3 966 4 ...,0 0.983192 1 0.988917 2 0.549653 3 ...,0 8.6 1 8.3 2 8.2 3 8.1 4 8.1 5...
1,53.481139,-2.236058,0 The John Rylands Library 1 ...,0 876 1 553 2 387 3 597 4 934 5...,0 0.994466 1 0.967828 2 0.992752 3 ...,0 9.5 1 9.4 2 9.4 3 9.3 4 9.3 5...
2,53.469170,-2.225237,0 The Manchester Museum 1 ...,0 649 1 856 2 701 3 942 4 846 5...,0 0.995988 1 0.956492 2 0.989402 3 ...,0 8.7 1 8.6 2 8.5 3 8.4 4 8.0 5...
3,53.462380,-2.211227,0 Mr Knafah 1 Ven...,0 942 1 911 2 722 3 728 4 ...,0 0.983192 1 0.992637 2 0.957145 3 ...,0 8.6 1 7.5 2 7.3 3 6.8 4 6.7 5...
4,53.451169,-2.236843,0 Ariyadh Restaurant 1 ...,0 466 1 919 2 608 3 917 4 918 5...,0 0.991892 1 0.549653 2 0.998478 3 ...,0 8.5 1 8.2 2 8.2 3 8.1 4 8.1 5...
...,...,...,...,...,...,...
266,53.483487,-2.218778,0 Rudy's 1 ...,0 707 1 946 2 640 3 653 4 570 5...,0 0.967828 1 0.978488 2 0.976666 3 ...,0 9.4 1 9.1 2 9.1 3 9.0 4 8.9 5...
267,53.486174,-2.294865,0 Taste of Portugal 1 Chandos Chippy ...,0 84 1 347 2 655 3 1036 4 ...,0 0.973185 1 0.954032 2 0.986004 3 ...,0 7.5 1 6.9 2 6.6 3 6.4 4 6.4 5...
268,53.471395,-2.215647,0 Unit 9 Cloudwater Taproom 1 ...,0 846 1 834 2 777 3 680 4 ...,0 0.976666 1 0.978219 2 0.982999 3 ...,0 9.1 1 9.0 2 8.4 3 7.8 4 7.7 5...
269,53.481608,-2.204621,0 Etihad Stadium 1 ...,0 330 1 409 2 843 3 318 4 484 5...,0 0.999816 1 0.996949 2 0.985928 3 ...,0 8.7 1 8.0 2 7.9 3 7.8 4 7.6 5...


In [14]:
# Specify the file path for the CSV
csv_file_path = "../data/manchester_foursquare.csv"

# Save the DataFrame to CSV
fs_poi_df.to_csv(csv_file_path, index=False)

In [15]:
# Data Transformation

fs_poi_df['Average_dist_F'] = fs_poi_df['Distance'].apply(lambda x: sum(x) / len(x))
fs_poi_df['Average_popularity_F'] = fs_poi_df['Popularity'].apply(lambda x: sum(x) / len(x))
fs_poi_df['Average_rating_F'] = fs_poi_df['Rating'].apply(lambda x: sum(x) / len(x))

fs_poi_df['Max_rating_F'] = fs_poi_df['Rating'].apply(lambda x: max(x))
fs_poi_df['Max_popularity_F'] = fs_poi_df['Popularity'].apply(lambda x: max(x))

fs_poi_df['Min_rating_F'] = fs_poi_df['Rating'].apply(lambda x: min(x))
fs_poi_df['Min_popularity_F'] = fs_poi_df['Popularity'].apply(lambda x: min(x))

In [17]:
# Cleaning

fs_poi_df.drop(columns=['Foursquare Name', 'Distance', 'Popularity', 'Rating'], inplace=True)
fs_poi_df

Unnamed: 0,Latitude,Longitude,Average_dist_F,Average_popularity_F,Average_rating_F,Max_rating_F,Max_popularity_F,Min_rating_F,Min_popularity_F
0,53.462542,-2.216341,926.8,0.935496,8.06,8.6,0.998163,7.7,0.549653
1,53.481139,-2.236058,560.7,0.988236,9.26,9.5,0.999208,9.1,0.963762
2,53.469170,-2.225237,755.9,0.986272,8.09,8.7,0.998809,7.6,0.956492
3,53.462380,-2.211227,814.2,0.947587,6.80,8.6,0.995227,5.9,0.712119
4,53.451169,-2.236843,852.0,0.936960,8.07,8.5,0.998478,7.8,0.549653
...,...,...,...,...,...,...,...,...,...
266,53.483487,-2.218778,761.8,0.981776,8.91,9.4,0.993014,8.6,0.967828
267,53.486174,-2.294865,599.5,0.807861,6.46,7.5,0.995243,5.9,0.081560
268,53.471395,-2.215647,843.5,0.926471,7.85,9.1,0.990716,7.1,0.508842
269,53.481608,-2.204621,501.3,0.939865,7.50,8.7,0.999816,6.6,0.560989


In [18]:
# Specify the file path for the CSV
csv_file_path = "../data/manchester_foursquare_cleaned.csv"

# Save the DataFrame to CSV
fs_poi_df.to_csv(csv_file_path, index=False)

In [19]:
duplicate_rows = fs_poi_df[fs_poi_df.duplicated()]

print("Duplicate Rows:")
duplicate_rows

Duplicate Rows:


Unnamed: 0,Latitude,Longitude,Average_dist_F,Average_popularity_F,Average_rating_F,Max_rating_F,Max_popularity_F,Min_rating_F,Min_popularity_F


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [20]:
results = []

for index, row in citybikes_df.iterrows():       # citybikes_df is data from CityBikes API already from the Foursquare section above.
    latitude = row['Latitude']
    longitude = row['Longitude']

    url = 'https://api.yelp.com/v3/businesses/search'
    params = {
        'latitude': latitude,
        'longitude': longitude,
        'radius': 1000,
        'categories': 'restaurants,bars',
        'sort_by': 'rating'
    }
    headers = {
        'Authorization': f'Bearer BfzFVgYUAXJ-N3GRveE1dPQ9GtrE4LYaP-pzPb_gVgvhczQB_WOEPx4FcDofSM8sJH0_OqEphmCm8GC4aVC_RSTJQlB_O7ZRT-GbGA6d_tbXvkz2K94BI3XAeUbMZXYx'
    }

    # Sleep 1 second between API calls to avoid restriction
    time.sleep(1.0)

    response = requests.get(url, params=params, headers=headers)
    if response.status_code != 200:
        print("Request failed. Status code:", response.status_code)
        break
    else:
        data = response.json()

    yelp_results = data.get('businesses', [])
    frame = pd.DataFrame(yelp_results)
    results.append({
    'Latitude': latitude,
    'Longitude': longitude,
    'Yelp Name': frame['name'],
    'Distance': frame['distance'],
    'Review count': frame['review_count'],
    'Rating': frame['rating'],
    'Price': frame['price']
    })

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

{'category': ['Indian', 'Breakfast & Brunch'], 'location': 'Kensington', 'rating': 4.6, 'latitude': 51.5012974019964, 'longitude': -0.191184893250465, 'distance': 480.55644473374974, 'price': '££'}
{'category': ['New American'], 'location': 'London', 'rating': 4.2, 'latitude': 51.497629045444, 'longitude': -0.19607751802576, 'distance': 243.11490323319143, 'price': '£££'}
{'category': ['Breakfast & Brunch', 'British', 'Cocktail Bars'], 'location': 'London', 'rating': 4.2, 'latitude': 51.4999833, 'longitude': -0.1956639, 'distance': 149.75651745881814, 'price': '££'}
{'category': ['Italian'], 'location': 'London', 'rating': 4.5, 'latitude': 51.4985799, 'longitude': -0.2000019, 'distance': 200.80368368189696, 'price': '£££'}
{'category': ['British'], 'location': 'London', 'rating': 4.7, 'latitude': 51.50405773515, 'longitude': -0.192783395566043, 'distance': 595.731604115835, 'price': '£££'}
{'category': ['French'], 'location': 'London', 'rating': 4.3, 'latitude': 51.4992288, 'longitude'

Put your parsed results into a DataFrame

In [23]:
# Creating DataFrame
yp_poi_df = pd.DataFrame(results)

# Displaying the DataFrame
yp_poi_df


Unnamed: 0,Latitude,Longitude,Yelp Name,Distance,Review count,Rating,Price
0,53.462542,-2.216341,0 Curry Mile 1 ...,0 1044.925809 1 949.263144 2 985...,0 9 1 23 2 4 3 28 4 11 5...,0 5.0 1 4.5 2 4.8 3 4.5 4 ...,0 ££ 1 ££ 2 ££ 3 ££ 4 ...
1,53.481139,-2.236058,0 Dim Sum Su 1 ...,0 1078.228822 1 625.771505 2 454...,0 8 1 7 2 25 3 6 4 6 5...,0 4.9 1 4.9 2 4.8 3 4.8 4 ...,0 £ 1 NaN 2 ££ 3 NaN 4 ...
2,53.469170,-2.225237,0 Esquire's Coffee 1 ...,0 1071.660331 1 973.683900 2 1213...,0 4 1 28 2 8 3 14 4 7 5...,0 4.8 1 4.5 2 4.5 3 4.4 4 ...,0 NaN 1 ££ 2 ££ 3 £ 4 ...
3,53.462380,-2.211227,0 Curry Mile 1 ...,0 1245.201143 1 1268.407888 2 1190...,0 9 1 7 2 23 3 4 4 28 5...,0 5.0 1 4.9 2 4.5 3 4.8 4 ...,0 ££ 1 NaN 2 ££ 3 ££ 4 ...
4,53.451169,-2.236843,0 Curry Mile 1 ...,0 887.781410 1 951.228001 2 584.10...,0 9 1 7 2 4 3 23 4 4 5...,0 5.0 1 4.9 2 5.0 3 4.5 4 ...,0 ££ 1 NaN 2 £ 3 ££ 4 ...
...,...,...,...,...,...,...,...
266,53.483487,-2.218778,0 Elnecot 1 ...,0 676.619331 1 599.553271 2 691...,0 7 1 11 2 51 3 19 4 5 5...,0 4.9 1 4.8 2 4.4 3 4.4 4 ...,0 NaN 1 £ 2 £ 3 £ 4 ...
267,53.486174,-2.294865,0 The Winston 1 ...,0 740.010307 1 26.216933 2 692.55...,0 1 1 1 2 1 3 2 4 3 5 ...,0 5.0 1 5.0 2 5.0 3 4.5 4 ...,0 NaN 1 NaN 2 NaN 3 NaN 4 ...
268,53.471395,-2.215647,0 Plymouth Grove Restaurant 1 Th...,0 921.246924 1 931.872351 2 636...,0 3 1 20 2 1 3 1 4 46 5...,0 4.7 1 4.1 2 5.0 3 5.0 4 ...,0 NaN 1 £ 2 NaN 3 NaN 4 ...
269,53.481608,-2.204621,0 The Plaice 1 Mary D'Bem...,0 773.287299 1 498.460607 2 532.36781...,0 1 1 1 2 2 3 2 4 3 5 1 6 ...,0 5.0 1 5.0 2 4.5 3 4.5 4 3.0 5...,0 NaN 1 ££ 2 NaN 3 NaN 4 £ 5...


In [24]:
# Specify the file path for the CSV
csv_file_path = "../data/manchester_yelp.csv"

# Save the DataFrame to CSV
yp_poi_df.to_csv(csv_file_path, index=False)

In [26]:
# Data Transformation

yp_poi_df['Average_distance_Y'] = yp_poi_df['Distance'].apply(lambda x: sum(x) / len(x))
yp_poi_df['Average_review_count_Y'] = yp_poi_df['Review count'].apply(lambda x: sum(x) / len(x))
yp_poi_df['Average_rating_Y'] = yp_poi_df['Rating'].apply(lambda x: sum(x) / len(x))

yp_poi_df['Max_rating_Y'] = yp_poi_df['Rating'].apply(lambda x: max(x))
yp_poi_df['Max_review_count'] = yp_poi_df['Review count'].apply(lambda x: max(x))

yp_poi_df['Min_rating_Y'] = yp_poi_df['Rating'].apply(lambda x: min(x))
yp_poi_df['Min_review_count'] = yp_poi_df['Review count'].apply(lambda x: min(x))


In [27]:
# Cleaning

yp_poi_df.drop(columns=['Yelp Name', 'Distance', 'Review count', 'Rating', 'Price'], inplace=True)
yp_poi_df

Unnamed: 0,Latitude,Longitude,Average_distance_Y,Average_review_count_Y,Average_rating_Y,Max_rating_Y,Max_review_count,Min_rating_Y,Min_review_count
0,53.462542,-2.216341,879.335943,14.100000,4.435000,5.0,78,4.0,1
1,53.481139,-2.236058,532.596303,17.200000,4.725000,4.9,70,4.5,5
2,53.469170,-2.225237,919.578830,13.450000,4.465000,5.0,47,4.1,2
3,53.462380,-2.211227,1107.131736,13.900000,4.495000,5.0,78,4.2,2
4,53.451169,-2.236843,901.395749,13.700000,4.520000,5.0,78,4.1,1
...,...,...,...,...,...,...,...,...,...
266,53.483487,-2.218778,888.110170,22.050000,4.395000,5.0,68,4.1,2
267,53.486174,-2.294865,443.417944,1.733333,3.420000,5.0,4,1.0,1
268,53.471395,-2.215647,1014.164561,9.450000,4.015000,5.0,46,3.1,1
269,53.481608,-2.204621,641.720977,1.571429,3.714286,5.0,3,1.0,1


In [28]:
# Specify the file path for the CSV
csv_file_path = "../data/manchester_yelp_cleaned.csv"

# Save the DataFrame to CSV
yp_poi_df.to_csv(csv_file_path, index=False)

In [29]:
duplicate_rows = yp_poi_df[yp_poi_df.duplicated()]

print("Duplicate Rows:")
duplicate_rows

Duplicate Rows:


Unnamed: 0,Latitude,Longitude,Average_distance_Y,Average_review_count_Y,Average_rating_Y,Max_rating_Y,Max_review_count,Min_rating_Y,Min_review_count


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 


In evaluating the data quality for the city of London, it has been noted that the Yelp API provides a more comprehensive dataset, as evidenced by the additional attributes available for each Point of Interest (POI) within the city.

Get the top 10 restaurants according to their rating

In [30]:
fs_poi_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 271 entries, 0 to 270
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Latitude              271 non-null    float64
 1   Longitude             271 non-null    float64
 2   Average_dist_F        271 non-null    float64
 3   Average_popularity_F  271 non-null    float64
 4   Average_rating_F      241 non-null    float64
 5   Max_rating_F          271 non-null    float64
 6   Max_popularity_F      271 non-null    float64
 7   Min_rating_F          271 non-null    float64
 8   Min_popularity_F      271 non-null    float64
dtypes: float64(9)
memory usage: 19.2 KB


In [31]:
yp_poi_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 271 entries, 0 to 270
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Latitude                271 non-null    float64
 1   Longitude               271 non-null    float64
 2   Average_distance_Y      271 non-null    float64
 3   Average_review_count_Y  271 non-null    float64
 4   Average_rating_Y        271 non-null    float64
 5   Max_rating_Y            271 non-null    float64
 6   Max_review_count        271 non-null    int64  
 7   Min_rating_Y            271 non-null    float64
 8   Min_review_count        271 non-null    int64  
dtypes: float64(7), int64(2)
memory usage: 19.2 KB


In [34]:
fs_poi_df.describe()

Unnamed: 0,Latitude,Longitude,Average_dist_F,Average_popularity_F,Average_rating_F,Max_rating_F,Max_popularity_F,Min_rating_F,Min_popularity_F
count,271.0,271.0,271.0,271.0,241.0,271.0,271.0,271.0,271.0
mean,53.471927,-2.248864,675.390693,0.939829,8.203071,8.724354,0.997628,7.621771,0.705636
std,0.014506,0.024805,138.065031,0.078475,0.900443,0.732017,0.002915,1.150558,0.335047
min,53.438221,-2.309977,298.6,0.48303,6.16,6.6,0.982323,5.6,0.000315
25%,53.461847,-2.266591,582.6,0.919838,7.53,8.35,0.997687,6.5,0.420135
50%,53.474141,-2.245889,689.1,0.980798,8.25,8.9,0.998809,7.8,0.951926
75%,53.483333,-2.231012,773.8,0.988088,9.13,9.4,0.999208,8.9,0.963762
max,53.501892,-2.184555,1089.3,0.994726,9.26,9.5,0.999892,9.1,0.980732


In [35]:
yp_poi_df.describe()

Unnamed: 0,Latitude,Longitude,Average_distance_Y,Average_review_count_Y,Average_rating_Y,Max_rating_Y,Max_review_count,Min_rating_Y,Min_review_count
count,271.0,271.0,271.0,271.0,271.0,271.0,271.0,271.0,271.0
mean,53.471927,-2.248864,774.316839,11.615787,4.348235,4.984502,47.579336,3.541328,2.132841
std,0.014506,0.024805,157.499659,6.309328,0.384383,0.04694,26.538026,1.175013,1.354304
min,53.438221,-2.309977,295.985037,1.230769,3.261111,4.5,2.0,1.0,1.0
25%,53.461847,-2.266591,669.111777,5.684615,4.1325,5.0,26.0,3.1,1.0
50%,53.474141,-2.245889,780.745053,13.3,4.465,5.0,49.0,4.0,1.0
75%,53.483333,-2.231012,880.582375,16.025,4.665,5.0,70.0,4.45,4.0
max,53.501892,-2.184555,1226.832112,27.6,4.775,5.0,116.0,4.6,5.0


In [37]:
fs_poi_df.sort_values('Max_rating_F', ascending=False).head(10)

Unnamed: 0,Latitude,Longitude,Average_dist_F,Average_popularity_F,Average_rating_F,Max_rating_F,Max_popularity_F,Min_rating_F,Min_popularity_F
10,53.48581,-2.246643,712.0,0.99228,9.22,9.5,0.999208,9.0,0.963762
6,53.48161,-2.24203,499.8,0.988236,9.26,9.5,0.999208,9.1,0.963762
244,53.487812,-2.245381,757.9,0.99187,9.21,9.5,0.999208,9.0,0.963762
235,53.47316,-2.248502,676.4,0.994346,9.04,9.5,0.999208,8.8,0.974922
30,53.484874,-2.258995,844.2,0.992565,9.03,9.5,0.999208,8.7,0.974922
33,53.48266,-2.242656,513.4,0.988236,9.26,9.5,0.999208,9.1,0.963762
219,53.478827,-2.25277,449.0,0.993927,9.15,9.5,0.999208,8.9,0.974922
212,53.475543,-2.242257,757.3,0.990069,9.21,9.5,0.999208,9.1,0.963762
201,53.477389,-2.243707,613.8,0.990069,9.21,9.5,0.999208,9.1,0.963762
203,53.476743,-2.242947,657.9,0.990069,9.21,9.5,0.999208,9.1,0.963762


In [38]:
yp_poi_df.sort_values('Max_rating_Y', ascending=False).head(10)

Unnamed: 0,Latitude,Longitude,Average_distance_Y,Average_review_count_Y,Average_rating_Y,Max_rating_Y,Max_review_count,Min_rating_Y,Min_review_count
270,53.493086,-2.217525,1142.136471,11.75,4.365,5.0,68,3.1,1
0,53.462542,-2.216341,879.335943,14.1,4.435,5.0,78,4.0,1
269,53.481608,-2.204621,641.720977,1.571429,3.714286,5.0,3,1.0,1
2,53.46917,-2.225237,919.57883,13.45,4.465,5.0,47,4.1,2
3,53.46238,-2.211227,1107.131736,13.9,4.495,5.0,78,4.2,2
4,53.451169,-2.236843,901.395749,13.7,4.52,5.0,78,4.1,1
5,53.443651,-2.230244,801.563918,11.0,4.155,5.0,30,3.6,1
6,53.48161,-2.24203,574.076455,12.9,4.77,5.0,60,4.6,4
7,53.483443,-2.208112,795.494329,3.666667,3.655556,5.0,20,1.0,1
268,53.471395,-2.215647,1014.164561,9.45,4.015,5.0,46,3.1,1
