In [1]:
#Imports

import foursquare
import pandas as pd
import json
import scripts.api_request as api_request
import scripts.data_retrieve as data_retrieve


# Project Plan
- ## Use scripts and folders to keep code organized
    - ### Had difficulty connecting script files together (i.e., using a script of constants to be shared across the project).
- ## Save every request
    - ### If I did it again, I would save the entirety of every response (currently I am lightly parsing some of the responses before saving).
- ## Use random geo locations
- ## Keep it simple: compare only a single common value from each API
# Stretch Goals
- ## Include images of geo locations and/or venue locations
- ## Create a presentation function that pulls a random sample from data and displays it visually
- ## Plot travel route between selected groups of locations
# APIs used:
- ## 3geonames API from: https://3geonames.org/api for latitude and longitude generation
- ## Nominatim from: https://nominatim.org/release-docs/develop/api/ for country names
- ## Foursquare
- ## Yelp
- ## Google Places
# Limitations:
- ## Each API is geared towards slightly different types of venues and places.
- ## Each API handles radius in a very different way, so results are not representative of exactly the same area
- ## Each API has different limitations on how many results are returned from a request, and may also have additional filters 

In [21]:
### Sample Table Structure
# Mock-up of the columns the final comparison table is meant to carry.
# Each column is given its 1-based position as a placeholder value, so the
# rendered frame is a single row that simply numbers the columns.
table_structure = [
    {
        column: position
        for position, column in enumerate(
            (
                'Geo Image',
                'Geo Location',
                'Country',
                'Venue',
                'Venue Location',
                'FourSquare Rating',
                'Yelp Rating',
                'Google Places Rating',
                'Venue Image',
            ),
            start=1,
        )
    }
]
pd.DataFrame(table_structure)

Unnamed: 0,Geo Image,Geo Location,Country,Venue,Venue Location,FourSquare Rating,Yelp Rating,Google Places Rating,Venue Image
0,1,2,3,4,5,6,7,8,9


In [2]:
### Generate new samples
# How many new random samples to request each time this cell is run.
# NOTE(review): this presumably triggers live API requests for every sample
# and adds to the saved data set — confirm in scripts/api_request before
# re-running the whole notebook.
NEW_SAMPLE_COUNT = 1
api_request.collect_random_samples(NEW_SAMPLE_COUNT)

In [2]:
# Assemble Data frame
# Load the saved table for each data source.
# The Foursquare table is bound to `foursquare_data` rather than `foursquare`:
# `foursquare` is the client library imported at the top of the notebook, and
# rebinding that name to a DataFrame would make the module unreachable.
geo = data_retrieve.get_geo_data()
foursquare_data = data_retrieve.get_four_square_data()
yelp = data_retrieve.get_yelp_data()
google = data_retrieve.get_google_data()
countries = data_retrieve.get_nominatim_data()

# Join everything on the shared 'geo_location' key. `merge` defaults to an
# inner join, so only locations present in every source table are kept.
complete_data = (
    countries
    .merge(geo, on='geo_location')
    .merge(foursquare_data, on='geo_location')
    .merge(yelp, on='geo_location')
    .merge(google, on='geo_location')
)
complete_data.head(10)

Unnamed: 0,geo_location,country,city,four_square_venues,yelp_venues,google_venues
0,"-31.21898, 151.13058",Australia,Dungowan,6,0,9
1,"-15.21639, 34.13639",Mozambique,Pequituba,0,0,0
2,"18.433333, -153.166667",,Hinds Seamount,0,0,0
3,"29.03358, -98.05028",United States,Poth,30,0,17
4,"-3.84417, -40.97889",Brazil,Ubajara,30,0,18
5,"-27.304396, 143.686717",Australia,Tobermory,0,0,0
6,"30.65737, 73.26189",Pakistan,Chak Sixty-one - Five Left,20,0,18
7,"26.85, 90.5",Bhutan,Nepāltārgaon,1,0,18
8,"13.67207, 103.71726",Cambodia,Phum Koŭk Têl,1,0,18
9,"76.4679, -121.6737",Canada,Hardinge Bay,0,0,0


In [29]:
### Compare Coverage
def summarize_coverage(data, venue_column):
    """Summarise how widely one API returned venues.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'country' column and the given venue-count column.
    venue_column : str
        Name of the column holding the number of venues found per sample.

    Returns
    -------
    tuple
        ``(total_venues, countries_with_venues)``: the venue count summed over
        all countries, and the number of countries whose summed count is
        non-zero.

    Notes
    -----
    Rows whose country is missing are left out of both figures, because
    ``groupby`` drops missing group keys by default.
    """
    per_country = data.groupby('country')[venue_column].sum()
    return per_country.sum(), int((per_country != 0).sum())


# One call per API instead of three copy-pasted aggregations. The raw `yelp`
# and `google` frames are no longer rebound to per-country summaries here.
fs_venues, fs_countries = summarize_coverage(complete_data, 'four_square_venues')
yelp_venues, yelp_countries = summarize_coverage(complete_data, 'yelp_venues')
google_venues, google_countries = summarize_coverage(complete_data, 'google_venues')

samples = len(complete_data)
print(f''' 
      From {samples} samples:''')

print(f''' 
      Four Square:
      {fs_venues} venues across {fs_countries} countries
      ''')
print(f''' 
      Google:
      {google_venues} venues across {google_countries} countries
      ''')
print(f''' 
      Yelp:
      {yelp_venues} venues across {yelp_countries} countries
      ''')

 
      From 200 samples:
 
      Four Square:
      1118 venues across 31 countries
      
 
      Google:
      1042 venues across 38 countries
      
 
      Yelp:
      365 venues across 9 countries
      


In [30]:
### Yelp gives many more results in major cities

# Order the sampled locations by Yelp venue count, largest first, and display
# the first ten rows of that ordering.
(
    complete_data
    .sort_values(by='yelp_venues', ascending=False)
    .iloc[:10]
)

Unnamed: 0,geo_location,country,city,four_square_venues,yelp_venues,google_venues
180,"40.39023, -3.72178",Spain,Madrid,30,240,18
155,"49.7904, 10.0247",Germany,Wuerzburg,30,36,18
17,"15.65737, -96.50063",Mexico,Puerto Ángel,30,31,18
102,"47.02842, 6.61579",France,Morteau,30,12,18
160,"43.37839, 3.62786",France,Sète,30,11,18
46,"54.87515, -1.64602",United Kingdom,Chester-le-Street,30,9,18
196,"32.09875, -98.52533",United States,De Leon,30,8,19
111,"37.21588, -86.26692",United States,Brownsville,30,6,18
63,"41.82522, -111.2927",United States,Laketown,30,3,19
13,"45.75528, 1.27563",France,Solignac,30,2,18


# With more time:   Visualize the coverage with a map (Geopandas?)