In [1]:
# install and imports
# imports
import requests
import os
import pandas as pd
import numpy as np
from pprint import pprint
from sqlalchemy import create_engine
import sqlite3
from sqlite3 import Error

# Foursquare

Send a request to Foursquare with a small radius (1000m) with the location of your choice

In [2]:
# The Foursquare key is read from the environment so the secret stays out of the notebook.
FOURSQUARE_API_KEY = os.environ["FOURSQUARE_API_KEY"]

url = "https://api.foursquare.com/v3/places/search?"

headers_fsq = {
    "accept": "application/json",
    "Authorization": FOURSQUARE_API_KEY
}

# Search centre (lat,long), radius in metres, page size and the fields to return.
params_fsq={ 'll' : "49.2827,-123.1207",
        'radius' : '1000',
        'limit' : '50',
        'fields' : 'rating,name,categories,fsq_id,location,distance'
       }

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors (bad key, rate limit) here instead of
# as a confusing KeyError when the payload is parsed later.
response_fsq = requests.get(url, headers=headers_fsq, params=params_fsq, timeout=10)
response_fsq.raise_for_status()

Parse through the response to get the POI details you want (rating, name, location, etc)

In [3]:
# Decode the JSON payload and collect the requested fields column by column.
request_dict_fsq = response_fsq.json()
results_fsq = {'name' : [],
           'categories' : [],
           'fsq_id' : [],
           'location' : [],
           'distance' : [],
            'rating' : []}

for place in request_dict_fsq['results']:
    for key, values in results_fsq.items():
        # A place may lack a requested field (presumably e.g. unrated places have
        # no 'rating' - verify against the API docs); store a neutral placeholder
        # instead of raising KeyError so one sparse record cannot abort the parse.
        if key == 'categories':
            values.append(place.get(key) or [])
        elif key == 'location':
            values.append(place.get(key) or {})
        else:
            values.append(place.get(key))

# One row per place; 'categories' and 'location' are still nested at this point.
results_df_fsq = pd.DataFrame.from_dict(results_fsq)

Put your parsed results into a DataFrame

In [4]:
def tidy_df(dirty_df, columns, api):
    """
    Flatten the nested 'categories' column of an API result frame.

    The 'categories' column is removed (popped) from ``dirty_df`` as a side
    effect. Each category dict becomes one row of the returned frame, indexed by
    the position of the row it came from, so the result can be joined back onto
    ``dirty_df`` by index. A row with several categories yields several rows
    sharing the same index value.

    :params:
    dirty_df (pandas dataframe): dataframe to be cleaned; must contain a
        'categories' column holding lists of category dicts
    columns (list[str]): column names for categories (these come first in the result)
    api (str): name of api to clean ('fsq' or 'yelp', case-insensitive)

    :return:
    cat_df (pandas dataframe): category dataframe cleaned and tidy, or -1 when a
        category is encountered for an unrecognised api name

    :raises:
    NotImplementedError: when a category is encountered for api 'google', which
        has no parser yet

    :todo:
    Make it work without passing api or columns regardless of which api dataframe we pass
    """
    api_name = api.lower()
    cats = dirty_df.pop('categories')

    # Collect plain records and build the frame once at the end; growing a
    # DataFrame with concat inside the loop is quadratic.
    records = []
    row_positions = []
    for cat_index, cat_l in enumerate(cats):
        # Rows with a missing (None/NaN) categories cell contribute no category rows.
        if not isinstance(cat_l, (list, tuple)):
            continue
        for cat_d in cat_l:
            if api_name == 'fsq':
                record = {'id': cat_d['id'],
                          'name': cat_d['name'],
                          # The icon URL is delivered in two halves.
                          'icon': cat_d['icon']['prefix'] + cat_d['icon']['suffix']}
            elif api_name == 'yelp':
                record = {'alias': cat_d['alias'], 'title': cat_d['title']}
            elif api_name == 'google':
                # Previously this fell through to an unbound local variable.
                raise NotImplementedError("tidy_df does not support the 'google' api yet")
            else:
                return -1
            records.append(record)
            row_positions.append(cat_index)

    cat_df = pd.DataFrame(records, index=row_positions)
    # Requested columns first, then any extra keys found in the records.
    ordered_columns = list(columns) + [c for c in cat_df.columns if c not in columns]
    return cat_df.reindex(columns=ordered_columns)

In [5]:
# Clean & Tidy: Category and Location columns
# Rebuild the frame from the parsed dict first: tidy_df() and pop() below consume
# the 'categories' and 'location' columns, so without this the cell raises
# KeyError when it is run a second time.
results_df_fsq = pd.DataFrame.from_dict(results_fsq)
cat_df_fsq = tidy_df(results_df_fsq, columns=['id','name','icon'], api='fsq')
# Expand each location dict into its own set of columns (one row per place).
loc_fsq = [pd.DataFrame.from_dict([d for d in results_df_fsq.pop('location')])]

# Add back Category and Location columns
# Make sure data is 'tidydata'
results_tidy_fsq = results_df_fsq.join(loc_fsq)
# Right join: one output row per (place, category) pair; the suffixes separate
# the place name from the category name.
results_tidy_fsq = results_tidy_fsq.join(cat_df_fsq, how='right', lsuffix='_l', rsuffix='_r')
results_tidy_fsq = results_tidy_fsq.rename(
    {'name_l':'name','id':'category_id','name_r':'category_name','icon':'category_icon'},
    axis='columns',
).reset_index(drop=True)
# Treat empty / whitespace-only strings as missing values.
results_tidy_fsq = results_tidy_fsq.replace(r'^\s*$', np.nan, regex=True)

# Yelp

In [6]:
# Read the Yelp key from the environment, like the Foursquare key, so the secret
# is never stored in the notebook. SECURITY: a literal key was previously
# committed in this cell; treat it as compromised and revoke/rotate it.
YELP_API_KEY = os.environ["YELP_API_KEY"]

url = "https://api.yelp.com/v3/businesses/search"

headers_yp = {
    "accept": "application/json",
    "Authorization": f'Bearer {YELP_API_KEY}'
}
# Same search centre, radius (metres) and page size as the Foursquare request.
params_yp = {'latitude' : "49.2827",
        'longitude' : '-123.1207',
        'radius' : '1000',
        'limit' : '50'
       }

Send a request with the same location parameters (location, radius, etc.)

In [7]:
# A timeout prevents the cell from hanging on a stalled connection.
response_yelp = requests.get(url, headers=headers_yp, params=params_yp, timeout=10)
print(response_yelp.status_code)
# Fail here, with the HTTP error, rather than later when the payload is parsed.
response_yelp.raise_for_status()

200


Parse through your result and get POI details

In [8]:
# Decode the Yelp payload and gather the wanted fields, one list per column.
request_dict_yp = response_yelp.json()
results_yp = {
    'name': [],
    'categories': [],
    'id': [],
    'location': [],
    'distance': [],
    'rating': [],
}

for business in request_dict_yp['businesses']:
    for field, collected in results_yp.items():
        collected.append(business[field])

# One row per business; 'categories' and 'location' are still nested here.
results_df_yp = pd.DataFrame(results_yp)

In [9]:
# Take the category column names from the first business that actually has a
# category; indexing [0][0] directly raises IndexError when the first business
# has an empty category list. Falls back to the keys tidy_df() reads for Yelp.
columns = next(
    (list(cat_l[0].keys()) for cat_l in results_df_yp['categories'] if cat_l),
    ['alias', 'title'],
)
# NOTE: tidy_df() pops 'categories' from results_df_yp as a side effect.
cat_df_yp = tidy_df(results_df_yp, columns=columns, api='yelp')

Put your parsed results into a DataFrame

In [10]:
#Clean and Tidy Categories and Location
# Rebuild the frame from the parsed dict so the cell can be re-run: tidy_df() and
# pop() below consume the 'categories' and 'location' columns.
results_df_yp = pd.DataFrame.from_dict(results_yp)
cat_df_yp = tidy_df(results_df_yp, columns=columns, api='yelp')
# Expand each location dict into its own set of columns (one row per business).
loc_yp = [pd.DataFrame.from_dict([d for d in results_df_yp.pop('location')])]


# Add back Category and Location columns
# Make sure data is 'tidydata'
results_tidy_yp = results_df_yp.join(loc_yp)
# Left join on the index: a business with several categories yields several rows.
results_tidy_yp = results_tidy_yp.join(cat_df_yp)
# Treat empty / whitespace-only strings as missing values.
results_tidy_yp = results_tidy_yp.replace(r'^\s*$', np.nan, regex=True)
# rename() targets the index unless told otherwise, so the mapping must be passed
# as `columns=`; without it the 'id' column was silently left unrenamed.
results_tidy_yp = results_tidy_yp.rename(columns={'id':'yp_id'})
results_tidy_yp['zip_code'] = results_tidy_yp['zip_code'].astype("string")
# NOTE(review): 'display_address' presumably still holds lists here - confirm and
# flatten it to text before writing this frame to SQLite.


# Google (stretch)

Use the same process as the first two APIs

# Database

Put all your results in an SQLite3 database (remember, SQLite stores its databases as files in your local machine - make sure to create your database in your project's data/ directory!)

In [11]:
# Working with permissions due to 'attempt to write a readonly database' error
#
# The data directory can be overridden with the MINI_PROJECT_DATA_DIR environment
# variable so the notebook is not tied to one machine; the default is the
# original location.
DATA_DIR = os.environ.get('MINI_PROJECT_DATA_DIR', '/Users/calebward/LL_mini_project_II/data/')
# The database is opened by bare file name later, so it is created in this directory.
os.chdir(DATA_DIR)
# rwx for owner and group, read-only for others.
os.chmod(DATA_DIR, 0o774)

def create_connection(path):
    """
    Open a SQLite database and report the outcome on stdout.

    :param:
    path (str): path to database (Note: in this case just the file name)
    :return:
    connection (sqlite3.connect): the open connection, or None when
        sqlite3 raised an Error while connecting
    """
    try:
        db_handle = sqlite3.connect(path)
    except Error as e:
        # Best effort: report the failure and hand back None.
        print(f"Error: '{e}'")
        return None

    print("Connection Success")
    return db_handle


def execute_query(connection, sql, params=()):
    """
    Run a SQL write statement (CREATE, INSERT, ETC) and commit it.

    Success or failure is reported on stdout; sqlite3 errors are caught, not raised.

    :param:
    connection (sqlite): connection to server
    sql (str): The SQL query to execute
    params (sequence | dict): optional values bound to the placeholders in ``sql``;
        prefer this over formatting values into the SQL string
    :return:
    NONE
    """
    cursor = connection.cursor()
    try:
        cursor.execute(sql, params)
        connection.commit()
        print("Query Success")
    except Error as e:
        # Undo the partially applied statement so the connection is not left
        # inside an open transaction.
        connection.rollback()
        print(f"Error: '{e}'")
    finally:
        cursor.close()

def execute_read_query(connection, sql, params=()):
    """
    Run a SQL read query (SELECT, ETC) and return the rows as a DataFrame.

    The statement is executed exactly once; previously it ran on a cursor and
    then again inside pandas. sqlite3 errors are printed, not raised.

    :param:
    connection (sqlite): connection to server
    sql (str): The SQL query to execute
    params (sequence | dict): optional values bound to the placeholders in ``sql``
    :return:
    result (pd.Dataframe): A Pandas dataframe of your result (empty when the
        statement produces no result set), or None when sqlite3 raised an Error
    """
    cursor = connection.cursor()
    try:
        cursor.execute(sql, params)
        # description is None for statements that return no result set.
        if cursor.description is None:
            return pd.DataFrame()
        column_names = [column[0] for column in cursor.description]
        return pd.DataFrame.from_records(cursor.fetchall(), columns=column_names,
                                         coerce_float=True)
    except Error as e:
        print(f"Error: '{e}'")
        return None
    finally:
        cursor.close()
        
def write_apidata_to_db(table_name, connection, data,
                        if_exists='append', index=True, index_label='key'):
    """
    Write to Database using pandas instead of SQLite3
    :param:
    table_name (str): name of the table
    connection (sqlite): connection to server
    data (pandas.DataFrame): the Data to write
    if_exists (str): what to do when the table already exists, passed to
        DataFrame.to_sql ('append' by default)
    index (bool): whether to write the DataFrame index as a column; pass False
        when the target table has no column for it (e.g. it uses its own
        autoincrement primary key)
    index_label (str): column name used for the index when ``index`` is True
    :return:
    None
    """
    data.to_sql(name = table_name, con = connection,
                index = index, index_label = index_label, if_exists = if_exists)

In [12]:
# DDL for the Foursquare points-of-interest table.
# Every column definition must end with a comma: SQLite accepts multi-word type
# names, so a missing comma does not fail - it silently folds the following
# lines into the previous column's type and the table ends up with the wrong
# columns. The duplicated `locality` column has also been removed.
# NOTE: because of IF NOT EXISTS, a table already created from the malformed
# statement must be dropped before this definition takes effect.
create_fsq_table = """
CREATE TABLE IF NOT EXISTS fsq_poi (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL,
    fsq_id TEXT,
    distance INTEGER,
    address TEXT,
    country TEXT,
    cross_street TEXT,
    formatted_address TEXT,
    locality TEXT,
    neighborhood TEXT,
    postcode TEXT,
    region TEXT,
    address_extended TEXT,
    category_id INTEGER,
    category_name TEXT,
    category_icon TEXT,
    rating FLOAT
);
"""

# DDL for the Yelp points-of-interest table: an autoincrement surrogate key,
# the business name (required), the Yelp id, distance and rating, the address
# fields, and the category alias/title columns.
# IF NOT EXISTS makes the statement safe to run repeatedly.
create_yelp_table = """
CREATE TABLE IF NOT EXISTS yelp_poi (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL,
    yp_id TEXT,
    distance FLOAT,
    rating FLOAT,
    display_address TEXT,
    address1 TEXT,
    address2 TEXT,
    address3 TEXT,
    city TEXT,
    zip_code TEXT,
    country TEXT,
    state TEXT,
    alias TEXT,
    title TEXT
    );
"""


In [13]:
# Open (or create) the project database, then make sure both POI tables exist.
conn = create_connection('mini_db.sqlite')
for create_table_sql in (create_fsq_table, create_yelp_table):
    execute_query(conn, create_table_sql)

Connection Success
Query Success
Query Success


Get the top 10 restaurants according to their rating

In [15]:
def select_top_POI(data, n = 10, keep='all'):
    """
    Find the top rated points of interest from different datasets with their ratings normalized between sets.

    Each dataset's ratings are rescaled so that its highest rating becomes 10,
    which makes sources with different rating scales comparable. The input
    DataFrames are left untouched (previously their 'rating' column was
    overwritten with the normalized values).

    :params:
    data (list[pandas.DataFrames]): A list of all the DataFrames to search through;
                each needs a 'name' and a numeric 'rating' column
    n (int): How many POI to return
    keep (str): Determines how to deal with ties that extended the data frame beyond 'n'
                'all' show ties
                'first' show first POI when there is a tie
                'last' show last POI when there is a tie

    :return:
    rating_df (pandas.Dataframe): A dataframe with the name and normalized rating,
                sorted by rating then name (both descending), with a fresh
                0-based index as rank. Empty when ``data`` is empty.
    """
    normalized_frames = []
    for df in data:
        ratings = df['rating']
        # max() is computed once per dataset; assign() works on a copy of the
        # two columns, so the caller's frame is not modified.
        scaled = (ratings / ratings.max()) * 10
        normalized_frames.append(df[['name', 'rating']].assign(rating=scaled))

    if not normalized_frames:
        return pd.DataFrame(columns=['name', 'rating'])

    rating_df = pd.concat(normalized_frames)
    return (
        rating_df.drop_duplicates()          # one row per distinct (name, rating)
        .nlargest(n, 'rating', keep=keep)
        .sort_values(['rating', 'name'], ascending=False)
        .reset_index(drop=True)
    )


# Show floats with two decimals, then rank the POIs from both APIs together.
pd.set_option("display.float_format", "{:,.2f}".format)
select_top_POI([results_tidy_yp, results_tidy_fsq])


Unnamed: 0,name,rating
0,Manoush'eh,10.0
1,Incognito Coffee,10.0
2,Gotham Steakhouse & Cocktail Bar,10.0
3,Commodore Ballroom,9.89
4,Victoria's Secret,9.78
5,Sephora,9.78
6,Hawksworth Restaurant,9.78
7,The Orpheum,9.67
8,Le Crocodile Restaurant,9.67
9,Indigo - Robson,9.67


In [None]:
#SQL Query to test on working DB
# Top ten POIs by rating, resolving the poi <-> category link through the
# poi_category table. Join conditions belong in ON clauses; a WHERE between two
# JOINs is a syntax error, as was the misspelled SELECT keyword.
# NOTE(review): assumes poi / poi_category / category tables exist and that the
# rating is stored on `category` - confirm against the working schema.
top_ten_POI_SQL = """
SELECT poi.name AS name, category.rating AS rating
FROM poi
JOIN poi_category
    ON poi.poi_id = poi_category.poi_id
JOIN category
    ON category.category_id = poi_category.category_id
ORDER BY category.rating DESC
LIMIT 10;
"""


# Travelling Salesman Problem (stretch)

If you have time, follow the steps in the [ortools tutorial](https://developers.google.com/optimization/routing/tsp) using Google's [Directions API](https://developers.google.com/maps/documentation/directions/start).