In [1]:
# Import dependencies
import pandas as pd
import requests

from config import api_key
from database_connection import ENGINE
from update_city_summary import update_city_summary

In [2]:
# import yelpAPI
from yelpapi import YelpAPI
import argparse
from pprint import pprint

In [3]:
# grab the top 1000 US cities from the database
top_us_cities_query = '''
SELECT *
FROM top_us_cities
'''
cities = pd.read_sql(
    top_us_cities_query,
    con=ENGINE
)

In [4]:
# create list of city, state combinations
cities_list = cities['city'] + ', ' + cities['state']
cities_list

0           Houston, TX
1          Brooklyn, NY
2           Chicago, IL
3       Los Angeles, CA
4             Miami, FL
             ...       
995         Gadsden, AL
996          Santee, CA
997       Hempstead, NY
998         Griffin, GA
999    Harrisonburg, VA
Length: 1000, dtype: object

In [5]:
# Create empty list to hold all breweries
brewery_results = []

In [6]:
# Function to pull breweries for each location):
def get_breweries(location):
    with YelpAPI(api_key) as yelp_api:
        # city_data is the json file with all businesses returned from search
        city_data = yelp_api.search_query(term='Breweries', location = location)
        # For each brewery in the search, return business name, coordinates, city, and buisness type
        for i in range(len(city_data['businesses'])):
            country = city_data['businesses'][i]['location']['country']
            if country == 'US':
                yelp_id = city_data['businesses'][i]['id']
                name = city_data['businesses'][i]['name']
                lat = city_data['businesses'][i]['coordinates']['latitude']
                long = city_data['businesses'][i]['coordinates']['longitude']
                original_search_city = location.split(',')[0]
                yelp_city = city_data['businesses'][i]['location']['city']
                business_type = city_data['businesses'][i]['categories'][0]['alias']
                is_closed = city_data['businesses'][i]['is_closed']
                rating = city_data['businesses'][i]['rating']
                zip_code = city_data['businesses'][i]['location']['zip_code']
                state = city_data['businesses'][i]['location']['state']

                brewery_results.append(
                    {
                        'yelp_id':yelp_id,
                        'name':name, 
                        'lat':lat, 
                        'lng': long, 
                        'original_search_city':original_search_city,
                        'yelp_city':yelp_city, 
                        'business_type':business_type, 
                        'is_closed':is_closed, 
                        'rating':rating, 
                        'zip_code':zip_code, 
                        'state':state
                    }
                )

In [7]:
# Apply get_breweries function to each location in cities_list
for city in cities_list:
    try:
        get_breweries(city)
    except Exception as e:
        print(e)
        print(f'An exception occurred for {city}')

In [8]:
breweries_df = pd.DataFrame(brewery_results)
breweries_df.head()

Unnamed: 0,yelp_id,name,lat,lng,original_search_city,yelp_city,business_type,is_closed,rating,zip_code,state
0,-SjX-mvv6ZZjxXUJvV3Quw,Saint Arnold Brewing Company,29.77109,-95.34841,Houston,Houston,brewpubs,False,4.5,77020,TX
1,bw5zHhEUJlLFlzo_6Id2Mw,Buffalo Bayou Brewing,29.772745,-95.379453,Houston,Houston,breweries,False,4.0,77007,TX
2,NQ3SptJthc-zoi93rtNubg,True Anomaly Brewing Company,29.74982,-95.35728,Houston,Houston,breweries,False,4.5,77003,TX
3,Dt2rzxZbXKixSRch8jcPVQ,8th Wonder Brewery,29.749059,-95.355806,Houston,Houston,breweries,False,4.0,77003,TX
4,VaKBKgeVFXa8gYDQfMLmJg,Frost Town Brewing,29.7608,-95.35234,Houston,Houston,brewpubs,False,5.0,77002,TX


In [9]:
# Length of breweries_df
len(breweries_df)

17432

In [10]:
# Create empty List ot hold all Outdoor Gear Stores
outdoor_gear_results = []

In [11]:
# Function to pull stores from yelp with location as the variable
def get_outdoor_stores(location):
    with YelpAPI(api_key) as yelp_api:
        # city_data is the json file with all businesses returned from search
        city_data = yelp_api.search_query(term='Outdoor Gear', location = location)
        # For each brewery in the search, return business name, coordinates, city, and business type
        for i in range(len(city_data['businesses'])):
            country = city_data['businesses'][i]['location']['country']
            if country == 'US':
                yelp_id = city_data['businesses'][i]['id']
                name = city_data['businesses'][i]['name']
                lat = city_data['businesses'][i]['coordinates']['latitude']
                long = city_data['businesses'][i]['coordinates']['longitude']
                original_search_city = location.split(',')[0]
                yelp_city = city_data['businesses'][i]['location']['city']
                business_type = city_data['businesses'][i]['categories'][0]['alias']
                is_closed = city_data['businesses'][i]['is_closed']
                rating = city_data['businesses'][i]['rating']
                zip_code = city_data['businesses'][i]['location']['zip_code']
                state = city_data['businesses'][i]['location']['state']
            
                outdoor_gear_results.append(
                    {
                        'yelp_id':yelp_id,
                        'name':name, 
                        'lat':lat, 
                        'lng': long, 
                        'original_search_city':original_search_city,
                        'yelp_city':yelp_city,
                        'business_type':business_type, 
                        'is_closed':is_closed, 
                        'rating':rating, 
                        'zip_code':zip_code, 
                        'state':state
                    }
                )

In [12]:
# Apply get_outdoor_stores function to each location in the cities_list
for city in cities_list:
    try:
        get_outdoor_stores(city)
    except Exception as e:
        print(e)
        print(f'An exception occurred for {city}')

list index out of range
An exception occurred for Littleton, CO
list index out of range
An exception occurred for Topeka, KS
list index out of range
An exception occurred for Stamford, CT
list index out of range
An exception occurred for Norwalk, CT
list index out of range
An exception occurred for Gulfport, MS


In [13]:
# Convert to pandas DataFrame
outdoor_df = pd.DataFrame(outdoor_gear_results)
outdoor_df.head()

Unnamed: 0,yelp_id,name,lat,lng,original_search_city,yelp_city,business_type,is_closed,rating,zip_code,state
0,9xSfMWqjdJlQghkDQBtH3Q,Whole Earth Provision,29.739638,-95.410855,Houston,Houston,shoes,False,4.5,77098,TX
1,lE3zYqpeqkTrkbIWAf6EBw,5.11 Tactical,29.738527,-95.483354,Houston,Houston,outdoorgear,False,4.5,77057,TX
2,vUBjkadRtCXWJup0DkqIQg,YakGear,29.823106,-95.383425,Houston,Houston,outdoorgear,False,5.0,77022,TX
3,j1wPAGTo8FSMMJySl4oqBw,Gordy & Sons Outfitters,29.768678,-95.398693,Houston,Houston,huntingfishingsupplies,False,5.0,77007,TX
4,pRVSJzx6EwzT3gKi_nvSuA,Bass Pro Shops,29.594926,-95.389867,Houston,Houston,sportswear,False,3.0,77047,TX


In [14]:
# Length of outdoor_df
len(outdoor_df)

14561

In [15]:
# ensure we do not have any zip codes that are empty
breweries_df = breweries_df.loc[breweries_df['zip_code'] != '']
outdoor_df = outdoor_df.loc[outdoor_df['zip_code'] != '']

In [16]:
# cast zip codes as ints
breweries_df['zip_code'] = breweries_df['zip_code'].astype('int')
outdoor_df['zip_code'] = outdoor_df['zip_code'].astype('int')

In [17]:
# fill `breweries` and `outdoor` tables with Yelp search results 
breweries_df.to_sql(
    'breweries', 
    con=ENGINE, 
    if_exists='replace',
    index=False
    )

In [18]:
outdoor_df.to_sql(
    'outdoor', 
    con=ENGINE, 
    if_exists='replace',
    index=False
    )

In [20]:
# update the city_summary table
update_city_summary()