In [1]:
# Import dependencies
import pandas as pd
import requests

from config import api_key
from database_connection import ENGINE
from update_city_summary import update_city_summary

In [2]:
# import yelpAPI
from yelpapi import YelpAPI
import argparse
from pprint import pprint

In [3]:
# Load every row of the `top_us_cities` table (the top 1000 US cities) into a DataFrame
top_us_cities_query = "\nSELECT *\nFROM top_us_cities\n"
cities = pd.read_sql(sql=top_us_cities_query, con=ENGINE)

In [5]:
# Build one "City, ST" search string per row of `cities`
# NOTE(review): the execution counter jumps from In [3] to In [5] and the saved
# output of this cell shows only 25 rows with shuffled index labels, so an
# unsaved cell presumably sub-sampled `cities` before this ran -- confirm and
# restore that step so the notebook survives Restart & Run All.
cities_list = cities['city'] + (', ' + cities['state'])
cities_list

312          Albany, NY
582        Bethesda, MD
113          Irvine, CA
226         Everett, WA
487     Des Plaines, IL
263           Tempe, AZ
109     Jersey City, NJ
470     Westerville, OH
957      San Ysidro, CA
123       Pensacola, FL
882    Cartersville, GA
910      Union City, NJ
36       Louisville, KY
231        Manassas, VA
310           Tracy, CA
873        Waterloo, IA
641       Kingsport, TN
567          Carson, CA
696           Houma, LA
797     Port Orange, FL
501          Quincy, MA
867       Goldsboro, NC
421        Elmhurst, NY
705        Columbus, IN
411       Rochester, MI
dtype: object

In [6]:
# Accumulator that get_breweries() appends one record (dict) per Yelp result to
brewery_results = list()

In [7]:
# Function to pull breweries for a single location
def get_breweries(location):
    """Search Yelp for 'Breweries' near ``location`` and record every hit.

    One dict per returned business is appended to the module-level
    ``brewery_results`` list; the function itself returns ``None``.

    Parameters
    ----------
    location : str
        Search location handed to Yelp unchanged (the notebook passes
        "City, ST" strings). The text before the first comma is stored on
        every record as ``original_search_city``.

    Raises
    ------
    Exception
        Anything raised by ``YelpAPI.search_query`` propagates to the caller.
    KeyError
        If the response or a business lacks one of the fields read below
        (``businesses``, ``id``, ``name``, ``coordinates``, ``location``,
        ``is_closed``, ``rating``).

    Notes
    -----
    NOTE(review): no ``limit``/``offset`` is passed to the search, so only the
    API's default first page of results is captured -- confirm that is intended.
    """
    with YelpAPI(api_key) as yelp_api:
        # city_data is the JSON response with all businesses returned from the search
        city_data = yelp_api.search_query(term='Breweries', location=location)

    # Same for every business in this response, so compute it once
    original_search_city = location.split(',')[0]

    # For each business in the search, keep its id, name, coordinates, city and business type
    for business in city_data['businesses']:
        coordinates = business['coordinates']
        address = business['location']

        # A listing can come back with no categories. Indexing [0] on an empty
        # list would raise and abandon every remaining business for this city,
        # so record the type as missing instead.
        categories = business.get('categories') or []
        business_type = categories[0]['alias'] if categories else None

        brewery_results.append(
            {
                'yelp_id': business['id'],
                'name': business['name'],
                'lat': coordinates['latitude'],
                'lng': coordinates['longitude'],
                'original_search_city': original_search_city,
                'yelp_city': address['city'],
                'business_type': business_type,
                'is_closed': business['is_closed'],
                'rating': business['rating'],
                'zip_code': address['zip_code'],
                'state': address['state'],
            }
        )

In [8]:
# Apply get_breweries function to each location in cities_list
# get_breweries() appends to the shared `brewery_results` list, so empty it in
# place first: re-running this cell then rebuilds the list instead of stacking
# a second copy of every record on top of the first.
brewery_results.clear()
for city in cities_list:
    try:
        get_breweries(city)
    except Exception as e:
        # Best effort: report the failing city and carry on with the rest
        print(e)
        print(f'An exception occurred for {city}')

In [9]:
# Convert the collected brewery records to a pandas DataFrame and preview the first rows
breweries_df = pd.DataFrame(data=brewery_results)
breweries_df.head(n=5)

Unnamed: 0,yelp_id,name,lat,lng,original_search_city,yelp_city,business_type,is_closed,rating,zip_code,state
0,ky2jlrpmBx0hH3mi6hnjOw,Fidens Brewing Company,42.717113,-73.837748,Albany,Colonie,breweries,False,5.0,12205,NY
1,Pvm0E8SyKn-SdqD5XEjN4A,The City Beer Hall,42.64955,-73.75503,Albany,Albany,bars,False,4.0,12207,NY
2,4ToPo-f-SK8vbrDGUjjMeQ,Druthers Brewing Company,42.666249,-73.741568,Albany,Albany,burgers,False,4.0,12204,NY
3,eInggtKDsGX7NnpsY1EyYQ,Fort Orange Brewing,42.667112,-73.743292,Albany,Albany,breweries,False,4.0,12204,NY
4,JkjHR7StocXN8bF6BKd1Sw,Delaware Supply,42.642988,-73.776625,Albany,Albany,beerbar,False,5.0,12209,NY


In [10]:
# Number of rows in breweries_df
breweries_df.shape[0]

455

In [11]:
# Accumulator that get_outdoor_stores() appends one record (dict) per Yelp result to
outdoor_gear_results = list()

In [12]:
# Function to pull outdoor gear stores from Yelp for a single location
def get_outdoor_stores(location):
    """Search Yelp for 'Outdoor Gear' near ``location`` and record every hit.

    One dict per returned business is appended to the module-level
    ``outdoor_gear_results`` list; the function itself returns ``None``.

    Parameters
    ----------
    location : str
        Search location handed to Yelp unchanged (the notebook passes
        "City, ST" strings). The text before the first comma is stored on
        every record as ``original_search_city``.

    Raises
    ------
    Exception
        Anything raised by ``YelpAPI.search_query`` propagates to the caller.
    KeyError
        If the response or a business lacks one of the fields read below
        (``businesses``, ``id``, ``name``, ``coordinates``, ``location``,
        ``is_closed``, ``rating``).

    Notes
    -----
    NOTE(review): no ``limit``/``offset`` is passed to the search, so only the
    API's default first page of results is captured -- confirm that is intended.
    """
    with YelpAPI(api_key) as yelp_api:
        # city_data is the JSON response with all businesses returned from the search
        city_data = yelp_api.search_query(term='Outdoor Gear', location=location)

    # Same for every business in this response, so compute it once
    original_search_city = location.split(',')[0]

    # For each store in the search, keep its id, name, coordinates, city and business type
    for business in city_data['businesses']:
        coordinates = business['coordinates']
        address = business['location']

        # A listing can come back with no categories. Indexing [0] on an empty
        # list would raise and abandon every remaining business for this city,
        # so record the type as missing instead.
        categories = business.get('categories') or []
        business_type = categories[0]['alias'] if categories else None

        outdoor_gear_results.append(
            {
                'yelp_id': business['id'],
                'name': business['name'],
                'lat': coordinates['latitude'],
                'lng': coordinates['longitude'],
                'original_search_city': original_search_city,
                'yelp_city': address['city'],
                'business_type': business_type,
                'is_closed': business['is_closed'],
                'rating': business['rating'],
                'zip_code': address['zip_code'],
                'state': address['state'],
            }
        )

In [13]:
# Apply get_outdoor_stores function to each location in the cities_list
# get_outdoor_stores() appends to the shared `outdoor_gear_results` list, so
# empty it in place first: re-running this cell then rebuilds the list instead
# of stacking a second copy of every record on top of the first.
outdoor_gear_results.clear()
for city in cities_list:
    try:
        get_outdoor_stores(city)
    except Exception as e:
        # Best effort: report the failing city and carry on with the rest
        print(e)
        print(f'An exception occurred for {city}')

In [14]:
# Convert the collected outdoor gear records to a pandas DataFrame and preview the first rows
outdoor_df = pd.DataFrame(data=outdoor_gear_results)
outdoor_df.head(n=5)

Unnamed: 0,yelp_id,name,lat,lng,original_search_city,yelp_city,business_type,is_closed,rating,zip_code,state
0,FkYZFHvbLUJ6_K5xCkLnxw,Field & Stream,42.752953,-73.763981,Albany,Latham,huntingfishingsupplies,False,4.5,12110,NY
1,wablChswRptR38ipjRHXjA,Eastern Mountain Sports,42.68505,-73.840111,Albany,Albany,outdoorgear,False,3.0,12203,NY
2,guyEBgSr3m4-4CLIyTYIMg,L.L. Bean,42.70964,-73.81773,Albany,Albany,sportswear,False,4.0,12205,NY
3,-t47CbsmRt-EGA7C_NBDXg,Mohawk Army Navy,42.77063,-73.89355,Albany,Schenectady,outdoorgear,False,3.5,12304,NY
4,2pJhX07LqqfyJwPq20zc7w,Steiner's Sports,42.60186,-73.789783,Albany,Glenmont,bikes,False,3.0,12077,NY


In [15]:
# Number of rows in outdoor_df
outdoor_df.shape[0]

405

In [16]:
# ensure we do not have any zip codes that are empty or missing
# A missing zip code (None/NaN) compares unequal to '' and would slip through
# a plain `!= ''` filter, only to break the integer cast in the next cell, so
# those rows are dropped as well.
# .copy() makes each result an independent frame, so assigning a column to it
# later does not trigger pandas' SettingWithCopyWarning.
breweries_df = breweries_df.loc[
    breweries_df['zip_code'].notna() & (breweries_df['zip_code'] != '')
].copy()
outdoor_df = outdoor_df.loc[
    outdoor_df['zip_code'].notna() & (outdoor_df['zip_code'] != '')
].copy()

In [17]:
# cast zip codes as ints
# .assign() builds a new frame instead of writing a column into the filtered
# slice produced by the previous cell, which avoids SettingWithCopyWarning.
# NOTE(review): an integer column cannot keep leading zeros (e.g. '02134' is
# stored as 2134) -- confirm the `breweries`/`outdoor` tables are meant to hold
# zip codes as integers.
breweries_df = breweries_df.assign(zip_code=breweries_df['zip_code'].astype('int'))
outdoor_df = outdoor_df.assign(zip_code=outdoor_df['zip_code'].astype('int'))

In [18]:
# Overwrite the `breweries` table with the Yelp brewery search results
breweries_df.to_sql(name='breweries', con=ENGINE, if_exists='replace', index=False)

In [19]:
# Overwrite the `outdoor` table with the Yelp outdoor gear search results
outdoor_df.to_sql(name='outdoor', con=ENGINE, if_exists='replace', index=False)

In [20]:
# update the city_summary table
# NOTE(review): update_city_summary comes from the project-local
# `update_city_summary` module and is not visible here; presumably it rebuilds
# its summary from the `breweries` and `outdoor` tables written by the cells
# above -- confirm, and keep this call as the final step.
update_city_summary()