## Gathering Brewery Data

This notebook pulls the list of breweries from the Open Brewery DB API and stores that data in Google BigQuery (GBQ).

In [None]:
import os
import re
import datetime 
from collections import Counter
import requests
import time

import pandas as pd
import numpy as np
import pandas_gbq
import janitor

# Do our imports for the code
from google.cloud import bigquery
from google.oauth2 import service_account

### GBQ Set Up

In this next section we connect to our GBQ project and list the datasets inside it to test the connection.

In [None]:
# These first two values will be different on your machine.
service_path = "/Users/chandler/Dropbox/Teaching/"
service_file = 'umt-msba-037daf11ee16.json' # change this to your authentication information
gbq_proj_id = 'umt-msba' # change this to your project.

# And this should stay the same. The key path is built once with os.path.join
# so it is correct whether or not service_path ends with a path separator.
private_key = os.path.join(service_path, service_file)

# Now we pass in our credentials so that Python has permission to access our project.
credentials = service_account.Credentials.from_service_account_file(private_key)

# And finally we establish our connection
client = bigquery.Client(credentials=credentials, project=gbq_proj_id)

# Listing the datasets is a cheap round trip that proves the connection works.
for item in client.list_datasets():
    print(item.full_dataset_id)

In [None]:
# Endpoint that returns the brewery list; paging is driven by query parameters.
api_url = "https://api.openbrewerydb.org/breweries"

In [None]:
# Settings for the download loop.
PER_PAGE = 20               # rows requested per call; a shorter page marks the last page
MAX_PAGES = 1000            # safety cap so the loop always terminates
MAX_ATTEMPTS = 3            # tries per page before giving up
RETRY_SLEEP_SECONDS = 60    # wait between failed attempts
PAUSE_EVERY = 50            # pause after every this-many pages ...
PAUSE_SECONDS = 30          # ... for this long
REQUEST_TIMEOUT = 30        # seconds before a stalled request is abandoned


def fetch_brewery_page(url, params):
    """Fetch one page of results and return it as a DataFrame.

    Parameters
    ----------
    url : str
        Endpoint to call.
    params : dict
        Query parameters sent with the request (page size and page number).

    Returns
    -------
    pandas.DataFrame
        The rows decoded from the JSON body of a 200 response.

    Raises
    ------
    RuntimeError
        If no attempt out of MAX_ATTEMPTS produced a 200 response. Raising
        here stops an error payload from being parsed as brewery data.
    """
    problem = None
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            r = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as err:
            problem = f'a request error ({err})'
        else:
            if r.status_code == 200:
                return pd.DataFrame(r.json())
            problem = f'a status code of {r.status_code}'

        # No point sleeping after the final attempt; we are about to raise.
        if attempt < MAX_ATTEMPTS:
            print(f'Hit {problem} on attempt {attempt} with params {params}. '
                  f'Sleeping for {RETRY_SLEEP_SECONDS} seconds.')
            time.sleep(RETRY_SLEEP_SECONDS)

    raise RuntimeError(
        f'Giving up after {MAX_ATTEMPTS} attempts with params {params}: last problem was {problem}.'
    )


iteration = 1
params = {'per_page': PER_PAGE}

# Pages are collected in a list and concatenated once at the end; concatenating
# inside the loop re-copies every earlier row on every page.
pages = []
total_rows = 0

while True:
    call_results = fetch_brewery_page(api_url, params)
    pages.append(call_results)
    total_rows += call_results.shape[0]

    if iteration % PAUSE_EVERY == 0:
        print(f'We have done {iteration} iterations. Sleeping for {PAUSE_SECONDS} seconds.')
        time.sleep(PAUSE_SECONDS)

    # A page with fewer rows than requested is the last one.
    if call_results.shape[0] < PER_PAGE:
        break

    iteration += 1
    params['page'] = iteration

    print(f"The total size of the breweries table is {total_rows}")

    if iteration > MAX_PAGES:
        break

# ignore_index gives the combined table one unique running index instead of
# repeating each page's own 0..PER_PAGE-1 labels.
breweries = pd.concat(pages, ignore_index=True)
    


In [None]:
# Save a tab-delimited copy of the downloaded table in the working directory.
breweries.to_csv(path_or_buf="brewery_data.txt", sep="\t")

In [None]:
# Upload the table to BigQuery, replacing any existing copy. The service-account
# credentials created during GBQ set-up are passed explicitly so the upload uses
# the same identity as `client`, and the destination project follows gbq_proj_id
# rather than a hard-coded project name.
pandas_gbq.to_gbq(
    breweries,
    f"{gbq_proj_id}.dram_shop.brewery_data",
    project_id=gbq_proj_id,
    credentials=credentials,
    if_exists="replace",
)