In [1]:
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()

## Importing csv files to dataframes

In [12]:
beers_file = "Resources/beers.csv"
beers_df = pd.read_csv(beers_file).fillna(0).rename(columns={"Unnamed: 0":"index", "name":"beer_name"})
beers_df

Unnamed: 0,index,abv,ibu,id,beer_name,style,brewery_id,ounces
0,0,0.050,0.0,1436,Pub Beer,American Pale Lager,408,12.0
1,1,0.066,0.0,2265,Devil's Cup,American Pale Ale (APA),177,12.0
2,2,0.071,0.0,2264,Rise of the Phoenix,American IPA,177,12.0
3,3,0.090,0.0,2263,Sinister,American Double / Imperial IPA,177,12.0
4,4,0.075,0.0,2262,Sex and Candy,American IPA,177,12.0
...,...,...,...,...,...,...,...,...
2405,2405,0.067,45.0,928,Belgorado,Belgian IPA,424,12.0
2406,2406,0.052,0.0,807,Rail Yard Ale,American Amber / Red Ale,424,12.0
2407,2407,0.055,0.0,620,B3K Black Lager,Schwarzbier,424,12.0
2408,2408,0.055,40.0,145,Silverback Pale Ale,American Pale Ale (APA),424,12.0


In [13]:
breweries_file = "Resources/breweries.csv"
breweries_df = pd.read_csv(breweries_file).rename(columns={"Unnamed: 0":"brewery_id", "name":"brewery_name"})
breweries_df

Unnamed: 0,brewery_id,brewery_name,city,state
0,0,NorthGate Brewing,Minneapolis,MN
1,1,Against the Grain Brewery,Louisville,KY
2,2,Jack's Abby Craft Lagers,Framingham,MA
3,3,Mike Hess Brewing Company,San Diego,CA
4,4,Fort Point Beer Company,San Francisco,CA
...,...,...,...,...
553,553,Covington Brewhouse,Covington,LA
554,554,Dave's Brewfarm,Wilson,WI
555,555,Ukiah Brewing Company,Ukiah,CA
556,556,Butternuts Beer and Ale,Garrattsville,NY


## Windows Path

In [None]:
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

## Mac Path

In [5]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

## Scraping Data from Brewers Association Website

In [None]:
url = 'https://www.brewersassociation.org/statistics-and-data/state-craft-beer-stats/'
browser.visit(url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [None]:
#                           NOT WORKING 
#State cards with data to scrape
states_cards = soup.find('section', class_='site-content wide')
states = soup.find_all('div', class_='stat-container')


#Set up lists to append into
state_list = []
million_dollars_list = []
breweries_per_capita_list = []
barrels_produced_list = []
gallons_per_adult_list = []


In [None]:
for state in states:
    state_name = state.find('h1').text.strip()
    state_list.append(state_name)
    #million dollars is the first one
    million_dollars = state.find('span', class_='total').find('span', class_='count').text.strip()
    million_dollars_list.append(million_dollars)
    breweries_per_capita = state.find('span', class_="bpc").find('span', class_= 'count').text.strip()
    breweries_per_capita_list.append(breweries_per_capita)
    #barrels produced is the second one
    barrels_produced = state.find(id ='production').find('span', class_='total').find('span', class_='count').text.strip()
    barrels_produced_list.append(barrels_produced)
    gallons_per_adult = state.find(id = 'production').find('span', class_='per-capita').find('span', class_='count').text.strip()
    gallons_per_adult_list.append(gallons_per_adult)

In [None]:
#working
state_list

In [None]:
million_dollars_list

In [None]:
#working
breweries_per_capita_list

In [None]:
#not working
barrels_produced_list

In [None]:
#grabbing breweries per capita
gallons_per_adult_list

In [None]:
brewers_association_data = pd.DataFrame(
    {'state': state_list,
     'millions_sales': million_dollars_list,
     'breweries_per_capita': breweries_per_capita_list,
     'barrels_produced': barrels_produced_list,
     'gallons_per_adult': gallons_per_adult_list
    })


In [None]:
brewers_association_data

## Transforming Data  (example code for what we will do later)

In [None]:
 # Create a filtered dataframe from specific columns
#premise_cols = ["License Serial Number", "Premises Name", "County ID Code"]
#premise_transformed= premise_df[premise_cols].copy()

# Rename the column headers
#premise_transformed = premise_transformed.rename(columns={"License Serial Number": "id",
                                                          "Premises Name": "premise_name",
                                                          "County ID Code": "county_id"})

# Clean the data by dropping duplicates and setting the index
#premise_transformed.drop_duplicates("id", inplace=True)
#premise_transformed.set_index("id", inplace=True)

#premise_transformed.head()

In [None]:
#county_cols = ["ID", "County Name (Licensee)", "County ID Code", "License Count"]
#county_transformed = county_df[county_cols].copy()

# Rename the column headers
#county_transformed = county_transformed.rename(columns={"ID": "id",
                                                         "County Name (Licensee)": "county_name",
                                                         "License Count": "license_count",
                                                         "County ID Code": "county_id"})

# Set index
#county_transformed.set_index("id", inplace=True)

#county_transformed.head()

## Connecting to postgres

In [6]:
engine = create_engine('sqlite:///Resources/brew.sqlite')
Base.metadata.create_all(engine)
session = Session(engine)

In [16]:
# Confirm tables
beers_df.to_sql('beers', con=engine, if_exists='append', index=True)
breweries_df.to_sql('breweries', con=engine, if_exists='append', index=True)

In [17]:
engine.execute('CREATE VIEW brews AS SELECT r.brewery_id, r.city, r.state, e.beer_name, e.abv, e.ibu, e.id, e.style, e.ounces FROM beers e INNER JOIN breweries r ON r.brewery_id=e.brewery_id')

<sqlalchemy.engine.result.ResultProxy at 0x12287de80>

In [18]:
combined_df = pd.DataFrame(engine.execute('SELECT * FROM brews')).rename(columns="0":"brewery_id", r.city, r.state, e.beer_name, e.abv, e.ibu, e.id, e.style, e.ounces FROM beers e INNER JOIN breweries r ON r.brewery_id=e.brewery_id')
combined_df

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,408,Bend,OR,Pub Beer,0.050,0.0,1436,American Pale Lager,12.0
1,177,Gary,IN,Devil's Cup,0.066,0.0,2265,American Pale Ale (APA),12.0
2,177,Gary,IN,Rise of the Phoenix,0.071,0.0,2264,American IPA,12.0
3,177,Gary,IN,Sinister,0.090,0.0,2263,American Double / Imperial IPA,12.0
4,177,Gary,IN,Sex and Candy,0.075,0.0,2262,American IPA,12.0
...,...,...,...,...,...,...,...,...,...
2405,424,Denver,CO,Belgorado,0.067,45.0,928,Belgian IPA,12.0
2406,424,Denver,CO,Rail Yard Ale,0.052,0.0,807,American Amber / Red Ale,12.0
2407,424,Denver,CO,B3K Black Lager,0.055,0.0,620,Schwarzbier,12.0
2408,424,Denver,CO,Silverback Pale Ale,0.055,40.0,145,American Pale Ale (APA),12.0
