In [1]:
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine, inspect

# PyMySQL
import pymysql
pymysql.install_as_MySQLdb()

from config import mypass

In [3]:
# Extract: read the alcohol CSV into a DataFrame and preview the first rows
alcohol_file = "Resources/alcohol_data.csv"
state_df = pd.read_csv(filepath_or_buffer=alcohol_file)
state_df.head(n=5)

Unnamed: 0,location,year,ethanol_beer_gallons_per_capita,ethanol_wine_gallons_per_capita,ethanol_spirit_gallons_per_capita,ethanol_all_drinks_gallons_per_capita
0,alabama,2016,1.09,0.26,0.65,2.01
1,alabama,2015,1.11,0.26,0.64,2.01
2,alabama,2014,1.13,0.25,0.62,2.0
3,alabama,2013,1.14,0.25,0.6,1.99
4,alabama,2012,1.16,0.24,0.6,2.0


In [4]:
# Transform the state DataFrame: keep the columns of interest, then shorten their names
state_df_cols = [
    "location",
    "year",
    "ethanol_beer_gallons_per_capita",
    "ethanol_wine_gallons_per_capita",
    "ethanol_spirit_gallons_per_capita",
    "ethanol_all_drinks_gallons_per_capita",
]

# Old header -> new header ("year" is not listed, so it keeps its name)
column_renames = {
    "location": "state_name",
    "ethanol_beer_gallons_per_capita": "beer_per_capita",
    "ethanol_wine_gallons_per_capita": "wine_per_capita",
    "ethanol_spirit_gallons_per_capita": "spirit_per_capita",
    "ethanol_all_drinks_gallons_per_capita": "all_drink_per_capita",
}

# Work on a copy of the selected columns so state_df itself is left untouched
state_transformed = state_df[state_df_cols].copy().rename(columns=column_renames)

state_transformed.head()

Unnamed: 0,state_name,year,beer_per_capita,wine_per_capita,spirit_per_capita,all_drink_per_capita
0,alabama,2016,1.09,0.26,0.65,2.01
1,alabama,2015,1.11,0.26,0.64,2.01
2,alabama,2014,1.13,0.25,0.62,2.0
3,alabama,2013,1.14,0.25,0.6,1.99
4,alabama,2012,1.16,0.24,0.6,2.0


In [5]:
# Build a "<state_name>_<year>" string for every row and hold it in a
# one-column DataFrame (the source series is unnamed, so the column label is 0)
state_part = state_transformed["state_name"].astype(str)
year_part = state_transformed["year"].astype(str)
combined_state = pd.DataFrame(state_part + "_" + year_part)
combined_state.head()

Unnamed: 0,0
0,alabama_2016
1,alabama_2015
2,alabama_2014
3,alabama_2013
4,alabama_2012


In [6]:
# Append the combined key column to the transformed frame (column-wise concat).
# Any key column left over from an earlier run of this cell (still labelled 0,
# or already renamed to "combined") is dropped first, so re-running the cell
# does not pile up duplicate columns. On a first run nothing is dropped and the
# result is the same as a plain concat.
previous_key_labels = [0, "combined"]
state_transformed = pd.concat(
    [state_transformed.drop(columns=previous_key_labels, errors="ignore"), combined_state],
    axis=1,
)
state_transformed.head()

Unnamed: 0,state_name,year,beer_per_capita,wine_per_capita,spirit_per_capita,all_drink_per_capita,0
0,alabama,2016,1.09,0.26,0.65,2.01,alabama_2016
1,alabama,2015,1.11,0.26,0.64,2.01,alabama_2015
2,alabama,2014,1.13,0.25,0.62,2.0,alabama_2014
3,alabama,2013,1.14,0.25,0.6,1.99,alabama_2013
4,alabama,2012,1.16,0.24,0.6,2.0,alabama_2012


In [7]:
# Give the appended key column (labelled 0 after the concat) a descriptive name
key_column_rename = {0: "combined"}
state_transformed = state_transformed.rename(columns=key_column_rename)
state_transformed.head()

Unnamed: 0,state_name,year,beer_per_capita,wine_per_capita,spirit_per_capita,all_drink_per_capita,combined
0,alabama,2016,1.09,0.26,0.65,2.01,alabama_2016
1,alabama,2015,1.11,0.26,0.64,2.01,alabama_2015
2,alabama,2014,1.13,0.25,0.62,2.0,alabama_2014
3,alabama,2013,1.14,0.25,0.6,1.99,alabama_2013
4,alabama,2012,1.16,0.24,0.6,2.0,alabama_2012


In [8]:
# Create the database connection.
# The password is percent-encoded before it is placed in the URL: characters
# such as "@", ":", "/" or "%" in a raw password would otherwise be read as
# URL delimiters/escapes and break the connection string. Passwords made only
# of letters, digits and "_.-~" come out unchanged.
# NOTE(review): assumes `mypass` in config holds the raw (not already
# URL-encoded) password -- confirm.
escaped_pass = quote(str(mypass), safe="")
connection_string = f"root:{escaped_pass}@localhost/booze_db"
engine = create_engine(f'mysql://{connection_string}')

In [9]:
# Confirm tables
# Engine.table_names() is deprecated since SQLAlchemy 1.4 and removed in 2.0;
# the Inspector's get_table_names() is the supported way to list the tables.
inspect(engine).get_table_names()

['state', 'weather_state_year']

In [10]:
# Load: write the transformed frame to the "state" table.
# if_exists="replace" replaces an existing table of that name, and
# index=True stores the DataFrame index as a column as well.
state_transformed.to_sql(
    name="state",
    con=engine,
    if_exists="replace",
    index=True,
)