In [1]:
import pandas as pd
import numpy as np
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect, join, outerjoin, MetaData, Table

In [3]:
#Extract csv into dataframes

wage_file = "Data/Minimum Wage Data.csv"
min_wage_df = pd.read_csv(wage_file, low_memory=False, encoding= 'latin')
min_wage_df.head()

Unnamed: 0,Year,State,State.Minimum.Wage,State.Minimum.Wage.2020.Dollars,Federal.Minimum.Wage,Federal.Minimum.Wage.2020.Dollars,Effective.Minimum.Wage,Effective.Minimum.Wage.2020.Dollars,CPI.Average,Department.Of.Labor.Uncleaned.Data,Department.Of.Labor.Cleaned.Low.Value,Department.Of.Labor.Cleaned.Low.Value.2020.Dollars,Department.Of.Labor.Cleaned.High.Value,Department.Of.Labor.Cleaned.High.Value.2020.Dollars,Footnote
0,1968,Alabama,0.0,0.0,1.15,8.55,1.15,8.55,34.8,...,0.0,0.0,0.0,0.0,
1,1968,Alaska,2.1,15.61,1.15,8.55,2.1,15.61,34.8,2.1,2.1,15.61,2.1,15.61,
2,1968,Arizona,0.468,3.48,1.15,8.55,1.15,8.55,34.8,18.72 - 26.40/wk(b),0.468,3.48,0.66,4.91,(b)
3,1968,Arkansas,0.15625,1.16,1.15,8.55,1.15,8.55,34.8,1.25/day(b),0.15625,1.16,0.15625,1.16,(b)
4,1968,California,1.65,12.26,1.15,8.55,1.65,12.26,34.8,1.65(b),1.65,12.26,1.65,12.26,(b)


In [5]:
min_wage_df.columns

Index(['Year', 'State', 'State.Minimum.Wage',
       'State.Minimum.Wage.2020.Dollars', 'Federal.Minimum.Wage',
       'Federal.Minimum.Wage.2020.Dollars', 'Effective.Minimum.Wage',
       'Effective.Minimum.Wage.2020.Dollars', 'CPI.Average',
       'Department.Of.Labor.Uncleaned.Data',
       'Department.Of.Labor.Cleaned.Low.Value',
       'Department.Of.Labor.Cleaned.Low.Value.2020.Dollars',
       'Department.Of.Labor.Cleaned.High.Value',
       'Department.Of.Labor.Cleaned.High.Value.2020.Dollars', 'Footnote'],
      dtype='object')

In [6]:
min_wage_df = min_wage_df.rename(columns={'State.Minimum.Wage': 'Past_State_Min_Wage', 'State.Minimum.Wage.2020.Dollars':'2020_State_Min_Wage',
                                          'Federal.Minimum.Wage':'Past_Fed_Min_Wage', 'Federal.Minimum.Wage.2020.Dollars':'2020_Fed_Min_Wage',
                                          'Effective.Minimum.Wage':'Past_Eff_Min_Wage','Effective.Minimum.Wage.2020.Dollars':'2020_Eff_Min_Wage',
                                          'CPI.Average': 'CPI_Avg', 'Department.Of.Labor.Cleaned.Low.Value':'DOL_Low', 'Department.Of.Labor.Cleaned.Low.Value.2020.Dollars':
                                          '2020_DOL_Low','Department.Of.Labor.Cleaned.High.Value':'DOL_High','Department.Of.Labor.Cleaned.High.Value.2020.Dollars':'2020_DOL_High'})

In [9]:
min_wage_df.columns
min_wage_df = min_wage_df[['Year', 'State', 'Past_State_Min_Wage', '2020_State_Min_Wage',
       'Past_Fed_Min_Wage', '2020_Fed_Min_Wage', 'Past_Eff_Min_Wage',
       '2020_Eff_Min_Wage', 'CPI_Avg','DOL_Low', '2020_DOL_Low', 'DOL_High', '2020_DOL_High']]
min_wage_df.head()

Unnamed: 0,Year,State,Past_State_Min_Wage,2020_State_Min_Wage,Past_Fed_Min_Wage,2020_Fed_Min_Wage,Past_Eff_Min_Wage,2020_Eff_Min_Wage,CPI_Avg,DOL_Low,2020_DOL_Low,DOL_High,2020_DOL_High
0,1968,Alabama,0.0,0.0,1.15,8.55,1.15,8.55,34.8,0.0,0.0,0.0,0.0
1,1968,Alaska,2.1,15.61,1.15,8.55,2.1,15.61,34.8,2.1,15.61,2.1,15.61
2,1968,Arizona,0.468,3.48,1.15,8.55,1.15,8.55,34.8,0.468,3.48,0.66,4.91
3,1968,Arkansas,0.15625,1.16,1.15,8.55,1.15,8.55,34.8,0.15625,1.16,0.15625,1.16
4,1968,California,1.65,12.26,1.15,8.55,1.65,12.26,34.8,1.65,12.26,1.65,12.26


In [14]:
min_wage_df['Past_Perc_of_Fed'] = round((min_wage_df['Past_State_Min_Wage']/min_wage_df['Past_Fed_Min_Wage']) * 100, 2)
min_wage_df['2020_Perc_of_Fed'] = round((min_wage_df['2020_State_Min_Wage']/min_wage_df['2020_Fed_Min_Wage']) * 100, 2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [18]:
min_wage_df.columns
min_wage_df = min_wage_df[['Year', 'State', 'Past_State_Min_Wage', '2020_State_Min_Wage',
       'Past_Fed_Min_Wage', '2020_Fed_Min_Wage', 'Past_Eff_Min_Wage',
       '2020_Eff_Min_Wage', 'CPI_Avg', 'Past_Perc_of_Fed', '2020_Perc_of_Fed']]
min_wage_df.head()

Unnamed: 0,Year,State,Past_State_Min_Wage,2020_State_Min_Wage,Past_Fed_Min_Wage,2020_Fed_Min_Wage,Past_Eff_Min_Wage,2020_Eff_Min_Wage,CPI_Avg,Past_Perc_of_Fed,2020_Perc_of_Fed
0,1968,Alabama,0.0,0.0,1.15,8.55,1.15,8.55,34.8,0.0,0.0
1,1968,Alaska,2.1,15.61,1.15,8.55,2.1,15.61,34.8,182.61,182.57
2,1968,Arizona,0.468,3.48,1.15,8.55,1.15,8.55,34.8,40.7,40.7
3,1968,Arkansas,0.15625,1.16,1.15,8.55,1.15,8.55,34.8,13.59,13.57
4,1968,California,1.65,12.26,1.15,8.55,1.65,12.26,34.8,143.48,143.39


In [66]:
educ_path = 'Data/Education.csv'
educ_df = pd.read_csv(educ_path)
educ_df.head()

Unnamed: 0,FIPS Code,State,Area name,2003 Rural-urban Continuum Code,2003 Urban Influence Code,2013 Rural-urban Continuum Code,2013 Urban Influence Code,"Less than a high school diploma, 1970","High school diploma only, 1970","Some college (1-3 years), 1970",...,"Percent of adults completing some college or associate's degree, 2000","Percent of adults with a bachelor's degree or higher, 2000","Less than a high school diploma, 2011-2015","High school diploma only, 2011-2015","Some college or associate's degree, 2011-2015","Bachelor's degree or higher, 2011-2015","Percent of adults with less than a high school diploma, 2011-2015","Percent of adults with a high school diploma only, 2011-2015","Percent of adults completing some college or associate's degree, 2011-2015","Percent of adults with a bachelor's degree or higher, 2011-2015"
0,0,US,United States,,,,,52373312,34158051,11650730,...,27.4,24.4,28229094,58722528,61558628,62952272,13.3,27.8,29.1,29.8
1,1000,AL,Alabama,,,,,1062306,468269,136287,...,25.9,19.0,509891,1005295,962515,761650,15.7,31.0,29.7,23.5
2,1001,AL,Autauga County,2.0,2.0,2.0,2.0,6611,3757,933,...,26.9,18.0,4656,12182,11044,8437,12.8,33.5,30.4,23.2
3,1003,AL,Baldwin County,4.0,5.0,3.0,2.0,18726,8426,2334,...,29.3,23.1,14360,39431,43500,39710,10.5,28.8,31.8,29.0
4,1005,AL,Barbour County,6.0,6.0,6.0,6.0,8120,2242,581,...,21.3,10.9,5021,6490,4943,2354,26.7,34.5,26.3,12.5


In [74]:
state_educ_df = educ_df[educ_df['2003 Rural-urban Continuum Code'].isnull()]

In [75]:
state_educ_df = state_educ_df

Unnamed: 0,FIPS Code,State,Area name,2003 Rural-urban Continuum Code,2003 Urban Influence Code,2013 Rural-urban Continuum Code,2013 Urban Influence Code,"Less than a high school diploma, 1970","High school diploma only, 1970","Some college (1-3 years), 1970",...,"Percent of adults completing some college or associate's degree, 2000","Percent of adults with a bachelor's degree or higher, 2000","Less than a high school diploma, 2011-2015","High school diploma only, 2011-2015","Some college or associate's degree, 2011-2015","Bachelor's degree or higher, 2011-2015","Percent of adults with less than a high school diploma, 2011-2015","Percent of adults with a high school diploma only, 2011-2015","Percent of adults completing some college or associate's degree, 2011-2015","Percent of adults with a bachelor's degree or higher, 2011-2015"
0,0,US,United States,,,,,52373312,34158051,11650730,...,27.4,24.4,28229094,58722528,61558628,62952272,13.3,27.8,29.1,29.8
1,1000,AL,Alabama,,,,,1062306,468269,136287,...,25.9,19.0,509891,1005295,962515,761650,15.7,31.0,29.7,23.5
69,2000,AK,Alaska,,,,,44994,50820,20052,...,35.7,24.7,36493,129324,168702,129921,7.9,27.8,36.3,28.0
70,2010,AK,Aleutian Islands,,,,,1237,1112,380,...,,,,,,,,,,
80,2105,AK,Hoonah-Angoon Census Area,,,9.0,10.0,,,,...,,,155,506,524,430,9.6,31.3,32.4,26.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3011,53000,WA,Washington,,,,,666656,661800,265140,...,34.4,27.7,451591,1097839,1619878,1552130,9.6,23.3,34.3,32.9
3051,54000,WV,West Virginia,,,,,566026,267165,70605,...,21.0,14.8,195354,528555,325536,248673,15.0,40.7,25.1,19.2
3107,55000,WI,Wisconsin,,,,,1060531,811048,230874,...,28.1,22.4,347428,1239523,1208472,1077696,9.0,32.0,31.2,27.8
3180,56000,WY,Wyoming,,,,,65252,63630,26074,...,35.0,21.9,29566,112872,143034,98800,7.7,29.4,37.2,25.7


In [40]:
# create engine to postgres
connection_string = "postgres:postgres@localhost:5432/min_wage_DB"

# reflect the tables
engine = create_engine(f'postgresql://{connection_string}') 



In [24]:
#add dataaframe to table
min_wage_df.to_sql(name='min_wage', con=engine, if_exists='replace', index=True)

In [25]:
# reflect an existing database into a new model
Base = automap_base()

# reflect the tables
Base.prepare(engine, reflect =True)

In [26]:

# Confirm tables
engine.table_names()


['min_wage']