In [1]:
#Import Dependencies
import pandas as pd
import numpy as np

#Import matplotlib dependencies
import matplotlib.pyplot as plt
import scipy.stats as st
from scipy.stats import linregress

#File to load
# Relative path to the minimum-wage CSV; being relative, it is resolved
# against the current working directory when the file is read.
file_path = "../Data/minwage.csv"

In [2]:
# Read the raw minimum-wage CSV into a DataFrame. The file is decoded as
# cp1252 (Windows-1252) instead of pandas' default UTF-8.
min_wage_df = pd.read_csv(
    file_path,
    encoding="cp1252",
)

# Preview the first five rows.
min_wage_df.head()


Unnamed: 0,Year,State,State.Minimum.Wage,State.Minimum.Wage.2020.Dollars,Federal.Minimum.Wage,Federal.Minimum.Wage.2020.Dollars,Effective.Minimum.Wage,Effective.Minimum.Wage.2020.Dollars,CPI.Average,Department.Of.Labor.Uncleaned.Data,Department.Of.Labor.Cleaned.Low.Value,Department.Of.Labor.Cleaned.Low.Value.2020.Dollars,Department.Of.Labor.Cleaned.High.Value,Department.Of.Labor.Cleaned.High.Value.2020.Dollars,Footnote
0,1968,Alabama,0.0,0.0,1.15,8.55,1.15,8.55,34.8,...,0.0,0.0,0.0,0.0,
1,1968,Alaska,2.1,15.61,1.15,8.55,2.1,15.61,34.8,2.1,2.1,15.61,2.1,15.61,
2,1968,Arizona,0.468,3.48,1.15,8.55,1.15,8.55,34.8,18.72 - 26.40/wk(b),0.468,3.48,0.66,4.91,(b)
3,1968,Arkansas,0.15625,1.16,1.15,8.55,1.15,8.55,34.8,1.25/day(b),0.15625,1.16,0.15625,1.16,(b)
4,1968,California,1.65,12.26,1.15,8.55,1.65,12.26,34.8,1.65(b),1.65,12.26,1.65,12.26,(b)


In [3]:
# List every column of the raw frame, one name per line, so the columns
# worth keeping can be picked out.
for col in min_wage_df.columns.tolist():
    print(col)

Year
State
State.Minimum.Wage
State.Minimum.Wage.2020.Dollars
Federal.Minimum.Wage
Federal.Minimum.Wage.2020.Dollars
Effective.Minimum.Wage
Effective.Minimum.Wage.2020.Dollars
CPI.Average
Department.Of.Labor.Uncleaned.Data
Department.Of.Labor.Cleaned.Low.Value
Department.Of.Labor.Cleaned.Low.Value.2020.Dollars
Department.Of.Labor.Cleaned.High.Value
Department.Of.Labor.Cleaned.High.Value.2020.Dollars
Footnote


In [4]:
# Keep only the year, the state, and the state / federal / effective minimum
# wage columns (each in nominal and 2020 dollars); the CPI, Department of
# Labor and footnote columns are left behind.
clean_min_wage_df = min_wage_df[
    [
        'Year',
        'State',
        'State.Minimum.Wage',
        'State.Minimum.Wage.2020.Dollars',
        'Federal.Minimum.Wage',
        'Federal.Minimum.Wage.2020.Dollars',
        'Effective.Minimum.Wage',
        'Effective.Minimum.Wage.2020.Dollars',
    ]
]

# Preview the first ten rows of the trimmed frame.
clean_min_wage_df.head(10)

Unnamed: 0,Year,State,State.Minimum.Wage,State.Minimum.Wage.2020.Dollars,Federal.Minimum.Wage,Federal.Minimum.Wage.2020.Dollars,Effective.Minimum.Wage,Effective.Minimum.Wage.2020.Dollars
0,1968,Alabama,0.0,0.0,1.15,8.55,1.15,8.55
1,1968,Alaska,2.1,15.61,1.15,8.55,2.1,15.61
2,1968,Arizona,0.468,3.48,1.15,8.55,1.15,8.55
3,1968,Arkansas,0.15625,1.16,1.15,8.55,1.15,8.55
4,1968,California,1.65,12.26,1.15,8.55,1.65,12.26
5,1968,Colorado,1.0,7.43,1.15,8.55,1.15,8.55
6,1968,Connecticut,1.4,10.41,1.15,8.55,1.4,10.41
7,1968,Delaware,1.25,9.29,1.15,8.55,1.25,9.29
8,1968,District of Columbia,1.25,9.29,1.15,8.55,1.25,9.29
9,1968,Florida,0.0,0.0,1.15,8.55,1.15,8.55


In [5]:
#Drop earlier years
# NOTE: this filter is currently disabled, so no rows are removed here.
# Uncommenting the two lines below would keep only rows with Year > 1997.

# drop_early_df = clean_min_wage_df[clean_min_wage_df['Year'] > 1997]
# drop_early_df.head(10)


In [6]:
#Load in Latlngs
# Read the per-state latitude/longitude lookup (decoded as cp1252).
latlong_file_path = "../Data/statelatlong.csv"
latlong_df = pd.read_csv(latlong_file_path, encoding='cp1252')

# Discard the file's own 'State' column and rename 'City' to 'State'; the
# renamed column holds full state names (see the preview below), which is the
# key used to join against the minimum-wage data.
# `columns=` is used instead of a positional axis (`drop('State', 1)`):
# the positional form was deprecated in pandas 1.3 and raises TypeError in 2.0.
latlong_df = (
    latlong_df
    .drop(columns='State')
    .rename(columns={'City': 'State'})
)
latlong_df.head()

Unnamed: 0,Latitude,Longitude,State
0,32.601011,-86.680736,Alabama
1,61.302501,-158.77502,Alaska
2,34.168219,-111.930907,Arizona
3,34.751928,-92.131378,Arkansas
4,37.271875,-119.270415,California


In [7]:

# Attach each state's latitude/longitude to the wage rows. A left join on
# 'State' keeps every wage row even when the lookup has no matching state
# (those rows get NaN coordinates).
minwage_with_latlng = clean_min_wage_df.merge(
    latlong_df,
    how='left',
    on='State',
)

# Preview the merged frame.
minwage_with_latlng.head()

Unnamed: 0,Year,State,State.Minimum.Wage,State.Minimum.Wage.2020.Dollars,Federal.Minimum.Wage,Federal.Minimum.Wage.2020.Dollars,Effective.Minimum.Wage,Effective.Minimum.Wage.2020.Dollars,Latitude,Longitude
0,1968,Alabama,0.0,0.0,1.15,8.55,1.15,8.55,32.601011,-86.680736
1,1968,Alaska,2.1,15.61,1.15,8.55,2.1,15.61,61.302501,-158.77502
2,1968,Arizona,0.468,3.48,1.15,8.55,1.15,8.55,34.168219,-111.930907
3,1968,Arkansas,0.15625,1.16,1.15,8.55,1.15,8.55,34.751928,-92.131378
4,1968,California,1.65,12.26,1.15,8.55,1.65,12.26,37.271875,-119.270415


In [8]:
#Import to SQL
import os

from sqlalchemy import create_engine

# Connection details ("user:password@host:port/database") are read from the
# PROJECT2_DB_CONNECTION environment variable when it is set, so real
# credentials do not have to be stored in the notebook. The fallback is the
# local development database this notebook has always targeted.
rds_connection_string = os.environ.get(
    "PROJECT2_DB_CONNECTION",
    "postgres:postgres@localhost:5432/project2_db",
)
engine = create_engine(f'postgresql://{rds_connection_string}')

In [9]:
# List the tables currently present in the database.
# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API returns the same list of names and works on both.
from sqlalchemy import inspect

inspect(engine).get_table_names()

[]

In [10]:
# Write the merged DataFrame to the 'minwage_data' table, without the
# DataFrame index.
# NOTE(review): if_exists='append' adds rows to an existing table, so running
# this cell more than once inserts the same data again. Confirm whether
# that is intended before re-running the notebook.
minwage_with_latlng.to_sql(
    name='minwage_data',
    con=engine,
    if_exists='append',
    index=False,
)

In [11]:
# Confirm the load worked by reading the minwage_data table back from the
# database and previewing its first five rows.
pd.read_sql_query(
    sql='select * from minwage_data',
    con=engine,
).head()

Unnamed: 0,Year,State,State.Minimum.Wage,State.Minimum.Wage.2020.Dollars,Federal.Minimum.Wage,Federal.Minimum.Wage.2020.Dollars,Effective.Minimum.Wage,Effective.Minimum.Wage.2020.Dollars,Latitude,Longitude
0,1968,Alabama,0.0,0.0,1.15,8.55,1.15,8.55,32.601011,-86.680736
1,1968,Alaska,2.1,15.61,1.15,8.55,2.1,15.61,61.302501,-158.77502
2,1968,Arizona,0.468,3.48,1.15,8.55,1.15,8.55,34.168219,-111.930907
3,1968,Arkansas,0.15625,1.16,1.15,8.55,1.15,8.55,34.751928,-92.131378
4,1968,California,1.65,12.26,1.15,8.55,1.65,12.26,37.271875,-119.270415
