In [None]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sns
import os, warnings, gc

# Grab seaborn's current color palette.
color = sns.color_palette()
# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Load the minimum-wage table; the file is decoded as Windows-1252.
wage = pd.read_csv("../input/Minimum Wage Data.csv", encoding = "Windows-1252")

## General Info

In [None]:
# Preview the first rows of the table.
wage.head()

In [None]:
# Summary statistics for the columns of the table.
wage.describe()

In [None]:
# Column names, dtypes and non-null counts.
wage.info()

## EDA

In [None]:
# Find the rows whose High.Value and Low.Value differ.
# The comparison runs on whole columns at once instead of looping over rows in
# Python. Rows where either value is NaN compare as False and are skipped,
# which is what the row-by-row `abs(w1 - w2) > 0` check did as well.
value_gap = (wage["High.Value"] - wage["Low.Value"]).abs()

# Positional (0-based) row numbers as plain Python ints, suitable for `wage.iloc[...]`.
idex = np.flatnonzero((value_gap > 0).values).tolist()
diff_count = len(idex)

print("There are {} times that some states changed wage".format(diff_count))

In [None]:
# Show only the rows where High.Value and Low.Value differ (positions collected in `idex`).
wage.iloc[idex]

Since not every state increases its minimum wage every year, we will focus on `High.Value`.

In [None]:
# Plotly pieces used for offline (in-notebook) charts.
from plotly import tools
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot

# `plotly.plotly` is the online (Chart Studio) client. From plotly 4 onward it
# lives in the separate `chart_studio` package and importing it raises
# ImportError, which would abort this cell. The charts in this notebook are
# drawn with the offline `iplot`, so a missing online client is tolerated.
try:
    import plotly.plotly as py
except ImportError:
    py = None

# Load plotly.js into the notebook so `iplot` can render figures inline.
init_notebook_mode(connected = True)

print("Plotly environment setup, done!")

In [None]:
states = wage["State"].unique()

# One line per state: year on the x-axis, that state's High.Value on the y-axis.
data = []
for s in states:
    temp = wage.loc[wage["State"] == s, ["Year", "High.Value"]]
    data.append(go.Scatter(x = temp["Year"], y = temp["High.Value"], name = s))

# Figure description passed to plotly as plain dictionaries.
layout = {
    "title": "US minimum wage from 1967 - 2017",
    "xaxis": {"title": "Year", "ticklen": 5, "zeroline": False},
    "yaxis": {"title": "Wage", "ticklen": 5, "zeroline": False},
}
fig = {"data": data, "layout": layout}
iplot(fig)

We found:
* The dataset is incomplete: some states appear to be "paying" a minimum wage of zero
* Without digging deeper or combining other datasets (GDP, population, etc.), we see that minimum wages increase consistently

### Economic Crises during 1967 - 2017
(Resources: https://en.wikipedia.org/wiki/List_of_economic_crises)
* 1970s
    * 1970s energy crisis
        * OPEC oil price shock (1973)
        * 1979 energy crisis (1979)
* 1980s
    * Early 1980s Recession
    * Black Monday (1987)  (US)
    * Savings and loan crisis: failure of 1,043 of the 3,234 S&Ls in the U.S. from 1986 to 1995
* 1990s
    * Early 1990s Recession
* 21st century
    * 2000s
        * Early 2000s recession
            * Dot-com bubble (2000-2002) (US)
        * 2007-2009 Financial Crisis
    * Late-2000s recession (worldwide)
        * 2000s energy crisis (2003-2009) oil price bubble
        * Subprime mortgage crisis (US)(2007-2010)
        * United States housing bubble and United States housing market correction (US)(2003-2011)
        * Automotive industry crisis of 2008–2010 (US)
* 2010s

Question: Do economic crises have a strong effect on U.S. minimum wage?

Problem: Each region of the U.S. has a different mix of industries. For example, states in the South are mostly agriculture and auto manufacturing, while the West Coast is dominated by Internet/software companies.

In [None]:
# Region name -> names found in the "State" column that belong to it.
# "Other" collects the non-state entries ("Federal (FLSA)" and three territories).
regions = {
    "Northeast": [
        "Connecticut", "Maine", "Massachusetts", "New Hampshire", "Rhode Island", "Vermont",
        "New Jersey", "New York", "Pennsylvania",
    ],
    "Mid-West": [
        "Illinois", "Indiana", "Michigan", "Ohio", "Wisconsin", "Iowa", "Kansas", "Minnesota",
        "Missouri", "Nebraska", "North Dakota", "South Dakota",
    ],
    "South": [
        "Delaware", "Florida", "Georgia", "Maryland", "North Carolina", "South Carolina", "Virginia",
        "District of Columbia", "West Virginia", "Alabama", "Kentucky", "Mississippi", "Tennessee",
        "Arkansas", "Louisiana", "Oklahoma", "Texas",
    ],
    "West": [
        "Arizona", "Colorado", "Idaho", "Montana", "Nevada", "New Mexico", "Utah", "Wyoming",
        "Alaska", "California", "Hawaii", "Oregon", "Washington",
    ],
    "Other": ["Federal (FLSA)", "Guam", "Puerto Rico", "U.S. Virgin Islands"],
}

def finding_regions(state):
    """Return the name of the region whose member list contains `state`.

    Regions are checked in the order they are declared in `regions`.
    Returns None when `state` is not listed under any region.
    """
    for region_name, members in regions.items():
        if state in members:
            return region_name
    return None
    
# Tag every row with its region; names not listed in `regions` end up as None.
wage["Region"] = wage["State"].apply(finding_regions)

In [None]:
# Draw one chart per region, with one line per state listed under that region.
for r, states in regions.items():
    data = []
    for s in states:
        temp = wage.loc[wage["State"] == s, ["Year", "High.Value"]]
        data.append(go.Scatter(x = temp["Year"], y = temp["High.Value"], name = s))

    # Figure description passed to plotly as plain dictionaries.
    layout = {
        "title": "{} region minimum wage from 1967 - 2017".format(r),
        "xaxis": {"title": "Year", "ticklen": 5, "zeroline": False},
        "yaxis": {"title": "Wage", "ticklen": 5, "zeroline": False},
    }
    fig = {"data": data, "layout": layout}
    iplot(fig)

### To be continued...