<a href="https://colab.research.google.com/github/DonnaVakalis/Livability/blob/master/Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Minimum Wage Exploration

---

More notes here later...


## Setup



### Import Libraries


In [None]:
# Imports and global options

import pandas as pd
from matplotlib import pyplot as plt
import zipfile
import os
from google.colab import drive

import plotly.express as px 

# Render floats with thousands separators and two decimal places
pd.set_option('display.float_format', '{:,.2f}'.format)

# Attach Google Drive so files under "My Drive" are reachable by path
drive.mount('/content/gdrive/')
# check !ls "/content/gdrive/My Drive/Colab Notebooks/min_wage"

### Import Data

Data provenance: 
* Cleaned Min Wage Data [here](https://www.numbeo.com/property-investment/rankings_by_country.jsp?title=2020&displayColumn=6)
* Further wrangling performed by DJW [here](https://danieljwilson.com/datasets/)
* Includes addition by [DJW](https://danieljwilson.com) of Living Wage data [here](https://livingwage.mit.edu/)
* Election data from [MIT Election Lab](https://electionlab.mit.edu/data)

In [None]:
# Minimum wage data, one row per state (2020)
df_min = pd.read_csv(
    "https://danieljwilson.com/datasets/min_wage/min_wage_usa_2020.csv",
    encoding='ISO-8859-1',
)

# Lookup table of USPS two-letter state codes
df_codes = pd.read_csv(
    "https://danieljwilson.com/datasets/min_wage/USPS_two_letter_state.csv",
    encoding='unicode_escape',
)

# Election data, read from the mounted Google Drive folder
df_pol = pd.read_csv(
    '/content/gdrive/My Drive/Colab Notebooks/min_wage/1976-2016-president.csv'
)

In [None]:
# Summarise the minimum-wage frame: column names, dtypes and non-null counts
df_min.info()

In [None]:
# Summarise the state-code lookup frame: column names, dtypes and non-null counts
df_codes.info()

### Clean/Tidy Data

In [None]:
# Attach each state's USPS two-letter code: Plotly automatically maps these
# specific abbreviations to locations on the map.
# Left join on 'State' so every row of df_min is kept.
df_min = df_min.merge(df_codes, how='left', on='State')
df_min.tail()

Unnamed: 0,State,Wage,lw_10,lw_22,Abbreviation
46,Virginia,7.25,14.0,17.62,VA
47,Washington,13.5,13.47,17.45,WA
48,West Virginia,8.75,10.83,14.76,WV
49,Wisconsin,7.25,11.4,16.49,WI
50,Wyoming,7.25,11.05,16.54,WY


## Exploration 1: 2020 minimum wage by state


### Map of minimum wage

In [None]:
# Build the state-level map and apply the layout in a single expression;
# update_layout returns the same figure, so `fig` is still the plotly Figure.
fig = px.choropleth(
    data_frame=df_min,
    locations="Abbreviation",                          # column holding the state codes
    locationmode='USA-states',                         # interpret the codes as US states
    color="Wage",                                      # column driving the fill colour
    hover_name="State",                                # column shown as the hover title
    color_continuous_scale=px.colors.sequential.RdBu,  # colour palette
).update_layout(
    title_text='Minimum Wage by State, 2020',  # figure title
    geo_scope='usa',                           # show only the USA, not the globe
)
fig.show()  # render the figure in the cell output

### Overlay relative minimum wage with other stats: 
* proportion of living wage
* state political party 
* state average age
* Walmarts per capita...

#### proportion of living wage

In [None]:
# Add a column holding minimum wage as a proportion of the living wage
# (Wage / lw_10), rounded to two decimals.
# The earlier duplicate assignment on the undefined name `min_df` was removed:
# it raised NameError and stopped the cell before anything below could run.
# NOTE(review): lw_10 is presumably one of the living-wage variants from the
# source data -- confirm which household type it represents.
df_min['prop_lw10'] = (df_min.Wage / df_min.lw_10).round(2)

fig = px.choropleth(df_min,
                    locations="Abbreviation",  # DataFrame column with locations
                    color="prop_lw10",  # DataFrame column with color values
                    hover_name="State", # DataFrame column hover info
                    locationmode = 'USA-states', # Set to plot as US States
                    color_continuous_scale= px.colors.sequential.gray) # Colour palette
fig.update_layout(
    title_text = 'Proportion Minimum Wage:Living Wage by State, 2020', # Create a Title
    geo_scope='usa',  # Plot only the USA instead of globe
)
fig.show()  # Output the plot to the screen

NameError: ignored

#### political party

In [None]:
# Read the election results and keep only the columns used here
df_pol = pd.read_csv(
    '/content/gdrive/My Drive/Colab Notebooks/min_wage/1976-2016-president.csv'
).loc[:, ['year', 'state', 'state_po', 'party']]
df_pol.info()

# Count the rows for each (year, state_po) pair; after reset_index the count
# column carries the default label 0
df_pol = df_pol.groupby(['year', 'state_po']).size().reset_index()
df_pol.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3740 entries, 0 to 3739
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   year      3740 non-null   int64 
 1   state     3740 non-null   object
 2   state_po  3740 non-null   object
 3   party     3404 non-null   object
dtypes: int64(1), object(3)
memory usage: 117.0+ KB


Unnamed: 0,year,state_po,0
0,1976,AK,4
1,1976,AL,7
2,1976,AR,4
3,1976,AZ,8
4,1976,CA,7


#### Average Age

#### Walmarts per capita

## Exploration 2: 2020 minimum wage by city


In [None]:
 # libraries
# Basemap is a class inside the mpl_toolkits.basemap module; a bare
# `import Basemap` raises ModuleNotFoundError. If the runtime does not ship the
# package, install `basemap` in a setup cell before running this one.
from mpl_toolkits.basemap import Basemap
import numpy as np
import matplotlib.pyplot as plt

# Make a data frame with the GPS of a few cities.
# The 'lon' / 'lat' labels were previously swapped (e.g. Paris is lon 2, lat 49);
# the values themselves are unchanged.
data = pd.DataFrame({
'lon':[-58, 2, 145, 30.32, -4.03, -73.57, 36.82, -38.5],
'lat':[-34, 49, -38, 59.93, 5.33, 45.52, -1.29, -12.97],
'name':['Buenos Aires', 'Paris', 'melbourne', 'St Petersbourg', 'Abidjan', 'Montreal', 'Nairobi', 'Salvador']
})

# A basic map, bounded by its lower-left and upper-right corners in degrees
m=Basemap(llcrnrlon=-160, llcrnrlat=-75,urcrnrlon=160,urcrnrlat=80)
m.drawmapboundary(fill_color='#A6CAE0', linewidth=0)
m.fillcontinents(color='grey', alpha=0.7, lake_color='grey')
m.drawcoastlines(linewidth=0.1, color="white")

# Add a marker per city of the data frame: x is longitude, y is latitude
m.plot(data['lon'], data['lat'], linestyle='none', marker="o", markersize=16, alpha=0.6, c="orange", markeredgecolor="black", markeredgewidth=1)

 

ModuleNotFoundError: ignored

In [None]:
# State-level minimum-wage map: construct the figure and set its layout in one
# chained expression (update_layout hands back the same figure object).
fig = px.choropleth(
    data_frame=df_min,
    locations="Abbreviation",                          # column holding the state codes
    locationmode='USA-states',                         # interpret the codes as US states
    color="Wage",                                      # column driving the fill colour
    hover_name="State",                                # column shown as the hover title
    color_continuous_scale=px.colors.sequential.RdBu,  # colour palette
).update_layout(
    title_text='Minimum Wage by State, 2020',  # figure title
    geo_scope='usa',                           # show only the USA, not the globe
)
fig.show()  # render the figure in the cell output