In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff

import dash
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

# Stylesheet handed to the Dash app when it is constructed.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

## Download and wrangle the data
# Response codes that are read in as missing values.
missing_codes = ['IAP', 'IAP,DK,NA,uncodeable', 'NOT SURE',
                 'DK', 'IAP, DK, NA, uncodeable', '.a', "CAN'T CHOOSE"]

gss = pd.read_csv(
    "https://github.com/jkropko/DS-6001/raw/master/localdata/gss2018.csv",
    encoding='cp1252',
    na_values=missing_codes,
)

# Raw GSS columns kept for the dashboard.
mycols = [
    'id', 'wtss', 'sex', 'educ', 'region', 'age', 'coninc',
    'prestg10', 'mapres10', 'papres10', 'sei10', 'satjob',
    'fechld', 'fefam', 'fepol', 'fepresch', 'meovrwrk',
]

# Raw variable name -> readable column name. 'fehire' and 'fejobaff' are not
# among the selected columns, so rename() leaves nothing to change for them.
readable_names = {
    'wtss': 'weight',
    'educ': 'education',
    'coninc': 'income',
    'prestg10': 'job_prestige',
    'mapres10': 'mother_job_prestige',
    'papres10': 'father_job_prestige',
    'sei10': 'socioeconomic_index',
    'fechld': 'relationship',
    'fefam': 'male_breadwinner',
    'fehire': 'hire_women',
    'fejobaff': 'preference_hire_women',
    'fepol': 'men_bettersuited',
    'fepresch': 'child_suffer',
    'meovrwrk': 'men_overwork',
}
gss_clean = gss[mycols].rename(columns=readable_names)

# The top age bracket is stored as text; map it to '89' so the whole column
# can be cast to float.
gss_clean['age'] = (
    gss_clean['age']
    .replace({'89 or older': '89'})
    .astype('float')
)

## Generate the individual tables and figures

### Markdown text
# Introductory copy (Markdown syntax) shown at the top of the dashboard.
# NOTE(review): the "code book" link at the end reuses the About-The-GSS URL
# of the quotation before it -- confirm the intended target.
markdown_text = '''
The gender wage gap is a heated and much studied topic. Questions include ["What is the gender pay gap and is it real?"](https://www.epi.org/publication/what-is-the-gender-pay-gap-and-is-it-real/) and which ["systemic issues are at the root of lower wages for working women"](https://blog.dol.gov/2021/03/19/5-facts-about-the-state-of-the-gender-pay-gap). Given that the gender wage gap appears to be a persistent problem, and that the ["Equal Pay Act has been the law for more than 50 years"](https://nwlc.org/issue/equal-pay-and-the-wage-gap/), it is puzzling - if we are a society that values equality, justice and fairness - that we continue to debate and - in fact - tolerate it.

This study leverages data from the General Social Survey (GSS). According to the GSS website, the GSS ["is a nationally representative survey of adults in the United States conducted since 1972. The GSS collects data on contemporary American society in order to monitor and explain trends in opinions, attitudes and behaviors. The GSS has adapted questions from earlier surveys, thereby allowing researchers to conduct comparisons for up to 80 years."](http://www.gss.norc.org/About-The-GSS) The code book can be found [here](http://www.gss.norc.org/About-The-GSS).
'''

### Table
# Per-gender summary: group size plus the mean of income, job prestige,
# socioeconomic index and education.
gss_display = gss_clean.groupby('sex').agg({'sex': 'size',
                                            'income': 'mean',
                                            'job_prestige': 'mean',
                                            'socioeconomic_index': 'mean',
                                            'education': 'mean'})

# The size aggregate is labelled 'Count' so it cannot collide with the
# 'Gender' label that the group key receives after reset_index() below;
# labelling both 'Gender' produced two identically named columns.
gss_display = gss_display.rename(columns={'sex': 'Count',
                                          'income': 'Avg. Income',
                                          'job_prestige': 'Avg. Occup. Prestige',
                                          'socioeconomic_index': 'Avg. Socio-eco Index',
                                          'education': 'Avg. Education'})
gss_display = gss_display.round(2)

# Move the group key out of the index and give it its display name.
gss_display = gss_display.reset_index().rename(columns={'sex': 'Gender'})

table = ff.create_table(gss_display)
table.show()


### Barplot
# Number of rows for every (sex, male_breadwinner) combination, flattened
# into one row per combination with display-ready column names.
gss_bar = (
    gss_clean
    .groupby(['sex', 'male_breadwinner'])
    .size()
    .reset_index(name='Count')  # 'Count' keeps the size column distinct from the group keys
    .rename(columns={'sex': 'Gender',
                     'male_breadwinner': 'Male as Breadwinner'})
)

# Grouped bars: one cluster per answer, one bar per gender, count printed on
# each bar.
fig_bar = px.bar(
    gss_bar,
    x='Male as Breadwinner',
    y='Count',
    color='Gender',
    hover_data=['Gender', 'Male as Breadwinner', 'Count'],
    barmode='group',
    text='Count',
)
fig_bar.show()

### Scatter plot with trend lines
# Keep only rows with a recorded sex and expose that column as 'Gender'.
gss_scatter = (
    gss_clean
    .dropna(subset=['sex'])
    .rename(columns={'sex': 'Gender'})
)

# Income against job prestige, coloured by gender, with a LOWESS trend line
# per colour group.
fig_scatter = px.scatter(
    gss_scatter,
    x='job_prestige',
    y='income',
    color='Gender',
    opacity=.5,
    trendline='lowess',
    height=600,
    width=600,
    labels={'job_prestige': 'Occupational Prestige',
            'income': 'Income'},
    hover_data=['income', 'job_prestige', 'Gender'],
)
fig_scatter.show()

### Box plots
# Two box plots split by sex, each with a blank x-axis title and the legend
# switched off.

# Box A: income by sex.
fig_box2A = px.box(
    gss_clean,
    x='sex',
    y='income',
    labels={'income': 'Income', 'sex': ''},
).update_layout(showlegend=False)
fig_box2A.show()

# Box B: job prestige by sex.
fig_box2B = px.box(
    gss_clean,
    x='sex',
    y='job_prestige',
    labels={'job_prestige': 'Occupational Prestige', 'sex': ''},
).update_layout(showlegend=False)
fig_box2B.show()

### Facets on occupational prestige
# Bin job prestige into six quantile-based groups and drop incomplete rows.
# assign() builds a new frame, so the binned column is never written into a
# slice of gss_clean (which triggered pandas' chained-assignment warning).
gss_sub = (
    gss_clean[['income', 'sex', 'job_prestige']]
    .assign(job_prestige=lambda frame: pd.qcut(frame['job_prestige'], q=6))
    .rename(columns={'job_prestige': 'Occupational Prestige Score'})
    .dropna()
)

# One income-by-sex box plot per prestige bin, laid out two facets per row.
# Only columns that exist in gss_sub are relabelled.
fig_facet = px.box(gss_sub, x='sex', y='income', color='sex',
                   facet_col='Occupational Prestige Score', facet_col_wrap=2,
                   labels={'income': 'Income', 'sex': ''})

fig_facet.update_layout(showlegend=False)
fig_facet.show()

### Scatterplot data
# Intentionally empty. The selection that stood here (ft_columns, cat_columns,
# anes_ft) read from a frame named `anes`, which this file never loads, so
# evaluating it raised NameError; none of the figures or the layout in this
# file use its result.

### Create app
app = JupyterDash(__name__, external_stylesheets=external_stylesheets)

# Module-level handle on the app's server object (the original author noted
# it may not be needed).
server = app.server


def _half_width_panel(heading, figure):
    """Return a left-floated Div, 48% wide, with an H2 heading above a graph."""
    return html.Div(
        [
            html.H2(heading),
            dcc.Graph(figure=figure),
        ],
        style={'width': '48%', 'float': 'left'},
    )


# Page content, top to bottom.
page_children = [
    html.H1("Exploring the gender wage gap with the GSS data"),
    dcc.Markdown(children=markdown_text),

    html.H2("Comparing indicators across gender"),
    dcc.Graph(figure=table),

    html.H2("'Men as the Breadwinner'"),
    html.H3("Level of agreement by gender"),
    dcc.Graph(figure=fig_bar),

    html.H2("Income on occupational prestige by gender"),
    dcc.Graph(figure=fig_scatter),

    # The two box plots sit in separate half-width panels.
    _half_width_panel("Distribution of income by gender", fig_box2A),
    _half_width_panel("Distribution of occupational prestige by gender", fig_box2B),

    html.H2("Occupational prestige (six categories)"),
    html.H3("by income and gender"),
    dcc.Graph(figure=fig_facet),
]

app.layout = html.Div(page_children)

if __name__ == '__main__':
    # Start the app with debug mode on. The original line held two calls
    # joined by the word "to", which is not valid Python; this is the second
    # (target) call. The first one additionally passed mode='inline'.
    app.run_server(debug=True)
