In [1]:
import numpy as np
import pandas as pd

import plotly.express as px
import plotly.figure_factory as ff
from plotly.offline import *
from plotly.subplots import make_subplots

In [57]:
# Load the three BLS occupational CSV extracts and join them into `master`.

# Single place to point the notebook at a different copy of the data.
DATA_DIR = 'C:/Users/benno/OneDrive/Python/Leif'

repo1 = f'{DATA_DIR}/bls_occupational_educational.csv'
repo2 = f'{DATA_DIR}/bls_occupational_projections.csv'
repo3 = f'{DATA_DIR}/bls_occupational_titles.csv'

# skipinitialspace=True strips blanks that follow each delimiter.
educate = pd.read_csv(repo1, skipinitialspace=True)
project = pd.read_csv(repo2, skipinitialspace=True)
titles = pd.read_csv(repo3, skipinitialspace=True)

# Show every row when a frame is displayed (no truncation).
pd.set_option('display.max_rows', None)

# Inner joins on occ_code: only occupation codes present in all three files
# survive into `master`.
master = (
    educate
    .merge(project, on='occ_code', how='inner')
    .merge(titles, on='occ_code', how='inner')
)

In [58]:
# What are the top 25 occupations that will experience the most growth in the next decade?

# Percent change in employment between 2019 and 2029 for each row of `master`.
master['Change (%)'] = (
    (master.employment_2029 - master.employment_2019) / master.employment_2019
) * 100

# Average the change per occupation and rank from largest to smallest.
# The aggregation is named by string ('mean') instead of passing np.mean:
# recent pandas versions emit a FutureWarning for NumPy callables in agg().
growth = (
    master.groupby(['occ_title', 'occ_code'])
    .agg({'Change (%)': 'mean'})
    .sort_values(by='Change (%)', ascending=False)
    .reset_index()
)

# Horizontal bars keyed by occupation code; the legend maps colours to titles.
fig1 = px.bar(
    growth.head(25),
    y='occ_code',
    x='Change (%)',
    color='occ_title',
    color_discrete_sequence=px.colors.qualitative.Dark24,
    labels={'occ_title': 'Title of occupation'},
).update_yaxes(categoryorder='total ascending')
fig1.update_layout({'title': {'text': 'Growth by Occupation, 10 Year Change', 'font': {'size': 28}},
                    'xaxis': {'title': {'text': 'Change, 2019-2029 (%)'}},
                    'yaxis': {'title': {'text': 'Occupation'}},
                    })
iplot(fig1)

In [59]:
# What are the top 10 highest paying occupations that require less than 5 years of experience in a related occupation?

# Mean of median_wage per (title, code, experience requirement) combination.
# 'mean' is passed as a string rather than np.mean, which recent pandas
# versions flag with a FutureWarning inside agg().
lessexp = (
    master.groupby(['occ_title', 'occ_code', 'experience_required_in_a_related_occupation'])
    .agg({'median_wage': 'mean'})
    .reset_index()
)

# Keep only the experience categories treated here as "less than 5 years".
# NOTE(review): 'Less than5 years' has no space before the 5 - presumably this
# mirrors the spelling in the CSV; verify against the data.
# NOTE(review): pandas' default NA parsing may read the literal text "None" as
# missing, in which case those rows never reach this filter - confirm.
low_experience = lessexp.experience_required_in_a_related_occupation.isin(['None', 'Less than5 years'])
lessexp_top10 = lessexp[low_experience].sort_values(by='median_wage', ascending=False).head(10)

fig2 = px.bar(
    lessexp_top10,
    y='occ_code',
    x='median_wage',
    color='occ_title',
    color_discrete_sequence=px.colors.qualitative.Dark24,
    labels={'occ_title': 'Title of occupation'},
).update_yaxes(categoryorder='total ascending')
# The x-axis is zoomed to the 200k-210k band, so the bars do not start at zero.
fig2.update_layout({'title': {'text': "Median Wage by Occupation, Less Than 5 Years' Experience", 'font': {'size': 28}},
                    'xaxis': {'title': {'text': 'Median Wage ($)'}, 'range': [200000, 210000]},
                    'yaxis': {'title': {'text': 'Occupation'}},
                    })
iplot(fig2)

In [60]:
# Question: which occupations pay more than $40k/yr on average and don't usually require a bachelor's degree?
# NOTE(review): the filter below keeps rows whose
# percentage_bachelors_degree_or_higher is ABOVE 0.800, which reads as the
# opposite of "don't usually require" - confirm the intended direction and
# whether the column is a fraction or a percentage.

wage_above_40k = master.median_wage > 40000
bachelors_above_threshold = master.percentage_bachelors_degree_or_higher > 0.800
paymorethan40k = master[wage_above_40k & bachelors_above_threshold]

# One marker per row, sized by wage and coloured by occupation title.
fig3 = px.scatter(
    paymorethan40k,
    x='median_wage',
    y='percentage_bachelors_degree_or_higher',
    size='median_wage',
    color='occ_title',
    color_discrete_sequence=px.colors.qualitative.Dark24,
    labels={'occ_title': 'Title of occupation'},
)
fig3 = fig3.update_yaxes(categoryorder='category ascending')
fig3.update_layout(
    title=dict(text='Jobs With 40K Salary (or higher), Bachelors', font=dict(size=28)),
    xaxis=dict(title=dict(text='Median Wage($)')),
    yaxis=dict(title=dict(text='Bachelors (%)')),
)
iplot(fig3)

In [61]:
# What's the distribution of "median wage" by typical educational background for entry?
# First view: every education level pooled into a single distribution plot.

# Drop missing wages up front so the density curve is estimated from real
# values only; this is a no-op when the column has no NaN.
all_wages = master.median_wage.dropna()

fig4 = ff.create_distplot(hist_data=[all_wages], group_labels=['all education'], bin_size=2000)
# create_distplot normalises the histogram as a probability density by default
# (histnorm='probability density'), so the y-axis is labelled as density
# rather than as a raw count.
fig4.update_layout({'title': {'text': 'Distribution of Median Wage, All Education Levels', 'font': {'size': 28}},
                    'showlegend': False,
                    'xaxis': {'title': {'text': 'Median Wage ($)'}},
                    'yaxis': {'title': {'text': 'Density'}}})
iplot(fig4)

# Second view: one histogram panel per education level, laid out two per row.

education = master.typical_educational_background_for_entry

# One label per distinct education level, in order of first appearance.
# Missing values are skipped because they cannot serve as a panel title.
group_labels = list(education.dropna().unique())
hist_data = [master.median_wage[education == edu] for edu in group_labels]

# Size the grid from the data instead of assuming exactly eight levels, so the
# cell neither raises on fewer levels nor silently drops panels on more.
n_cols = 2
n_rows = max(1, -(-len(group_labels) // n_cols))  # ceiling division
rowcols = [[i // n_cols + 1, i % n_cols + 1] for i in range(len(group_labels))]

fig5 = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=group_labels,
                     # 0.15 for the usual 4-row grid; shrinks for taller grids,
                     # where make_subplots rejects spacing above 1 / (rows - 1).
                     vertical_spacing=min(0.15, 1 / n_rows),
                     x_title='Median Wage ($)',
                     y_title='Frequency')

# row/col alone places each trace on its subplot's axes.
for label, wages, (row, col) in zip(group_labels, hist_data, rowcols):
    fig5.add_histogram(name=label, x=wages, row=row, col=col)

# Fix the first x-axis range once and make every other x-axis match it, so
# all panels share the same wage scale.
fig5.update_layout({'xaxis': {'range': [0, 220000]}})
fig5.update_xaxes(matches='x')

fig5.update_layout({'title': {'text': 'Distribution of Median Wage, By Education Level', 'font': {'size': 28}},
                    'legend': {'title': {'text': 'Education'}}})
# Shrink the subplot titles and shared axis titles (all are annotations).
fig5.update_annotations({'font_size': 12})

iplot(fig5)

# Quartiles of median wage per education level, rendered as a table.

distribution_by_education = master.groupby(['typical_educational_background_for_entry'])['median_wage']

# describe() yields count/mean/std/min/25%/50%/75%/max per group; the group
# key is moved back into a column and values are rounded to 2 decimals.
summary_stats = distribution_by_education.describe()
summary_stats = summary_stats.reset_index()
distribution_by_education_summary = summary_stats.round(2)

# Only the education label and the three quartile columns go into the table.
quartile_columns = ['typical_educational_background_for_entry', '25%', '50%', '75%']
table_data = distribution_by_education_summary[quartile_columns]

fig6 = ff.create_table(table_data, height_constant=60)
fig6.update_layout(width=1400)
iplot(fig6)
