In [1]:
from dash import Dash, dcc, html, Input, Output
from dash.dependencies import Input, Output
import plotly.express as px
import pandas as pd
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
# import data source
# data reference: https://www.abs.gov.au/statistics/labour/employment-and-unemployment/labour-force-australia-detailed/sep-2023/6291023a.xlsx
unemployment_rate_df = pd.read_excel("unemployment_rate2.xlsx")

# data reference: https://www.jobsandskills.gov.au/sites/default/files/2023-10/2023-09_nero_extract.xlsx
# The workbook is read one worksheet per state; all sheets are stacked into a
# single frame with one concat. The context manager closes the workbook handle
# once the sheets are loaded.
state_name_list = ["NSW","QLD","NT","WA","TAS","VIC","SA","ACT"]
with pd.ExcelFile("2023-09_nero_extract.xlsx") as jobs_xlsx:
    job_df = pd.concat(
        [pd.read_excel(jobs_xlsx, state_name) for state_name in state_name_list],
        ignore_index=True,
    )

# data reference: https://www.jobsandskills.gov.au/sites/default/files/2023-10/Internet%20Vacancies%2C%20ANZSCO%20Skill%20Level%2C%20States%20and%20Territories%20-%20September%202023.xlsx
with pd.ExcelFile("Internet Vacancies, ANZSCO Skill Level, States and Territories - September 2023.xlsx") as skill_level_xlsx:
    skill_level_df = pd.read_excel(skill_level_xlsx, "Trend")
# The last column of the sheet is used as the latest vacancy figure.
# `assign` builds a new frame instead of writing into a column subset, which
# would raise SettingWithCopyWarning.
latest_skill_level_df = skill_level_df[["State","Skill_level"]].assign(
    latest_internet_vacancies=skill_level_df.iloc[:, -1]
)
# Keep only rows with a positive skill level
# (presumably level 0 is an aggregate row -- TODO confirm against the workbook).
latest_skill_level_df = latest_skill_level_df[latest_skill_level_df["Skill_level"] > 0]

# data reference: https://www.jobsandskills.gov.au/sites/default/files/2023-10/Internet%20Vacancies%2C%20ANZSCO4%20Occupations%2C%20States%20and%20Territories%20-%20September%202023.xlsx
with pd.ExcelFile("Internet Vacancies, ANZSCO4 Occupations, States and Territories - September 2023.xlsx") as vacancies_job_xlsx:
    vacancies_job_df = pd.read_excel(vacancies_job_xlsx, "4 digit 3 month average")

# data reference: https://www.jobsandskills.gov.au/data/recruitment-experiences-and-outlook-survey/recruitment-methods-used-by-employers
recruitment_methods_df = pd.read_excel("recruitment_methods.xlsx")

# Not able to find the median weekly income for each occupation, use random data to fill.
# A fixed seed keeps the placeholder values identical across kernel restarts so
# the income chart is reproducible.
SALARY_PLACEHOLDER_SEED = 42
salary_rng = np.random.default_rng(SALARY_PLACEHOLDER_SEED)
salary_df = vacancies_job_df[["state","ANZSCO_TITLE"]].assign(
    # integers() excludes the upper bound, matching the original randint(1200, 1700).
    median_weekly_income=salary_rng.integers(1200, 1700, len(vacancies_job_df))
)

In [4]:
# match occupation types between datasets
# First keep vacancy rows whose title exists in the jobs data, then keep job
# rows whose occupation survives in the (already filtered) vacancy data.
vacancy_title_in_jobs = vacancies_job_df["ANZSCO_TITLE"].isin(job_df["anzsco4_name"])
vacancies_job_df = vacancies_job_df.loc[vacancy_title_in_jobs]
job_name_in_vacancies = job_df["anzsco4_name"].isin(vacancies_job_df["ANZSCO_TITLE"])
job_df = job_df.loc[job_name_in_vacancies]

In [5]:
# prepare for the slider
# Earliest and latest calendar year present in the unemployment series.
unemployment_years = unemployment_rate_df["Time"].dt.year
time_slider_min, time_slider_max = unemployment_years.min(), unemployment_years.max()

In [6]:
# prepare the dropdown options for occupation
# Distinct occupation names, in order of first appearance in job_df.
occupation_option = job_df["anzsco4_name"].unique().tolist()

In [7]:
# prepare for map location
# One (state, longitude, latitude) record per state/territory marker.
capital_city_location_df = pd.DataFrame(
    [
        ("NSW", 151.2093, -33.8688),
        ("QLD", 153.0260, -27.4705),
        ("NT", 130.8444, -12.4637),
        ("WA", 115.8613, -31.9523),
        ("TAS", 147.3257, -42.8826),
        ("VIC", 144.9631, -37.8136),
        ("SA", 138.6007, -34.9285),
        ("ACT", 149.1300, -35.2809),
    ],
    columns=["state", "longitude", "latitude"],
)
# Column-oriented view of the same data: {"state": [...], "longitude": [...], "latitude": [...]}.
capital_city_location = capital_city_location_df.to_dict(orient="list")

In [8]:
app = Dash(__name__)

# Initial dropdown selections. Without an initial `value` both dropdowns start
# as None, and every callback fires once on page load with that None and fails
# before the user has picked anything.
DEFAULT_STATES = ["NSW"]
DEFAULT_OCCUPATION = occupation_option[0] if occupation_option else None

# Two side-by-side columns: the left one is driven by state and year range,
# the right one additionally by the selected occupation.
app.layout = html.Div([
    html.H1(children = "Labour Market Dashboard",style={'textAlign': 'center'}),
    html.Div([
        html.Div([
            html.Label("State"),
            dcc.Dropdown(options = ["NSW","QLD","NT","WA","TAS","VIC","SA","ACT"],
                         value = DEFAULT_STATES,
                         multi = True,
                         clearable = False,
                         id = "state")]),
            html.Br(),
            html.Label("Time"),
            dcc.RangeSlider(time_slider_min,
                       time_slider_max,
                       id = "year_slider",
                       value = [time_slider_min,time_slider_max],
                       # One mark per year present in the data; step=None restricts
                       # the handles to those marks.
                       marks={str(year): str(year) for year in unemployment_rate_df["Time"].dt.year.unique()},
                       step=None),
            dcc.Graph(id="unemployment"),
            dcc.Graph(id="skill_level_pie_chart"),
            dcc.Graph(id="method_bar_chart")
            ],
        style={'width': '48%', 'display': 'inline-block', 'float': 'left'}),
    html.Div([
        html.Div([
            html.Label("Occupation"),
            dcc.Dropdown(options = occupation_option,
                         value = DEFAULT_OCCUPATION,
                         multi = False,
                         clearable = False,
                         id = "job")]),
            # Vertical spacing so the right-hand graphs line up with the left column.
            html.Br(),
            html.Br(),
            html.Br(),
            html.Br(),
            dcc.Graph(id="employment_map"),
            dcc.Graph(id="vacancies_line"),
            dcc.Graph(id="income_bar")
            ],
        style={'width': '48%', 'display': 'inline-block', 'float': 'right'})
])

In [9]:
@app.callback(
    Output("unemployment", "figure"),
    Input("state", "value"),
    Input("year_slider", "value"))
def update_unemployment(state,year_slider):
    """Draw the unemployment-rate line chart for the selected states and years.

    Parameters
    ----------
    state : list of str or None
        State codes chosen in the "state" dropdown; None or an empty list
        yields an empty, titled figure instead of an error.
    year_slider : sequence of two ints
        Inclusive [first_year, last_year] range from the range slider.

    Returns
    -------
    plotly.graph_objects.Figure
        One line per selected state, coloured by state.
    """
    title = "{}-{} Unemployment Rate".format(year_slider[0],year_slider[1])
    selected_states = state or []

    # Single boolean mask replaces the per-state filter-and-concat loop.
    in_scope = (
        unemployment_rate_df["state"].isin(selected_states)
        & unemployment_rate_df["Time"].dt.year.between(year_slider[0], year_slider[1])
    )
    updated_unemployment_rate_df = unemployment_rate_df[in_scope]
    if updated_unemployment_rate_df.empty:
        return go.Figure().update_layout(title_text=title)

    fig = px.line(updated_unemployment_rate_df,
                  x = "Time",
                  y = "unemployment_rate",
                  color="state",
                  # Keep the legend in the order the user selected the states.
                  category_orders={"state": list(selected_states)},
                  title=title)
    return fig

In [10]:
# code reference: https://plotly.com/python/pie-charts/
@app.callback(
    Output("skill_level_pie_chart", "figure"),
    Input("state", "value"))
def update_skill_level_pie(state):
    """Draw one vacancy-by-skill-level pie chart per selected state.

    Skill levels (per the original notes in this notebook):
      1 - Bachelor degree or higher
      2 - Advanced Diploma or Diploma
      3 - Certificate IV or III
      4 - Certificate II or III
      5 - Certificate I or secondary education

    Parameters
    ----------
    state : list of str or None
        State codes chosen in the "state" dropdown; None or an empty list
        yields an empty, titled figure (make_subplots cannot build 0 columns).

    Returns
    -------
    plotly.graph_objects.Figure
        A single-row subplot grid with one pie per selected state.
    """
    title = "Latest Skill Levels of Vacancy"
    if not state:
        return go.Figure().update_layout(title_text=title)

    pie_num = len(state)
    # Pie traces need 'domain' typed subplot cells; build a fresh dict per cell.
    specs_list = [{'type':'domain'} for _ in range(pie_num)]
    fig_skill_pie = make_subplots(rows=1, cols=pie_num, specs=[specs_list])

    for column, state_code in enumerate(state, start=1):
        state_rows = latest_skill_level_df[latest_skill_level_df["State"] == state_code].sort_values("Skill_level")
        # Labels are derived from the Skill_level column so each slice is named
        # after its own row rather than relying on the rows arriving as 1..5.
        # assumes Skill_level holds whole numbers -- TODO confirm
        skill_level_label = ["Skill Level {}".format(int(level)) for level in state_rows["Skill_level"]]
        fig_skill_pie.add_trace(go.Pie(labels=skill_level_label,
                                       values=list(state_rows["latest_internet_vacancies"]),
                                       title = state_code,
                                       name=state_code),1,column)
    fig_skill_pie.update_layout(title_text=title)
    return fig_skill_pie

In [11]:
# code reference: https://plotly.com/python/horizontal-bar-charts/
@app.callback(
    Output("method_bar_chart", "figure"),
    Input("state", "value"))
def update_method_bar_bar(state):
    """Draw a grouped horizontal bar chart of recruitment-method proportions.

    Parameters
    ----------
    state : list of str or None
        State codes chosen in the "state" dropdown; None or an empty list
        yields an empty, titled figure instead of an error.

    Returns
    -------
    plotly.graph_objects.Figure
        One bar group per recruitment method, coloured by state.
    """
    title = "Latest Recruitment Methods Proportion"
    updated_method_df = recruitment_methods_df[recruitment_methods_df["state"].isin(state or [])]
    if updated_method_df.empty:
        return go.Figure().update_layout(title_text=title)

    fig_method_bar = px.bar(updated_method_df,
                           # "Porportion" is the column name as spelled in the data
                           # file; only the displayed axis label is corrected.
                           x="Porportion",
                           y="method",
                           color="state",
                           labels={"Porportion": "Proportion"},
                           title = title,
                           barmode="group",
                           orientation='h')
    return fig_method_bar

In [12]:
# code reference: https://plotly.com/python/map-configuration/
# code reference: https://plotly.com/python/scatter-plots-on-maps/
@app.callback(
    Output("employment_map", "figure"),
    Input("state", "value"),
    Input("job", "value"),
    Input("year_slider", "value"))
def update_map(state,job,year_slider):
    """Plot employment size and growth for one occupation on a map of Australia.

    For every selected state the employment figures of the chosen occupation
    are summed per date. The marker size is the employment at the last date in
    the year range, and the marker colour is the percentage growth between the
    first and last date available for that state within the range.

    Parameters
    ----------
    state : list of str or None
        State codes chosen in the "state" dropdown.
    job : str or None
        Occupation name chosen in the "job" dropdown.
    year_slider : sequence of two ints
        Inclusive [first_year, last_year] range from the range slider.

    Returns
    -------
    plotly.graph_objects.Figure
        A scatter_geo figure, or an empty titled figure when nothing is
        selected or no data matches the selection.
    """
    title = "{} Employment from {} to {}".format(job,year_slider[0],year_slider[1])
    selected_states = state or []

    in_scope = (
        (job_df["anzsco4_name"] == job)
        & job_df["state_name"].isin(selected_states)
        & job_df["date"].dt.year.between(year_slider[0], year_slider[1])
    )
    # numeric_only=True keeps text columns out of the sum; without it newer
    # pandas concatenates (or rejects) strings and the first remaining column
    # is no longer a number. The code columns are dropped because summing
    # identifiers is meaningless.
    aggrate_job_state_df = (
        job_df[in_scope]
        .groupby(["state_name","date"])
        .sum(numeric_only=True)
        .drop(columns=["sa4_code","anzsco4_code"], errors="ignore")
    )
    if aggrate_job_state_df.empty or aggrate_job_state_df.shape[1] == 0:
        return go.Figure().update_layout(title_text=title)

    # assumes the first remaining numeric column is the employment estimate,
    # as the original positional lookup did -- TODO confirm against the workbook
    employment = aggrate_job_state_df.iloc[:, 0]
    states_with_data = set(employment.index.get_level_values("state_name"))
    capital_lookup = capital_city_location_df.set_index("state")

    map_records = []
    for state_code in selected_states:
        # Skip states without data or coordinates instead of raising KeyError.
        if state_code not in states_with_data or state_code not in capital_lookup.index:
            continue
        state_employment = employment.xs(state_code, level="state_name").sort_index()
        start_employment = state_employment.iloc[0]
        ending_employment = state_employment.iloc[-1]
        if start_employment:
            growth_rate = ((ending_employment - start_employment)/start_employment)*100
        else:
            # Growth from a zero base is undefined; avoid dividing by zero.
            growth_rate = float("nan")
        map_records.append({
            "state": state_code,
            "longitude": capital_lookup.loc[state_code, "longitude"],
            "latitude": capital_lookup.loc[state_code, "latitude"],
            "ending_employment": ending_employment,
            "growth_rate": round(growth_rate,2),
        })
    if not map_records:
        return go.Figure().update_layout(title_text=title)

    employment_map_df = pd.DataFrame(map_records)
    fig_map = px.scatter_geo(employment_map_df,
                         lat="latitude",
                         lon="longitude",
                         color="growth_rate",
                         text = "state",
                         size="ending_employment",
                         color_continuous_scale=px.colors.sequential.Sunset,
                         title = title)
    # Centre and zoom the projection on Australia.
    fig_map.update_geos(center={"lat":-25.2744,"lon":133.7751},
                    projection_scale=4.8,
                    lataxis_showgrid=True,
                    lonaxis_showgrid=True)
    return fig_map

In [13]:
@app.callback(
    Output("vacancies_line", "figure"),
    Input("state", "value"),
    Input("job", "value"),
    Input("year_slider", "value"))
def update_vacancies_line(state,job,year_slider):
    """Draw the internet-vacancies trend of one occupation for the selected states.

    The vacancies table is wide (one column per date); the first matching row
    of each selected state is reshaped into long form (Time, Internet
    Vacancies, state) before plotting.

    Parameters
    ----------
    state : list of str or None
        State codes chosen in the "state" dropdown.
    job : str or None
        Occupation name chosen in the "job" dropdown.
    year_slider : sequence of two ints
        Inclusive [first_year, last_year] range from the range slider.

    Returns
    -------
    plotly.graph_objects.Figure
        One line per state that has data, or an empty titled figure when
        nothing is selected or no data matches the selection.
    """
    title = "{}-{} Vacancies Trend of {}".format(year_slider[0],year_slider[1],job)
    selected_states = state or []

    in_scope = (
        vacancies_job_df["state"].isin(selected_states)
        & (vacancies_job_df["ANZSCO_TITLE"] == job)
    )
    selected_vacancies_df = vacancies_job_df[in_scope].drop(["ANZSCO_CODE","ANZSCO_TITLE"],axis = 1)

    state_frames = []
    for state_code in selected_states:
        state_rows = selected_vacancies_df[selected_vacancies_df["state"] == state_code]
        if state_rows.empty:
            # This occupation has no vacancy row for the state; skip it rather
            # than fail on an out-of-range positional lookup.
            continue
        # Everything except "state" is a date column, wherever "state" sits.
        vacancies_by_date = state_rows.drop(columns="state").iloc[0]
        state_frames.append(pd.DataFrame({
            # to_datetime guarantees the .dt accessor works on the Time column.
            "Time": pd.to_datetime(vacancies_by_date.index),
            "Internet Vacancies": list(vacancies_by_date),
            "state": state_code,
        }))
    if not state_frames:
        return go.Figure().update_layout(title_text=title)

    vacancies_line_df = pd.concat(state_frames, ignore_index = True)
    vacancies_line_df = vacancies_line_df[
        vacancies_line_df["Time"].dt.year.between(year_slider[0], year_slider[1])
    ]
    if vacancies_line_df.empty:
        return go.Figure().update_layout(title_text=title)

    fig_vacancies = px.line(vacancies_line_df,
                            x = "Time",
                            y = "Internet Vacancies",
                            color="state",
                            title=title)
    return fig_vacancies

In [14]:
@app.callback(
    Output("income_bar", "figure"),
    Input("state", "value"),
    Input("job", "value"))
def update_income_bar(state,job):
    """Draw the median weekly income of one occupation for the selected states.

    Parameters
    ----------
    state : list of str or None
        State codes chosen in the "state" dropdown.
    job : str or None
        Occupation name chosen in the "job" dropdown.

    Returns
    -------
    plotly.graph_objects.Figure
        One bar per selected state, or an empty titled figure when nothing is
        selected or no data matches the selection.
    """
    title = "Latest Median Weekly Income"
    # Both conditions are evaluated against salary_df itself, so the mask and
    # the frame it filters always share the same index.
    in_scope = salary_df["state"].isin(state or []) & (salary_df["ANZSCO_TITLE"] == job)
    update_salary_df = salary_df[in_scope]
    if update_salary_df.empty:
        return go.Figure().update_layout(title_text=title)

    fig_income_bar = px.bar(update_salary_df,
                           x="state",
                           y="median_weekly_income",
                           title = title)
    return fig_income_bar

In [15]:
# Start the Dash server for the app defined above.
# NOTE(review): the recorded output of this cell reports that port 8050 is
# already in use. Stop the program holding that port, or start the server on
# a different port, before re-running this cell.
app.run(jupyter_mode="external")

Address already in use
Port 8050 is in use by another program. Either identify and stop that program, or start the server with a different port.


AssertionError: 