# CP321 - Final Project

Ethan

169036218

Friday, April 4, 2025

**Video Link:**

In [330]:
# Imports
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.express as px
import re

In [331]:
# Load one table per province/territory, plus the national table
def _read_table(csv_name):
    """Read one CSV, skipping the first line of the file before the header row."""
    return pd.read_csv(csv_name, skiprows=1)


df_can = _read_table("canada.csv")
df_newfound = _read_table("newfoundland_and_labrador.csv")
df_pei = _read_table("prince_edward_island.csv")
df_nova = _read_table("nova_scotia.csv")
df_brunswick = _read_table("new_brunswick.csv")
df_quebec = _read_table("quebec.csv")
df_ontario = _read_table("ontario.csv")
df_manitoba = _read_table("manitoba.csv")
df_saskat = _read_table("saskatchewan.csv")
df_alberta = _read_table("alberta.csv")
df_bc = _read_table("british_columbia.csv")
df_yukon = _read_table("yukon.csv")
df_northwest = _read_table("northwest_territories.csv")
df_nunavut = _read_table("nunavut.csv")

# Display name -> DataFrame lookup; insertion order is the order shown in the dropdowns
province_data = dict([
    ("Canada", df_can),
    ("Newfoundland and Labrador", df_newfound),
    ("Prince Edward Island", df_pei),
    ("Nova Scotia", df_nova),
    ("New Brunswick", df_brunswick),
    ("Quebec", df_quebec),
    ("Ontario", df_ontario),
    ("Manitoba", df_manitoba),
    ("Saskatchewan", df_saskat),
    ("Alberta", df_alberta),
    ("British Columbia", df_bc),
    ("Yukon", df_yukon),
    ("Northwest Territories", df_northwest),
    ("Nunavut", df_nunavut),
])

In [332]:
# Question 1 Helper Functions
def map_to_category(occupation):
    """Map an occupation label to an essential-service category.

    The label is converted to a lowercase string and searched for the
    substrings "police", "nurse" and "firefight", in that order.

    Returns:
        "Police", "Nurse" or "Firefighters" for the first keyword found,
        or None when no keyword is present.
    """
    occ_lower = str(occupation).lower()
    if "police" in occ_lower:
        return "Police"
    elif "nurse" in occ_lower:
        return "Nurse"
    elif "firefight" in occ_lower:
        return "Firefighters"
    return None

def filter_essential_and_group(df):
    """Total the "Total Genders" counts per essential-service category.

    Rows whose "Occupation" contains "nurse", "police" or "firefight"
    (case-insensitive) are kept, labelled with `map_to_category`, and summed.

    Args:
        df: DataFrame with "Occupation" and "Total Genders" columns. The
            counts may be numbers or comma-formatted strings such as "1,234".
            `df` itself is not modified.

    Returns:
        DataFrame with columns "Category" and "Total Genders" (numeric), one
        row per category that occurs in `df`.
    """
    mask = df["Occupation"].str.contains("nurse|police|firefight", case = False, na = False)
    df_ess = df.loc[mask, ["Occupation", "Total Genders"]].copy()
    df_ess["Category"] = df_ess["Occupation"].apply(map_to_category)
    # Convert before summing: adding up raw strings like "1,234" would
    # concatenate the text instead of adding the counts.
    df_ess["Total Genders"] = pd.to_numeric(
        df_ess["Total Genders"].astype(str).str.replace(",", "", regex = False),
        errors = "coerce"
    )
    df_grouped = df_ess.groupby("Category", as_index = False)["Total Genders"].sum()
    return df_grouped


In [333]:
# Question 2 Helper Functions
def extract_top_level_noc(occ_str):
    """Return the first digit of the numeric code that starts an occupation label.

    Args:
        occ_str: Occupation label, e.g. "21310 Electrical and electronics engineers".

    Returns:
        The first digit of the leading code as a one-character string ("2"
        for the example above), or None when `occ_str` is not a string or
        does not start with a digit after stripping whitespace.
    """
    if not isinstance(occ_str, str):
        return None
    match = re.match(r"(\d+)", occ_str.strip())
    if match:
        return match.group(1)[0]
    return None

def _leading_noc_code(occ_str):
    """Return the full run of digits that starts an occupation label, or None."""
    if not isinstance(occ_str, str):
        return None
    match = re.match(r"(\d+)", occ_str.strip())
    return match.group(1) if match else None

def _to_count(series):
    """Convert a column of numbers or comma-formatted strings to numeric (NaN if unparsable)."""
    return pd.to_numeric(series.astype(str).str.replace(",", "", regex = False), errors = "coerce")

def group_by_top_level_noc(df):
    """Return Men/Women counts for the single-digit NOC rows, in long form.

    Only rows whose leading code is exactly one digit ("0" to "9") are used.
    Rows with longer codes share that first digit, so including them would
    add sub-level counts on top of the top-level row.

    Args:
        df: DataFrame with "Occupation", "Men" and "Women" columns. Counts may
            be numbers or comma-formatted strings. `df` is not modified.

    Returns:
        DataFrame with columns "NOC1" (one-character code), "Gender"
        ("Men" or "Women") and "Count".
    """
    codes = df["Occupation"].apply(_leading_noc_code)
    is_top_level = codes.isin(list("0123456789"))
    df_filtered = pd.DataFrame({
        "NOC1": codes[is_top_level],
        "Men": _to_count(df.loc[is_top_level, "Men"]),
        "Women": _to_count(df.loc[is_top_level, "Women"]),
    })
    # Summing keeps one row per code even if a code appears more than once
    grouped = df_filtered.groupby("NOC1", as_index = False)[["Men", "Women"]].sum()
    melted = grouped.melt(id_vars = "NOC1", var_name = "Gender", value_name = "Count")
    return melted

In [334]:
# Question 3 Helper Functions
# Occupation labels matched verbatim against the "Occupation" column when
# selecting engineers; also the choices offered by the Question 3 dropdown.
engineer_codes = [
    "21310 Electrical and electronics engineers",
    "21311 Computer engineers (except software engineers and designers)",
    "21301 Mechanical engineers"
]

def build_engineers_dataframe(data = None, codes = None):
    """Collect the engineer rows of every province/territory into one frame.

    Args:
        data: Mapping of province/territory name to a DataFrame with
            "Occupation" and "Total Genders" columns. Defaults to the
            module-level `province_data`. The frames are not modified.
        codes: Occupation labels to keep (exact match). Defaults to the
            module-level `engineer_codes`.

    Returns:
        DataFrame with columns "Province", "Occupation" and "Total Genders"
        (numeric; unparsable values become NaN). The columns are present
        even when no row matches.
    """
    if data is None:
        data = province_data
    if codes is None:
        codes = engineer_codes

    columns = ["Province", "Occupation", "Total Genders"]
    frames = []
    for province_name, df in data.items():
        selected = df.loc[df["Occupation"].isin(codes), ["Occupation", "Total Genders"]].copy()
        # astype(str) lets the same clean-up handle numeric and "1,234"-style columns
        selected["Total Genders"] = pd.to_numeric(
            selected["Total Genders"].astype(str).str.replace(",", "", regex = False),
            errors = "coerce"
        )
        selected.insert(0, "Province", province_name)
        frames.append(selected)

    if not frames:
        return pd.DataFrame(columns = columns)
    return pd.concat(frames, ignore_index = True)[columns]

# Built once when the cell runs; the Question 3 callback filters this frame on each update.
df_engineers_all = build_engineers_dataframe()

In [335]:
# Question 4 Helper Functions
def build_men_women_df(data = None):
    """Total the "Men" and "Women" columns for every province/territory.

    The input frames are left untouched, so the cell can be re-run and other
    code that reads the same frames still sees the original values.

    Args:
        data: Mapping of province/territory name to a DataFrame with "Men"
            and "Women" columns (numbers or comma-formatted strings).
            Defaults to the module-level `province_data`.

    Returns:
        DataFrame with one row per entry of `data` and columns "Province",
        "Men" and "Women" holding the column sums (unparsable values are
        ignored).

    NOTE(review): every row of each table is summed. If the tables contain
    aggregate rows alongside their detail rows, these totals are inflated —
    confirm against the CSV layout.
    """
    if data is None:
        data = province_data

    def _column_total(column):
        # astype(str) lets the same clean-up handle numeric and "1,234"-style columns
        cleaned = column.astype(str).str.replace(",", "", regex = False)
        return pd.to_numeric(cleaned, errors = "coerce").sum()

    rows = [
        {
            "Province": province_name,
            "Men": _column_total(df["Men"]),
            "Women": _column_total(df["Women"])
        }
        for province_name, df in data.items()
    ]
    return pd.DataFrame(rows)

# Built once when the cell runs; the Question 4 callback reshapes and filters this frame on each update.
df_men_women_all = build_men_women_df()

In [336]:
app = dash.Dash(__name__)

# Both province/territory dropdowns offer the same choices, so build the list once.
province_options = [{"label": name, "value": name} for name in province_data]

# Spacing under each section. Dash documents `style` keys as camelCased
# (`marginBottom`), not the CSS spelling (`margin-bottom`).
section_style = {'marginBottom': '50px'}

# Page layout: one section per question, each with its controls above its graph.
app.layout = html.Div([
    # Question 1 Visualisation
    html.Div([
        html.H2("Question 1: Essential Services by Province/Territory"),
        dcc.Dropdown(
            id = 'province-dropdown-q1',
            options = province_options,
            value = 'Manitoba',
            clearable = False
        ),
        dcc.Graph(id = 'essential-services-graph-q1')
    ], style = section_style),

    # Question 2 Visualisation
    html.Div([
        html.H2("Question 2: Employment Statistics Based on Gender By Top-Level NOC"),
        dcc.Dropdown(
            id = 'province-dropdown-q2',
            options = province_options,
            value = 'Canada',
            clearable = False
        ),
        # One slider stop per single-digit NOC code (0-9)
        dcc.Slider(
            id = 'noc-slider-q2',
            min = 0,
            max = 9,
            step = 1,
            value = 0,
            marks = {i: str(i) for i in range(10)}
        ),
        dcc.Graph(id = 'gender-noc-graph-q2')
    ], style = section_style),

    # Question 3 Visualisation
    html.Div([
        html.H2("Question 3: Manpower for Electric Vehicle Factory (Engineers Only)"),
        html.Label("Select Occupation:"),
        # Multi-select; all engineer occupations are selected initially
        dcc.Dropdown(
            id = "engineer-dropdown-q3",
            options = [{"label": code, "value": code} for code in engineer_codes],
            value = engineer_codes,
            multi = True,
            clearable = False
        ),
        dcc.Graph(id = "engineers-graph-q3")
    ], style = section_style),

    # Question 4 Visualisation
    html.Div([
        html.H2("Question 4: Men and Women in Work in Canada (by Province/Territory)"),
        dcc.Checklist(
            id = "gender-checklist-q4",
            options = [
                {"label": "Men", "value": "Men"},
                {"label": "Women", "value": "Women"}
            ],
            value = ["Men", "Women"],
            labelStyle = {'display': 'inline-block'}
        ),
        dcc.Graph(id = "men-women-graph-q4")
    ])
])

In [337]:
# Callback for Question 1
@app.callback(
    Output('essential-services-graph-q1', 'figure'),
    Input('province-dropdown-q1', 'value')
)
def update_graph_q1(selected_province):
    """Build the Question 1 bar chart for the province chosen in the dropdown.

    Looks up the selected table in `province_data`, totals it per
    essential-service category, and plots one bar per category.
    """
    totals_by_service = filter_essential_and_group(province_data[selected_province])
    chart_title = f"Total Genders in Essential Services: {selected_province}"
    figure = px.bar(totals_by_service, x = 'Category', y = 'Total Genders', title = chart_title)
    figure.update_layout(
        xaxis_title = "Essential Service",
        yaxis_title = "Total Genders"
    )
    return figure

In [338]:
# Callback for Question 2
@app.callback(
    Output('gender-noc-graph-q2', 'figure'),
    Input('province-dropdown-q2', 'value'),
    Input('noc-slider-q2', 'value')
)
def update_graph_q2(selected_province, selected_noc):
    """Build the Question 2 chart: Men vs. Women for one NOC digit in one province.

    A copy of the selected table is grouped by top-level NOC digit, then
    narrowed to the digit chosen on the slider.
    """
    # Hand the helper a copy so the shared table in province_data stays as loaded
    province_copy = province_data[selected_province].copy()
    counts_long = group_by_top_level_noc(province_copy)

    # Slider values are ints while the NOC1 column holds one-character strings
    noc_key = str(selected_noc)
    counts_for_code = counts_long.loc[counts_long["NOC1"] == noc_key]

    chart_title = f"Gender Variation for NOC code {selected_noc} in {selected_province}"
    figure = px.bar(
        counts_for_code,
        x = "Gender",
        y = "Count",
        color = "Gender",
        barmode = "group",
        title = chart_title
    )
    figure.update_layout(xaxis_title = "Gender", yaxis_title = "Total Employees")
    return figure

In [339]:
# Callback for Question 3
@app.callback(
    Output("engineers-graph-q3", "figure"),
    Input("engineer-dropdown-q3", "value")
)
def update_graph_q3(selected_occupations):
    """Build the Question 3 chart: engineer counts per province for the chosen occupations.

    Returns an empty bar figure titled "No occupations selected" when the
    dropdown selection is empty.
    """
    if selected_occupations:
        is_chosen = df_engineers_all["Occupation"].isin(selected_occupations)
        engineers_by_province = (
            df_engineers_all[is_chosen]
            .groupby(["Province", "Occupation"], as_index = False)["Total Genders"]
            .sum()
        )
        figure = px.bar(
            engineers_by_province,
            x = "Province",
            y = "Total Genders",
            color = "Occupation",
            barmode = "group",
            title = "Available Engineers by Province/Territory"
        )
        figure.update_layout(
            xaxis_title = "Province/Territory",
            yaxis_title = "Number of Engineers"
        )
        return figure

    return px.bar(title = "No occupations selected")

In [340]:
# Callback for Question 4
@app.callback(
    Output("men-women-graph-q4", "figure"),
    Input("gender-checklist-q4", "value")
)
def update_graph_q4(selected_genders):
    """Build the Question 4 chart: Men/Women totals per province for the ticked genders."""
    # Reshape to one row per (Province, Gender) so Gender can drive the bar colour
    long_counts = df_men_women_all.melt(
        id_vars = "Province",
        var_name = "Gender",
        value_name = "Count"
    )
    is_ticked = long_counts["Gender"].isin(selected_genders)
    visible_counts = long_counts[is_ticked]

    figure = px.bar(
        visible_counts,
        x = "Province",
        y = "Count",
        color = "Gender",
        barmode = "group",
        title = "Men vs. Women in Work by Province/Territory"
    )
    figure.update_layout(
        xaxis_title = "Province/Territory",
        yaxis_title = "Total People in Work"
    )
    return figure

In [341]:
# Run
# Start the Dash server (debug mode, port 8052) only when this code runs as the main module.
if __name__ == '__main__':
    app.run(debug = True, port = 8052)

# Written Answers to Questions 1-4
## 1. How are human resources for essential services distributed across *administrative units* (provinces and territories)? Ideally, the government desires uniform distribution of essential services and must take appropriate measures if not. Consider nurses, police, and firefighters as essential services.

Human resources for essential services are uniformly distributed across provinces and territories relative to their populations. Each province/territory has a similar amount of each essential service, but I noticed that in all the provinces/territories in Canada, there are not nearly as many firefighters as there are nurses and police. Police are the largest group, nurses fall close behind, and firefighters make up a smaller share.
Each province and territory has a similar amount of each essential service, but the government needs to focus on having an even amount of police, nurses, and firefighters by recruiting more firefighters and nurses.

## 2. How the employment statistics vary based on gender in different administrative units. We are only interested in the data from the highest level of NOC, i.e. single digit codes for NOC like 1 Business, finance and administration occupations, 2 Natural and applied sciences and related occupations and so on.

From this bar graph, I see that half of the NOCs have more males and the other half have more females. This pattern is the same in every province/territory.
Therefore, at a single-digit NOC level, the gender distributions are fairly similar across the different administrative units (provinces/territories).

## 3. Which administrative unit has enough manpower available (Computer, Mechanical and Electrical engineers) to setup up a factory for Electronic Vehicles. We do not need the definition of enough, this is something the user needs to know. We will provide tools to explore the data such that they can evaluate different provinces and reach a decision.

Based on the Question 3 visualisation, Ontario has the most manpower available to set up a factory for electric vehicles. Quebec has the second most manpower available.

## 4. Include a visualization of your choice to highlight any interesting insights gained from the dataset. This is an open-ended task to demonstrate your skills.

I created a graph to compare the number of men and women in the workforce throughout all the provinces/territories in Canada.
There appears to be a fairly equal number of men and women in the workforce in all of the provinces/territories.