In [1]:
import pandas as pd
import numpy as np

# Turn on the pandas "mode.copy_on_write" option for the whole session.
pd.options.mode.copy_on_write = True

import warnings

# Silence every warning for the remainder of the notebook.
warnings.simplefilter("ignore")

# Load the raw employment table from the working directory.
employment = pd.read_csv("employment.csv")

In [2]:
# Preview the first rows of the freshly loaded employment table.
employment.head()

Unnamed: 0,ref_area.label,source.label,indicator.label,sex.label,classif1.label,time,obs_value,obs_status.label,note_classif.label,note_indicator.label,note_source.label
0,Aruba,LFS - Labour Force Survey,Employment by sex and economic activity (thous...,Sex: Total,Economic activity (Broad sector): Total,2011,47.915,,,,
1,Aruba,LFS - Labour Force Survey,Employment by sex and economic activity (thous...,Sex: Total,Economic activity (Broad sector): Agriculture,2011,0.286,,,,
2,Aruba,LFS - Labour Force Survey,Employment by sex and economic activity (thous...,Sex: Total,Economic activity (Broad sector): Non-agriculture,2011,47.629,,,,
3,Aruba,LFS - Labour Force Survey,Employment by sex and economic activity (thous...,Sex: Total,Economic activity (Broad sector): Industry,2011,6.728,,,,
4,Aruba,LFS - Labour Force Survey,Employment by sex and economic activity (thous...,Sex: Total,Economic activity (Broad sector): Services,2011,40.763,,,,


In [3]:
# Count the rows whose observation value is missing.
employment["obs_value"].isna().sum()

8579

In [4]:
# Keep only the rows that actually carry an observation value.
employment = employment[employment["obs_value"].notna()]

In [5]:
# Keep only the rows whose classification label mentions "ISIC".
# - regex=False: "ISIC" is a plain substring, not a pattern.
# - na=False: a missing label counts as "no match"; without it the mask would
#   contain NaN and boolean indexing would raise instead of filtering.
employment_isic = employment[
    employment["classif1.label"].str.contains("ISIC", regex=False, na=False)
]

In [6]:
# Store the observation year as a plain integer column.
employment_isic = employment_isic.astype({"time": int})

In [7]:
# Order rows by ref_area.label (ascending) and, within each area, by year
# with the newest first.
employment_isic_sorted = employment_isic.sort_values(
    ["ref_area.label", "time"],
    ascending=[True, False],
)

In [8]:
# For every reference area keep only the rows from its most recent year.
#
# The per-group maximum is broadcast back onto the rows with `transform`, so
# the selection is a plain boolean mask. This avoids `groupby.apply` with a
# frame-returning lambda, whose handling of the grouping column is deprecated
# in pandas 2.2 and changes in pandas 3 (the grouping column is no longer
# passed to the function, so dropping the index afterwards would lose
# "ref_area.label" entirely).
employment_isic_most_recent = employment_isic_sorted[
    employment_isic_sorted["time"]
    == employment_isic_sorted.groupby("ref_area.label")["time"].transform("max")
]

# Replace the inherited row labels with a fresh 0..n-1 index.
employment_isic_most_recent = employment_isic_most_recent.reset_index(drop=True)

In [9]:
# Load the industry risk table; later cells use its Mapping_x, Mapping_y and
# average columns.
risk_versions = pd.read_csv("Industry_versions_combined.csv")

In [10]:
# Discard rows without an "average" value and keep just the two mapping
# columns plus the average itself.
risk_versions = risk_versions.loc[
    risk_versions["average"].notna(),
    ["Mapping_x", "Mapping_y", "average"],
]

In [11]:
# Attach the risk averages to the employment rows by matching the employment
# classification label against Mapping_x (inner join: unmatched rows drop out).
merged = employment_isic_most_recent.merge(
    risk_versions,
    how="inner",
    left_on="classif1.label",
    right_on="Mapping_x",
)

In [12]:
# Display the merged employment/risk table.
merged

Unnamed: 0,ref_area.label,source.label,indicator.label,sex.label,classif1.label,time,obs_value,obs_status.label,note_classif.label,note_indicator.label,note_source.label,Mapping_x,Mapping_y,average
0,Afghanistan,LFS - Labour Force Survey,Employment by sex and economic activity (thous...,Sex: Total,Economic activity (ISIC-Rev.4): Total,2021,7699.765,,,,Repository: ILO-STATISTICS - Micro data proces...,Economic activity (ISIC-Rev.4): Total,Economic activity (ISIC-Rev.4): Total,25.0
1,Afghanistan,LFS - Labour Force Survey,Employment by sex and economic activity (thous...,Sex: Total,Economic activity (ISIC-Rev.4): A. Agriculture...,2021,3747.953,,,,Repository: ILO-STATISTICS - Micro data proces...,Economic activity (ISIC-Rev.4): A. Agriculture...,Economic activity (ISIC-Rev.4): A. Agriculture...,28.0
2,Afghanistan,LFS - Labour Force Survey,Employment by sex and economic activity (thous...,Sex: Total,Economic activity (ISIC-Rev.4): B. Mining and ...,2021,30.685,,,,Repository: ILO-STATISTICS - Micro data proces...,Economic activity (ISIC-Rev.4): B. Mining and ...,Economic activity (ISIC-Rev.4): B. Mining and ...,6.0
3,Afghanistan,LFS - Labour Force Survey,Employment by sex and economic activity (thous...,Sex: Total,Economic activity (ISIC-Rev.4): C. Manufacturing,2021,491.404,,,,Repository: ILO-STATISTICS - Micro data proces...,Economic activity (ISIC-Rev.4): C. Manufacturing,Economic activity (ISIC-Rev.4): C. Manufacturing,9.0
4,Afghanistan,LFS - Labour Force Survey,Employment by sex and economic activity (thous...,Sex: Total,Economic activity (ISIC-Rev.4): F. Construction,2021,917.832,,,,Repository: ILO-STATISTICS - Micro data proces...,Economic activity (ISIC-Rev.4): F. Construction,Economic activity (ISIC-Rev.4): F. Construction,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9802,Zimbabwe,LFS - Labour Force Survey,Employment by sex and economic activity (thous...,Sex: Female,Economic activity (ISIC-Rev.4): O. Public admi...,2022,46.784,,,,Repository: ILO-STATISTICS - Micro data proces...,Economic activity (ISIC-Rev.4): O. Public admi...,Economic activity (ISIC-Rev.4): O. Public admi...,28.0
9803,Zimbabwe,LFS - Labour Force Survey,Employment by sex and economic activity (thous...,Sex: Female,Economic activity (ISIC-Rev.4): P. Education,2022,131.301,,,,Repository: ILO-STATISTICS - Micro data proces...,Economic activity (ISIC-Rev.4): P. Education,Economic activity (ISIC-Rev.4): P. Education,27.0
9804,Zimbabwe,LFS - Labour Force Survey,Employment by sex and economic activity (thous...,Sex: Female,Economic activity (ISIC-Rev.4): Q. Human healt...,2022,33.174,,,,Repository: ILO-STATISTICS - Micro data proces...,Economic activity (ISIC-Rev.4): Q. Human healt...,Economic activity (ISIC-Rev.4): Q. Human healt...,29.0
9805,Zimbabwe,LFS - Labour Force Survey,Employment by sex and economic activity (thous...,Sex: Female,"Economic activity (ISIC-Rev.4): R. Arts, enter...",2022,2.729,,,,Repository: ILO-STATISTICS - Micro data proces...,"Economic activity (ISIC-Rev.4): R. Arts, enter...","Economic activity (ISIC-Rev.4): R. Arts, enter...",26.0


In [13]:
# Narrow the merged table to the columns needed for the map.
merged_subset = merged.loc[
    :, ["ref_area.label", "sex.label", "Mapping_y", "obs_value", "average"]
]

In [14]:
# affected = obs_value * average / 100 * 1000, truncated to an integer.
# NOTE(review): presumably obs_value is in thousands and average is a
# percentage, making this a head count - confirm against the data sources.
merged_subset = merged_subset.assign(
    affected=merged_subset["obs_value"]
    .mul(merged_subset["average"])
    .div(100)
    .mul(1000)
    .astype(int)
)

In [15]:
# Display the table that feeds the map, including the new "affected" column.
merged_subset

Unnamed: 0,ref_area.label,sex.label,Mapping_y,obs_value,average,affected
0,Afghanistan,Sex: Total,Economic activity (ISIC-Rev.4): Total,7699.765,25.0,1924941
1,Afghanistan,Sex: Total,Economic activity (ISIC-Rev.4): A. Agriculture...,3747.953,28.0,1049426
2,Afghanistan,Sex: Total,Economic activity (ISIC-Rev.4): B. Mining and ...,30.685,6.0,1841
3,Afghanistan,Sex: Total,Economic activity (ISIC-Rev.4): C. Manufacturing,491.404,9.0,44226
4,Afghanistan,Sex: Total,Economic activity (ISIC-Rev.4): F. Construction,917.832,6.0,55069
...,...,...,...,...,...,...
9802,Zimbabwe,Sex: Female,Economic activity (ISIC-Rev.4): O. Public admi...,46.784,28.0,13099
9803,Zimbabwe,Sex: Female,Economic activity (ISIC-Rev.4): P. Education,131.301,27.0,35451
9804,Zimbabwe,Sex: Female,Economic activity (ISIC-Rev.4): Q. Human healt...,33.174,29.0,9620
9805,Zimbabwe,Sex: Female,"Economic activity (ISIC-Rev.4): R. Arts, enter...",2.729,26.0,709


In [16]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import pandas as pd

# Distinct dropdown choices, in order of first appearance in the data.
sex_options = merged_subset["sex.label"].unique()
class_options = merged_subset["Mapping_y"].unique()

# Initialize the Dash app
app = dash.Dash(__name__)

# Page layout: the map on top, followed by the two filters that drive it.
app.layout = html.Div(
    [
        dcc.Graph(id="map-graph", style={"height": "700px"}),
        html.Label("Select Sex:"),
        dcc.Dropdown(
            id="sex-dropdown",
            options=[{"label": sex, "value": sex} for sex in sex_options],
            value="Sex: Total",  # Default value
        ),
        html.Label("Select Class:"),
        dcc.Dropdown(
            id="class-dropdown",
            options=[
                {"label": class_label, "value": class_label}
                for class_label in class_options
            ],
            # Start on a class that really exists in the data. The former
            # placeholder "Class A" matched no Mapping_y value, so the initial
            # callback filtered everything out and the map rendered empty.
            value=class_options[0] if len(class_options) else None,
        ),
    ]
)


# Redraw the map whenever either dropdown changes.
@app.callback(
    Output("map-graph", "figure"),
    [Input("sex-dropdown", "value"), Input("class-dropdown", "value")],
)
def update_map(sex, class_label):
    """Build the choropleth for the selected sex and class.

    Args:
        sex: Value chosen in the "sex-dropdown"; compared against the
            "sex.label" column of ``merged_subset``.
        class_label: Value chosen in the "class-dropdown"; compared against
            the "Mapping_y" column of ``merged_subset``.

    Returns:
        A plotly figure colouring each matching "ref_area.label" by its
        "affected" value.
    """
    # Rows matching both dropdown selections.
    sex_matches = merged_subset["sex.label"] == sex
    class_matches = merged_subset["Mapping_y"] == class_label
    selection = merged_subset[sex_matches & class_matches]

    # World choropleth keyed on country names.
    choropleth_options = {
        "locations": "ref_area.label",
        "locationmode": "country names",
        "scope": "world",
        "color": "affected",
        "hover_data": ["Mapping_y", "sex.label"],
        "labels": {"affected": "Affected"},
        "title": f"Affected by Class ({sex}, {class_label})",
        "color_continuous_scale": "Jet",
    }
    figure = px.choropleth(selection, **choropleth_options)

    # Hide the default base-map layers, then switch on black country borders
    # and a white land fill.
    figure.update_geos(
        visible=False,
        showcountries=True,
        countrycolor="Black",
        showland=True,
        landcolor="white",
    )

    return figure


# Run the app
if __name__ == "__main__":
    # Prefer `app.run`: recent Dash releases reject `app.run_server`. Fall
    # back to `run_server` only on older releases that lack `run`; the `or`
    # short-circuits, so the obsolete attribute is never touched when `run`
    # exists.
    start_server = getattr(app, "run", None) or app.run_server
    start_server(debug=True, port=8033)