## Wage-Hours-Job for Worker

In [1]:
import pandas as pd
import altair as alt

def month_to_quarter(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate a monthly series to quarterly means.

    The first column of ``df`` is taken as the month label and the second
    column as the value to average; any further columns are ignored.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose first column holds month strings parseable with the
        format ``%Y-%m`` and whose second column holds the values.
        The input frame is not modified.

    Returns
    -------
    pd.DataFrame
        Two columns: ``YearQuarter`` (strings such as ``"2013Q1"``) and the
        value column, holding the mean of the months in each quarter.

    Raises
    ------
    ValueError
        If ``df`` has fewer than two columns.
    """
    if df.shape[1] < 2:
        raise ValueError("month_to_quarter expects at least two columns: time, value")

    # Take the column *labels* explicitly. Unpacking a row slice
    # (``df.iloc[:2]``) iterates over every column label and therefore only
    # works when the frame happens to have exactly two columns.
    time_col, value_col = df.columns[:2]

    monthly = df.copy()
    monthly[time_col] = pd.to_datetime(monthly[time_col], format="%Y-%m")
    monthly["YearQuarter"] = monthly[time_col].dt.to_period("Q").astype(str)

    return monthly.groupby("YearQuarter", as_index=False)[value_col].mean()

def extract_year(df: pd.DataFrame) -> pd.DataFrame:
    """Split a ``YearQuarter`` label into numeric parts and a start date.

    ``YearQuarter`` values are matched against ``<4 digits>Q<1-4>``. The
    returned frame drops ``YearQuarter`` and gains three columns:
    ``Year`` (int), ``Q`` (int) and ``QuarterStart`` (datetime of the first
    day of the quarter). The input frame is left untouched.
    """
    pieces = df['YearQuarter'].str.extract(r'(\d{4})Q([1-4])')
    year_num = pieces[0].astype(int)
    quarter_num = pieces[1].astype(int)

    # Quarters 1..4 start in months 1, 4, 7, 10.
    first_month = (quarter_num - 1) * 3 + 1
    quarter_start = pd.to_datetime(
        year_num.astype(str) + '-' + first_month.astype(str) + '-01'
    )

    return df.drop("YearQuarter", axis=1).assign(
        Year=year_num,
        Q=quarter_num,
        QuarterStart=quarter_start,
    )

In [2]:
# Prepare the data for the parallel-coordinates plot
import pandas as pd
pd.Series.iteritems = pd.Series.items # bug repair
import altair as alt

# wage_hour data (parallel coordinate): one CSV per annual series
df_avg_earnings_annual = pd.read_csv("../data/Processed/selected_data_for_worker/wage_hour/avg_hourly_earnings_dollars_annual.csv")
df_avg_work_hours_annual = pd.read_csv("../data/Processed/selected_data_for_worker/wage_hour/avg_weekly_work_hours_annual.csv")
df_compensation_hour_annual = pd.read_csv("../data/Processed/selected_data_for_worker/wage_hour/priv_total_compensation_cost_dollars_per_hour_annual.csv")
df_salary_hour_annual = pd.read_csv("../data/Processed/selected_data_for_worker/wage_hour/priv_wage_salaries_cost_dollars_per_hour_annual.csv")

# Outer-join the four series on Year so a year missing from one file is kept.
df_wage_hour = df_avg_earnings_annual.merge(df_avg_work_hours_annual, on='Year', how='outer')
df_wage_hour = df_wage_hour.merge(df_compensation_hour_annual, on='Year', how='outer')
df_wage_hour = df_wage_hour.merge(df_salary_hour_annual, on='Year', how='outer')

# Source column -> display label. The key order also fixes which columns are
# melted and in what order.
label_map = {
    'avg_hourly_earnings_dollars_annual_mean': 'Hourly Earnings ($/hr)',
    'avg_weekly_work_hours_annual_mean':    'Weekly Hours (hr)',
    'priv_total_compensation_cost_dollars_per_hour_Estimate Value_annual_mean': 'Total Comp ($/hr)',
    'priv_wage_salaries_cost_dollars_per_hour_Estimate Value_annual_mean':    'Wage Cost ($/hr)'
}

# further process for altair: long format, one row per (Year, Metric)
df_long = df_wage_hour.melt(
    id_vars=['Year'],
    value_vars=list(label_map),
    var_name='Metric',
    value_name='Value'
)
df_long = df_long.assign(
    Metric=df_long['Metric'].map(label_map),
    Year=df_long['Year'].astype(str),
)

In [3]:
# employment data (multiple line charts)
df_employ = pd.read_csv("../data/Processed/selected_data_for_worker/employment/all_employees_thousands.csv")
df_job_open = pd.read_csv("../data/Processed/selected_data_for_worker/employment/job_opennings_thousands.csv")
df_num_privest_jbg = pd.read_csv("../data/Processed/selected_data_for_worker/employment/num_priv_estab_gross_job_gains_thousands.csv")
df_num_privest_jbl = pd.read_csv("../data/Processed/selected_data_for_worker/employment/num_priv_estab_gross_job_losses_thousands.csv")
df_unemploy_rate = pd.read_csv("../data/Processed/selected_data_for_worker/employment/unemployment_rate.csv")

# Monthly -> quarterly means, then drop the last three quarterly rows.
# NOTE(review): the reason for trimming three quarters is not visible here;
# presumably it aligns these series with the gross job gains/losses data — confirm.
df_employ_q = month_to_quarter(df_employ).iloc[:-3]
df_job_open = month_to_quarter(df_job_open).iloc[:-3]
df_unemploy_rate_q = month_to_quarter(df_unemploy_rate).iloc[:-3]

# The quarterly files carry whitespace inside the quarter label; strip it so
# the label matches the "<year>Q<n>" form produced by month_to_quarter.
df_num_privest_jbg = df_num_privest_jbg.assign(
    YearQuarter=df_num_privest_jbg['YearQuarter'].str.replace(r'\s+', '', regex=True)
)
df_num_privest_jbl = df_num_privest_jbl.assign(
    YearQuarter=df_num_privest_jbl['YearQuarter'].str.replace(r'\s+', '', regex=True)
)

# Net change = gross gains - gross losses, per quarter.
df_net = (
    df_num_privest_jbg
    .merge(df_num_privest_jbl, on='YearQuarter', how="outer")
    .assign(
        net_job_thousands=lambda merged: (
            merged['num_priv_estab_gross_job_gains_thousands']
            - merged['num_priv_estab_gross_job_losses_thousands']
        )
    )
    [['YearQuarter', 'net_job_thousands']]
)

# Replace the YearQuarter label with Year / Q / QuarterStart in every frame.
df_net = extract_year(df_net)
print(df_net)

df_unemploy_rate_q = extract_year(df_unemploy_rate_q)
print(df_unemploy_rate_q)

df_job_open = extract_year(df_job_open)
print(df_job_open)

df_employ_q = extract_year(df_employ_q)
print(df_employ_q)

    net_job_thousands  Year  Q QuarterStart
0                 2.0  2013  1   2013-01-01
1                 7.0  2013  2   2013-04-01
2                 4.0  2013  3   2013-07-01
3                 7.0  2013  4   2013-10-01
4                 2.0  2014  1   2014-01-01
5                 9.0  2014  2   2014-04-01
6                 6.0  2014  3   2014-07-01
7                 9.0  2014  4   2014-10-01
8                -1.0  2015  1   2015-01-01
9                 4.0  2015  2   2015-04-01
10               -4.0  2015  3   2015-07-01
11               -2.0  2015  4   2015-10-01
12               -4.0  2016  1   2016-01-01
13               -4.0  2016  2   2016-04-01
14               -3.0  2016  3   2016-07-01
15               -1.0  2016  4   2016-10-01
16                5.0  2017  1   2017-01-01
17                4.0  2017  2   2017-04-01
18               -3.0  2017  3   2017-07-01
19                9.0  2017  4   2017-10-01
20                7.0  2018  1   2018-01-01
21                8.0  2018  2  

In [4]:
# Visualization: click-to-select parallel-coordinates view of the annual metrics.

# Build the selection: a point selection keyed on the Year field.
# Click selects, shift-click toggles, double-click clears.
year_sel = alt.selection_point(
    name='YearSel',
    fields=['Year'],
    on='click',
    nearest=True,
    clear='dblclick',
    toggle='event.shiftKey'
)

# Fully transparent points (opacity=0) whose job is to carry the selection
# parameter into the layered chart; they share the x/y encoding of the
# visible layers.
selectors = alt.Chart(df_long).mark_point(opacity=0).encode(
    x=alt.X('Metric:N'),
    y=alt.Y('Value:Q', scale=alt.Scale(domain=[20, 50])),
    color=alt.condition(year_sel, alt.value('orange'), alt.value('lightgray')),
    tooltip=['Year:N']
).add_params(year_sel)

# Build the parallel-coordinates base: one nominal position per metric on x
# (axis drawn on top), the metric value on y with a fixed 20-50 domain.
base = alt.Chart(df_long).encode(
    x=alt.X('Metric:N', title=None, axis=alt.Axis(orient='top', labelAngle=0, grid=True)),
    y=alt.Y('Value:Q', title=None, scale=alt.Scale(domain=[20,50])),
    # Selected years are colored by Year; everything else is near-white.
    color=alt.condition(
        year_sel,
        'Year:O',
        alt.value('#eee')
    ),
    detail='Year:N',        # group marks by year so each year is its own line
    tooltip=['Year:N','Metric:N','Value:Q']
)

# Same encoding rendered twice: as connected lines and as filled points.
lines  = base.mark_line(interpolate='monotone', strokeWidth=2)
points = base.mark_point(size=60, filled=True)

# Layer order: invisible selectors, then lines, then points.
parallel_coords = (selectors + lines + points).properties(
    width=800, height=300,
    title='Annual Worker Metrics Parallel Coordinates'
)

# build multiple line charts

def make_quarter_chart_with_window(df, title, value_col_name='net_job_thousands'):
    """Layer a gray background line under a year-colored line for one quarterly series.

    Note: later cells in this notebook define a function with the same name
    and a different signature; whichever cell ran last wins.

    Parameters
    ----------
    df : pd.DataFrame
        Frame providing ``QuarterStart``, ``Year`` and ``value_col_name``.
    title : str
        Title shown on the y axis.
    value_col_name : str, default ``'net_job_thousands'``
        Column plotted on the y axis. The default keeps the previous
        hard-coded behavior, so existing two-argument calls are unaffected.

    Returns
    -------
    alt.LayerChart
        Two layered line charts, 800 x 100 pixels.
    """
    y_field = f'{value_col_name}:Q'

    # Context layer: the whole series in a faint gray.
    base_lines = alt.Chart(df).mark_line(color='lightgray', opacity=0.5).encode(
        x='QuarterStart:T',
        y=alt.Y(y_field, title=title),
    )

    # Colored layer: same series, colored per year. The module-level
    # ``year_sel`` selection is attached here; the encoding itself does not
    # condition on it.
    highlight = alt.Chart(df).mark_line(size=2).encode(
        x='QuarterStart:T',
        y=alt.Y(y_field, title=title),
        color='Year:O'
    ).add_params(year_sel)

    return alt.layer(base_lines, highlight).properties(width=800, height=100)

# Net job change chart; the string is passed as the chart's title argument.
c1 = make_quarter_chart_with_window(df_net, 'net_job_thousands')
# Stack the parallel-coordinates view above the quarterly chart.
final = alt.vconcat(parallel_coords, c1).configure_concat(spacing=10) 

# Bare last expression so the notebook renders the chart.
final


In [5]:
# build selector: two dropdowns that bound the highlighted year window
years = sorted(df_long['Year'].unique())

start_dd, end_dd = (
    alt.param(
        name=param_name,
        bind=alt.binding_select(options=years, name=dropdown_label),
        value=initial_year,
    )
    for param_name, dropdown_label, initial_year in (
        ('start_year', 'Starting Year: ', years[0]),
        ('end_year', 'Ending Year: ', years[-1]),
    )
)

# Predicate: the row's Year lies inside [start_year, end_year].
highlight = (alt.datum.Year >= start_dd) & (alt.datum.Year <= end_dd)

# build the parallel: shared position/detail/tooltip encoding
base = (
    alt.Chart(df_long)
    .add_params(start_dd, end_dd)
    .encode(
        x=alt.X('Metric:N', title=None, axis=alt.Axis(orient='top', labelAngle=0, grid=True)),
        y=alt.Y('Value:Q', title=None, scale=alt.Scale(domain=[20, 50])),
        detail='Year:N',
        tooltip=['Year:O', 'Metric:N', 'Value:Q'],
    )
)

# Lines and points get the same styling: purple ramp by year, and years
# outside the window are faded rather than removed.
lines, points = (
    mark_layer.encode(
        color=alt.Color('Year:O', scale=alt.Scale(scheme='purples')),
        opacity=alt.condition(highlight, alt.value(1.0), alt.value(0.15)),
    )
    for mark_layer in (
        base.mark_line(interpolate='monotone', strokeWidth=2),
        base.mark_point(size=60, filled=True),
    )
)

parallel_coords = (lines + points).properties(width=1000, height=300)

# build line charts
def make_quarter_chart_with_window(df, title, label_x, value_col_name:str):
    """Quarterly line chart with the dropdown-selected year window highlighted.

    The full series is drawn in light gray (line + points); rows whose
    ``Year`` falls between the module-level ``start_dd`` and ``end_dd``
    parameters are redrawn on top in purple.

    Parameters
    ----------
    df : frame with ``QuarterStart``, ``Year`` and ``value_col_name`` columns.
    title : chart title.
    label_x : x-axis title.
    value_col_name : column plotted on the y axis.

    Returns
    -------
    A four-layer Altair chart, 500 x 100 pixels.
    """
    y_field = f'{value_col_name}:Q'
    lowest = df[value_col_name].min()
    highest = df[value_col_name].max()

    in_window = (alt.datum.Year >= start_dd) & (alt.datum.Year <= end_dd)

    # Every layer carries both dropdown parameters.
    with_params = alt.Chart(df).add_params(start_dd, end_dd)
    windowed = with_params.transform_filter(in_window)

    # Gray context layers; the y domain is padded to 90%-110% of the data range.
    bg = with_params.mark_line(strokeWidth=3, color='lightgray', opacity=1).encode(
        x=alt.X('QuarterStart:T', title=label_x),
        y=alt.Y(y_field, title=None, scale=alt.Scale(domain=[lowest*0.9, highest*1.1])),
    )
    bg_points = bg.mark_point(color='lightgray', size=20, opacity=1)

    # Highlight layers: only rows inside the selected window.
    hl = windowed.mark_line(strokeWidth=3).encode(
        x='QuarterStart:T',
        y=alt.Y(y_field, title=None),
        color=alt.value("#9997BC"),
    )
    hl_points = windowed.mark_point(size=20,filled=True).encode(
        x='QuarterStart:T',
        y=y_field,
        color=alt.value("#555184"),
        tooltip=[
            alt.Tooltip('QuarterStart:T'),
            alt.Tooltip(y_field, format='.0f'),
        ],
    )

    layered = alt.layer(bg, bg_points, hl, hl_points)
    return layered.properties(width=500, height=100, title = title)


# One quarterly chart per employment series: (frame, title, value column).
c1, c2, c3, c4 = (
    make_quarter_chart_with_window(frame, label_x="Year", title=chart_title, value_col_name=column)
    for frame, chart_title, column in (
        (df_net, "Net Establishment Changes in Job Gains and Losses (Thousands)", "net_job_thousands"),
        (df_job_open, "Trends in Job Openings in Manufacturing (Thousands)", "job_opennings_thousands"),
        (df_employ_q, "Trends in Total Employment in Manufacturing (Thousands)", "all_employees_thousands"),
        (df_unemploy_rate_q, "Trends in Unemployment Rate in Manufacturing (%)", "unemployment_rate"),
    )
)

# Two rows of two charts each.
b1 = alt.hconcat(c2, c1)
b2 = alt.hconcat(c3, c4)

# Parallel coordinates on top, the two rows of line charts underneath.
final = (
    alt.vconcat(parallel_coords, b1, b2)
    .properties(
        title=alt.TitleParams(
            text="Evolution of Wages, Working Hours, and Employment Conditions of U.S. Manufacturing Workers (2013-2024)",
            anchor="middle",
            fontSize=16,
        )
    )
    .resolve_legend(color='independent', size='independent')
)


final

In [13]:
# build selector: start/end year dropdowns over the years present in df_long
years = sorted(df_long['Year'].unique())

start_dd, end_dd = (
    alt.param(
        name=signal_name,
        bind=alt.binding_select(options=years, name=menu_label),
        value=default_year,
    )
    for signal_name, menu_label, default_year in (
        ('start_year', 'Starting Year: ', years[0]),
        ('end_year', 'Ending Year: ', years[-1]),
    )
)

# Rows count as highlighted when start_year <= Year <= end_year.
highlight = (alt.datum.Year >= start_dd) & (alt.datum.Year <= end_dd)

# build the parallel: metrics across the top axis, values on a fixed 20-50 scale
base = (
    alt.Chart(df_long)
    .add_params(start_dd, end_dd)
    .encode(
        x=alt.X('Metric:N', title=None, axis=alt.Axis(orient='top', labelAngle=0, grid=True)),
        y=alt.Y('Value:Q', title=None, scale=alt.Scale(domain=[20, 50])),
        detail='Year:N',
        tooltip=['Year:O', 'Metric:N', 'Value:Q'],
    )
)

# Identical color/opacity styling applied to the line and point renderings.
lines, points = (
    rendering.encode(
        color=alt.Color('Year:O', scale=alt.Scale(scheme='purples')),
        opacity=alt.condition(highlight, alt.value(1.0), alt.value(0.15)),
    )
    for rendering in (
        base.mark_line(interpolate='monotone', strokeWidth=2),
        base.mark_point(size=60, filled=True),
    )
)

parallel_coords = (lines + points).properties(width=1000, height=300)

# build line charts
def make_quarter_chart_with_window(
    df,
    title,
    label_x,
    value_col_name: str,
    tick_start: str = '2013-01-01',
    tick_end: str = '2024-01-01',
):
    """Full-width quarterly line chart with the selected year window highlighted.

    The whole series is drawn in light gray (line + points). Rows whose
    ``Year`` lies between the module-level ``start_dd`` and ``end_dd``
    dropdown parameters are redrawn on top in purple, with a tooltip.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with ``QuarterStart``, ``Year``, ``Q`` and ``value_col_name``
        columns (the shape produced by ``extract_year``).
    title : str
        Chart title.
    label_x : str
        X-axis title.
    value_col_name : str
        Column plotted on the y axis.
    tick_start, tick_end : str, optional
        First and last date of the one-tick-per-year x axis. The defaults
        reproduce the previously hard-coded 2013-2024 range.

    Returns
    -------
    alt.LayerChart
        Four layers (gray line, gray points, highlight line, highlight
        points), 1000 x 100 pixels.
    """
    y_field = f'{value_col_name}:Q'
    min_value = df[value_col_name].min()
    max_value = df[value_col_name].max()

    in_window = (alt.datum.Year >= start_dd) & (alt.datum.Year <= end_dd)

    # One tick per year, placed on January 1st, as ISO datetime strings.
    year_ticks = (
        pd.date_range(start=tick_start, end=tick_end, freq='YS')
        .strftime('%Y-%m-%dT%H:%M:%S')
        .tolist()
    )

    x_axis = alt.X(
        'QuarterStart:T',
        title=label_x,  # was hard-coded to 'Year', silently ignoring the argument
        axis=alt.Axis(
            grid=False,
            format='%Y',
            values=year_ticks  # only one tick per year
        )
    )

    # Every layer carries both dropdown parameters.
    with_params = alt.Chart(df).add_params(start_dd, end_dd)
    windowed = with_params.transform_filter(in_window)

    # Gray context layers; the y domain is padded to 90%-110% of the data range.
    bg = with_params.mark_line(
        strokeWidth=3, color='lightgray', opacity=1
    ).encode(
        x=x_axis,
        y=alt.Y(
            y_field,
            title=None,
            scale=alt.Scale(domain=[min_value * 0.9, max_value * 1.1]),
            axis=alt.Axis(grid=False)
        )
    )

    bg_points = bg.mark_point(color='lightgray', size=20, opacity=1)

    # Highlight layers: only rows inside the selected year window.
    hl = windowed.mark_line(
        strokeWidth=3
    ).encode(
        x=x_axis,
        y=alt.Y(y_field, axis=alt.Axis(grid=False)),
        color=alt.value("#9997BC")
    )

    hl_points = windowed.mark_point(
        size=20, filled=True
    ).encode(
        x=x_axis,
        y=alt.Y(y_field, axis=alt.Axis(grid=False)),
        color=alt.value("#555184"),
        tooltip=[
            alt.Tooltip('QuarterStart:T', title='Quarter Start', format='%Y-%m'),
            # The quarter number lives in column 'Q' (created by extract_year);
            # there is no 'Quarter' column, so the old field was always empty.
            alt.Tooltip('Q:N', title='Quarter Number'),
            alt.Tooltip(y_field, title='Value', format='.0f')
        ]
    )

    return alt.layer(bg, bg_points, hl, hl_points).properties(width=1000, height=100, title=title)




# Build all four quarterly charts: (frame, title, value column).
c1, c2, c3, c4 = (
    make_quarter_chart_with_window(frame, label_x="Year", title=chart_title, value_col_name=column)
    for frame, chart_title, column in (
        (df_net, "Net Establishment Changes in Job Gains and Losses (Thousands)", "net_job_thousands"),
        (df_job_open, "Trends in Job Openings in Manufacturing (Thousands)", "job_opennings_thousands"),
        (df_employ_q, "Trends in Total Employment in Manufacturing (Thousands)", "all_employees_thousands"),
        (df_unemploy_rate_q, "Trends in Unemployment Rate in Manufacturing (%)", "unemployment_rate"),
    )
)

# Only the net-jobs and unemployment-rate charts go into the final figure.
b1, b2 = c1, c4

dashboard_title = alt.TitleParams(
    text="Evolution of Wages, Working Hours, and Employment Conditions of U.S. Manufacturing Workers (2013–2024)",
    anchor="middle",
    fontSize=16
)

# Parallel coordinates on top, then the two full-width line charts.
final = (
    alt.vconcat(
        parallel_coords,
        b1.properties(width=1000),
        b2.properties(width=1000),
    )
    .properties(title=dashboard_title)
    .resolve_legend(color='independent', size='independent')
)

final


In [14]:
# Export the chart currently bound to `final` as an HTML file.
# The path is relative to the notebook's working directory.
final.save('../img/workers.html')