## Wage-Hours-Job for Worker

In [88]:
import pandas as pd
import altair as alt

def month_to_quarter(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate a monthly series into quarterly means.

    The first column of ``df`` is used as the month label (parsed with the
    format ``%Y-%m``) and the second column as the value to average. Any
    further columns are ignored.

    Args:
        df: Frame whose first two columns are the month label and the value.
            The input is not modified.

    Returns:
        A frame with one row per quarter and two columns: ``YearQuarter``
        (the string form of the pandas quarterly period, e.g. ``"2020Q1"``)
        and the value column holding the mean over that quarter.

    Raises:
        ValueError: If ``df`` has fewer than two columns, or a month label
            does not match ``%Y-%m``.
    """
    if df.shape[1] < 2:
        raise ValueError(
            "month_to_quarter expects at least two columns (month, value); "
            f"got {df.shape[1]}"
        )

    df2 = df.copy()

    # Select by column position. Iterating a row slice (``df2.iloc[:2]``)
    # yields *all* column labels and only unpacks when there are exactly two.
    time_col, value_col = df2.columns[:2]

    df2[time_col] = pd.to_datetime(df2[time_col], format="%Y-%m")
    df2["YearQuarter"] = df2[time_col].dt.to_period("Q").astype(str)

    return df2.groupby("YearQuarter", as_index=False)[value_col].mean()

def extract_year(df: pd.DataFrame) -> pd.DataFrame:
    """Split a ``YearQuarter`` label column into year, quarter and start date.

    Labels are matched against ``<4 digits>Q<1-4>``. The returned copy gains
    integer ``Year`` and ``Q`` columns and a datetime ``QuarterStart`` column
    (first day of the quarter's first month), and no longer contains
    ``YearQuarter``. The input frame is left untouched.
    """
    out = df.copy()

    # Capture group 0 is the year, group 1 the quarter number.
    pieces = out['YearQuarter'].str.extract(r'(\d{4})Q([1-4])')
    out['Year'] = pieces[0].astype(int)
    out['Q'] = pieces[1].astype(int)

    # Quarter 1..4 -> first month 1, 4, 7, 10.
    first_month = (out['Q'] - 1) * 3 + 1
    date_text = out['Year'].astype(str) + '-' + first_month.astype(str) + '-01'
    out['QuarterStart'] = pd.to_datetime(date_text)

    return out.drop(columns="YearQuarter")



def make_quarter_chart(df, title=None):
    """Layer a gray context line with an orange line filtered by ``year_sel``.

    Both layers plot ``Value`` against ``YearQuarter`` (temporal) from ``df``.
    ``title`` is applied to the y axis of the gray layer. The selection is
    read from the module-level name ``year_sel``, which must exist when this
    function is called.
    """
    context_line = (
        alt.Chart(df)
        .mark_line(color='lightgray')
        .encode(x='YearQuarter:T', y=alt.Y('Value:Q', title=title))
    )

    # Same series, restricted to the rows matching the current selection.
    selected_line = (
        alt.Chart(df)
        .mark_line(color='orange', strokeWidth=3)
        .encode(x='YearQuarter:T', y='Value:Q')
        .transform_filter(year_sel)
    )

    return alt.layer(context_line, selected_line).properties(width=800, height=100)

In [89]:
# Prepare the data for drawing the parallel-coordinates plot.
import pandas as pd
# Compatibility shim: expose Series.items under the old name Series.iteritems.
# NOTE(review): presumably needed because the installed Altair version still
# calls Series.iteritems, which the installed pandas no longer provides —
# confirm, and drop this line once the environment no longer needs it.
pd.Series.iteritems = pd.Series.items # bug repair
import altair as alt

# Wage/hour data (input for the parallel-coordinates plot).
df_avg_earnings_annual = pd.read_csv("../data/Processed/selected_data_for_worker/wage_hour/avg_hourly_earnings_dollars_annual.csv")
df_avg_work_hours_annual = pd.read_csv("../data/Processed/selected_data_for_worker/wage_hour/avg_weekly_work_hours_annual.csv")
df_compensation_hour_annual = pd.read_csv("../data/Processed/selected_data_for_worker/wage_hour/priv_total_compensation_cost_dollars_per_hour_annual.csv")
df_salary_hour_annual = pd.read_csv("../data/Processed/selected_data_for_worker/wage_hour/priv_wage_salaries_cost_dollars_per_hour_annual.csv")

# Outer-join the four annual tables on Year, one table at a time, so a year
# present in any of them is kept.
df_wage_hour = pd.merge(df_avg_earnings_annual, df_avg_work_hours_annual, on='Year', how='outer')
df_wage_hour = pd.merge(df_wage_hour, df_compensation_hour_annual, on='Year', how='outer')
df_wage_hour = pd.merge(df_wage_hour, df_salary_hour_annual, on='Year', how='outer')

# Source column name -> short label shown on the chart axis.
label_map = {
    'avg_hourly_earnings_dollars_annual_mean': 'Hourly Earnings',
    'avg_weekly_work_hours_annual_mean': 'Weekly Hours',
    'priv_total_compensation_cost_dollars_per_hour_Estimate Value_annual_mean': 'Total Comp/hr',
    'priv_wage_salaries_cost_dollars_per_hour_Estimate Value_annual_mean': 'Wage Cost/hr',
}

# Reshape to long form for Altair: one row per (Year, Metric) pair. The melted
# columns are exactly the keys of label_map, in the same order.
df_long = pd.melt(
    df_wage_hour,
    id_vars=['Year'],
    value_vars=list(label_map),
    var_name='Metric',
    value_name='Value',
)
df_long['Metric'] = df_long['Metric'].map(label_map)
df_long['Year'] = df_long['Year'].astype(str)

In [92]:
# Employment data (input for the multiple line charts).
df_employ = pd.read_csv("../data/Processed/selected_data_for_worker/employment/all_employees_thousands.csv")
df_job_open = pd.read_csv("../data/Processed/selected_data_for_worker/employment/job_opennings_thousands.csv")
df_num_privest_jbg = pd.read_csv("../data/Processed/selected_data_for_worker/employment/num_priv_estab_gross_job_gains_thousands.csv")
df_num_privest_jbl = pd.read_csv("../data/Processed/selected_data_for_worker/employment/num_priv_estab_gross_job_losses_thousands.csv")
df_unemploy_rate = pd.read_csv("../data/Processed/selected_data_for_worker/employment/unemployment_rate.csv")

# Monthly series -> quarterly means, then discard the last three quarterly
# rows of each series.
df_employ_q = month_to_quarter(df_employ).iloc[:-3]
df_job_open = month_to_quarter(df_job_open).iloc[:-3]
df_unemploy_rate_q = month_to_quarter(df_unemploy_rate).iloc[:-3]

# The quarterly tables: remove all whitespace from the quarter labels so both
# tables share the same join key.
df_num_privest_jbg['YearQuarter'] = df_num_privest_jbg['YearQuarter'].str.replace(r'\s+', '', regex=True)
df_num_privest_jbl['YearQuarter'] = df_num_privest_jbl['YearQuarter'].str.replace(r'\s+', '', regex=True)

# Net change = gross job gains - gross job losses, per quarter.
df_net = df_num_privest_jbg.merge(df_num_privest_jbl, on='YearQuarter', how="outer")
df_net['net_job_thousands'] = (
    df_net['num_priv_estab_gross_job_gains_thousands']
    - df_net['num_priv_estab_gross_job_losses_thousands']
)
df_net = extract_year(df_net[['YearQuarter', 'net_job_thousands']])
print(df_net)

    net_job_thousands  Year  Q QuarterStart
0                 2.0  2013  1   2013-01-01
1                 7.0  2013  2   2013-04-01
2                 4.0  2013  3   2013-07-01
3                 7.0  2013  4   2013-10-01
4                 2.0  2014  1   2014-01-01
5                 9.0  2014  2   2014-04-01
6                 6.0  2014  3   2014-07-01
7                 9.0  2014  4   2014-10-01
8                -1.0  2015  1   2015-01-01
9                 4.0  2015  2   2015-04-01
10               -4.0  2015  3   2015-07-01
11               -2.0  2015  4   2015-10-01
12               -4.0  2016  1   2016-01-01
13               -4.0  2016  2   2016-04-01
14               -3.0  2016  3   2016-07-01
15               -1.0  2016  4   2016-10-01
16                5.0  2017  1   2017-01-01
17                4.0  2017  2   2017-04-01
18               -3.0  2017  3   2017-07-01
19                9.0  2017  4   2017-10-01
20                7.0  2018  1   2018-01-01
21                8.0  2018  2  

In [None]:
# Visualization

# Build the shared selection.
# Point selection named 'YearSel', keyed on the 'Year' field: a click selects,
# a click with Shift held toggles a value in or out of the selection, and a
# double-click clears it.
year_sel = alt.selection_point(
    name='YearSel',
    fields=['Year'],
    on='click',
    nearest=True,
    clear='dblclick',
    toggle='event.shiftKey'
)

# Fully transparent point layer (opacity=0) over df_long; this is the layer
# the selection parameter is attached to via add_params.
# NOTE(review): with opacity=0 the conditional colour is presumably never
# visible — confirm whether it is still needed.
selectors = alt.Chart(df_long).mark_point(opacity=0).encode(
    x=alt.X('Metric:N'),
    y=alt.Y('Value:Q', scale=alt.Scale(domain=[20, 50])),
    color=alt.condition(year_sel, alt.value('orange'), alt.value('lightgray')),
    tooltip=['Year:N']
).add_params(year_sel)

# Build the parallel-coordinates plot.
# Shared encoding for the visible layers: one axis position per Metric, the
# y scale fixed to the domain [20, 50], colour by Year where the selection
# condition holds and '#eee' otherwise.
base = alt.Chart(df_long).encode(
    x=alt.X('Metric:N', title=None, axis=alt.Axis(orient='top', labelAngle=0, grid=True)),
    y=alt.Y('Value:Q', title=None, scale=alt.Scale(domain=[20,50])),
    color=alt.condition(
        year_sel,
        'Year:O',
        alt.value('#eee')
    ),
    # Group the marks by Year so each year is drawn as its own line.
    detail='Year:N',        
    tooltip=['Year:N','Metric:N','Value:Q']
)

# Two visible layers built from the same encoding: lines and filled points.
lines  = base.mark_line(interpolate='monotone', strokeWidth=2)
points = base.mark_point(size=60, filled=True)

# Layer order: transparent selector points, then lines, then points.
parallel_coords = (selectors + lines + points).properties(
    width=800, height=300,
    title='Annual Worker Metrics Parallel Coordinates'
)

# Build the multiple line charts.

# build multiple line charts

def make_quarter_chart_with_window(df, title):
    """Layer a faint gray line under a per-year coloured line of net job change.

    Both layers plot ``net_job_thousands`` against ``QuarterStart`` (temporal)
    from ``df``; ``title`` is used as the y-axis title. The module-level
    ``year_sel`` selection is attached to the coloured layer as a parameter.

    NOTE(review): the y field is hard-coded to ``net_job_thousands``, so the
    function only works for frames with that column. The coloured layer adds
    ``year_sel`` as a parameter but does not filter by it, unlike
    ``make_quarter_chart`` — confirm this is intended.
    """
    gray = alt.Chart(df).mark_line(color='lightgray', opacity=0.5)
    gray = gray.encode(
        x='QuarterStart:T',
        y=alt.Y('net_job_thousands:Q', title=title),
    )

    coloured = alt.Chart(df).mark_line(size=2)
    coloured = coloured.encode(
        x='QuarterStart:T',
        y=alt.Y('net_job_thousands:Q', title=title),
        color='Year:O',
    )
    coloured = coloured.add_params(year_sel)

    return (gray + coloured).properties(width=800, height=100)

# Net job change line chart, stacked under the parallel-coordinates plot with
# 10 px between the two views.
c1 = make_quarter_chart_with_window(df_net, '指标 A')
final = alt.vconcat(parallel_coords, c1).configure_concat(spacing=10)

# The last expression in the cell: renders the chart in the notebook.
final
