## Cost - Unit labor cost
## Efficiency - productivity

In [1]:
import pandas as pd
import altair as alt
import numpy as np

df = pd.read_csv(
    "../data/Processed/efficiency_productivity/efficiency_productivity.csv"
)

# Yearly averages of the two index series: parse 'Date', take the calendar
# year from it, then average each series within every year.
df['Date'] = pd.to_datetime(df['Date'])
df['Year'] = df['Date'].dt.year
index_cols = ['Labor productivity', 'Unit labor costs']
df_year = df.groupby('Year', as_index=False)[index_cols].mean()
print(df_year)

    Year  Labor productivity  Unit labor costs
0   2013           102.84175          88.68700
1   2014           102.54325          91.29650
2   2015           100.76275          95.46550
3   2016           100.71300          96.18725
4   2017           100.00850          99.99750
5   2018           100.22675         102.18900
6   2019            97.90625         107.20625
7   2020            97.78950         114.53900
8   2021            99.48350         115.33175
9   2022            98.18900         120.61175
10  2023            98.03075         126.75750
11  2024            98.67175         130.99000


In [2]:
# Load the per-quarter counts of private establishments.
df_estb = pd.read_csv("../data/Processed/num_private_establishment_count.csv")

# Quarterly -> yearly: the year is the first four characters of 'YearQuarter',
# and the quarters of each year are aggregated with the mean.
df_estb['Year'] = df_estb['YearQuarter'].str[:4].astype(int)
annual_mean = df_estb.groupby('Year', as_index=False)['num_private_establishment_count'].mean()
df_estb_annual = annual_mean.rename(
    columns={'num_private_establishment_count': 'Average_Establishments'}
)

# Year-over-year change of the annual average, in percent (2 decimals).
df_growth = df_estb_annual.sort_values('Year').copy()
yoy_pct = df_growth['Average_Establishments'].pct_change() * 100
# The first year has no previous year to compare with, so pct_change gives
# NaN there; that NaN is reported as 0.
df_growth['GrowthPct'] = yoy_pct.round(2).fillna(0)

print(df_growth[['Year', 'GrowthPct']])

    Year  GrowthPct
0   2013       0.00
1   2014       0.90
2   2015       0.85
3   2016       0.90
4   2017       0.88
5   2018       1.36
6   2019       1.02
7   2020       0.92
8   2021       2.26
9   2022       3.91
10  2023       3.02
11  2024       2.23


In [4]:
# edit policy bar
import pandas as pd

# One row per policy period: (start date, end date, label).
policies = pd.DataFrame(
    [
        ('2015-01-01', '2016-06-30', 'A'),
        ('2016-07-01', '2018-03-31', 'B'),
        ('2018-04-01', '2020-12-31', 'C'),
    ],
    columns=['start', 'end', 'policy'],
)
# Parse both boundary columns from strings into pandas datetimes.
policies = policies.assign(
    start=pd.to_datetime(policies['start']),
    end=pd.to_datetime(policies['end']),
)

In [5]:
# visualization
# Four vertically stacked views driven by one year slider ('year_sel'):
#   1. a policy timeline,
#   2. a connected scatter of labor productivity vs unit labor costs,
#   3. a single bar with the selected year's average establishment count,
#   4. a line chart of the annual establishment growth rate.

# build policy bar
# No y/y2 encoding on purpose: a rect mark without a y channel spans the full
# height of its view, so every policy band fills the 30px row. Constant values
# on y/y2 are pixel positions, so y=0 / y2=1 would draw a band only 1px tall.
policy_bars = alt.Chart(policies).mark_rect().encode(
    x='start:T',
    x2='end:T',
    color=alt.Color('policy:N',
                    scale=alt.Scale(scheme='category10'))
).properties(height=30)

# Policy name drawn at each period's start date, shifted 10px upwards.
policy_labels = alt.Chart(policies).mark_text(
    baseline='middle', dy=-10, color='black'
).encode(
    x=alt.X('start:T', title=None),
    text='policy:N'
).properties(height=30)

policy_row = alt.layer(policy_bars, policy_labels).properties(
    width=800,
    title='Policy'
)

# build the slider
# Range and initial value come from the years present in df_year.
year_slider = alt.param(
    name='year_sel',
    bind=alt.binding_range(
        name='Show up to Year: ',
        min=int(df_year['Year'].min()),
        max=int(df_year['Year'].max()),
        step=1
    ),
    value=int(df_year['Year'].min())
)

# build the connected scatter plot
# Only years up to the slider value are shown; 'order' connects the points
# in year order rather than in x order.
base = alt.Chart(df_year).transform_filter("datum.Year <= year_sel").encode(
    x=alt.X('Labor productivity:Q', title='Labor Productivity (Index, 2017=100)', scale=alt.Scale(domain=[97.5, 103])),
    y=alt.Y('Unit labor costs:Q',  title='Unit Labor Costs (Index, 2017=100)', scale=alt.Scale(domain=[80, 140])),
    order=alt.Order('Year:O'),
    tooltip=[
      alt.Tooltip('Year:O', title='Year'),
      alt.Tooltip('Labor productivity:Q', format='.2f'),
      alt.Tooltip('Unit labor costs:Q', format='.2f'),
    ]
)

line   = base.mark_line(color='#1f77b4', strokeWidth=1)
points = base.mark_point(color='#ff7f0e', size=100, filled=True)

scatter = (line + points).properties(
    width=800,
    height=200
)

# build the bar plot
# Exactly the selected year is shown; the fixed x domain keeps bar lengths
# comparable while the slider moves.
bar = (
    alt.Chart(df_estb_annual)
       .transform_filter("datum.Year == year_sel")
       .mark_bar(color='teal')
       .encode(
           y=alt.Y('Year:O', title=None, sort='descending'),
           x=alt.X('Average_Establishments:Q', title='Avg Private Establishments', scale = alt.Scale(domain=[0,420000])),
       )
).properties(
    width=800,
    height=20
)

# build the line chart
# Fixed year domain so the x axis does not rescale as years are revealed.
years = list(range(2013, 2025))

base_growth = (
    alt.Chart(df_growth)
       .transform_filter("datum.Year <= year_sel")
       .encode(
           x=alt.X('Year:O', title='Year', scale=alt.Scale(domain=years)),
           y=alt.Y('GrowthPct:Q', title='Annual Growth Rate (%)', scale=alt.Scale(domain=[0, 4])),
           tooltip=[
               alt.Tooltip('Year:O'),
               alt.Tooltip('GrowthPct:Q', title='Growth Rate (%)')
           ]
       )
)

line_growth = base_growth.mark_line(color='darkgreen', strokeWidth=2)
points_growth = base_growth.mark_point(color='darkgreen', size=80)

growth_chart = (
    (line_growth + points_growth)
    .properties(
        height=100,
        width=800,
        title='Private Establishment Annual Growth Rate'
    )
)

# assemble
# The slider is attached once at the top level so every view can read it.
chart = alt.vconcat(
    policy_row,
    scatter,
    bar,
    growth_chart
).add_params(
    year_slider
).properties(
    title='Linked Connected Scatter & Annual Bar Chart'
).configure_title(
        anchor='middle'
    )

chart

In [12]:

# Year slider: bounds and starting value come from the years in df_year.
first_year = int(df_year['Year'].min())
last_year = int(df_year['Year'].max())
year_slider = alt.param(
    name='year_sel',
    value=first_year,
    bind=alt.binding_range(
        name='Show up to Year: ',
        min=first_year,
        max=last_year,
        step=1,
    ),
)

# Connected scatter: productivity (x) against unit labor costs (y) for all
# years up to the slider value, joined in year order.
productivity_x = alt.X(
    'Labor productivity:Q',
    title='Labor Productivity (Index, 2017=100)',
    scale=alt.Scale(domain=[97.5, 103]),
)
labor_cost_y = alt.Y(
    'Unit labor costs:Q',
    title='Unit Labor Costs (Index, 2017=100)',
    scale=alt.Scale(domain=[80, 140]),
)
scatter_tooltip = [
    alt.Tooltip('Year:O', title='Year'),
    alt.Tooltip('Labor productivity:Q', format='.2f'),
    alt.Tooltip('Unit labor costs:Q', format='.2f'),
]
base = (
    alt.Chart(df_year)
    .transform_filter("datum.Year <= year_sel")
    .encode(
        x=productivity_x,
        y=labor_cost_y,
        order=alt.Order('Year:O'),
        tooltip=scatter_tooltip,
    )
)
line = base.mark_line(color='#1f77b4', strokeWidth=1)
points = base.mark_point(color='#ff7f0e', size=100, filled=True)
scatter = alt.layer(line, points).properties(width=600, height=200)

# Growth-rate line: same "up to selected year" filter, drawn on a fixed
# year axis so the x scale stays put while years are revealed.
years = list(range(2013, 2025))
growth_tooltip = [
    alt.Tooltip('Year:O'),
    alt.Tooltip('GrowthPct:Q', title='Growth Rate (%)'),
]
base_growth = (
    alt.Chart(df_growth)
    .transform_filter("datum.Year <= year_sel")
    .encode(
        x=alt.X('Year:O', title='Year', scale=alt.Scale(domain=years)),
        y=alt.Y('GrowthPct:Q', title='Annual Growth Rate (%)', scale=alt.Scale(domain=[0, 4])),
        tooltip=growth_tooltip,
    )
)
line_growth = base_growth.mark_line(color='darkgreen', strokeWidth=2)
points_growth = base_growth.mark_point(color='darkgreen', size=80)
growth_chart = alt.layer(line_growth, points_growth).properties(
    title='Private Establishment Annual Growth Rate',
    width=600,
    height=100,
)

# Single vertical bar for exactly the selected year, with its value printed
# just above the bar.
bar_base = alt.Chart(df_estb_annual).transform_filter("datum.Year == year_sel")

bar = bar_base.mark_bar(color='teal').encode(
    # x axis: no tick marks, horizontal label
    x=alt.X('Year:O', title=None, axis=alt.Axis(ticks=False, labelAngle=0)),
    y=alt.Y(
        'Average_Establishments:Q',
        title='Avg Private Establishments',
        axis=alt.Axis(ticks=False),
        scale=alt.Scale(domain=[0, 420000]),
    ),
)

bar_text = bar_base.mark_text(align='center', baseline='bottom', dy=-5).encode(
    x=alt.X('Year:O'),
    y='Average_Establishments:Q',
    text=alt.Text('Average_Establishments:Q', format=','),
)

bar_chart = alt.layer(bar, bar_text).properties(
    title='Establishments (Selected Year)',
    width=120,
    height=320,
)

# Layout: scatter stacked over the growth chart on the left, bar chart on
# the right; the slider is registered once on the combined chart.
left_column = alt.vconcat(scatter, growth_chart)
chart = (
    alt.hconcat(left_column, bar_chart)
    .add_params(year_slider)
    .properties(title='Labor Efficiency, Growth, and Establishments')
    .configure_title(anchor='middle')
)

chart

In [15]:
# Year slider
# Range and initial value come from the years present in df_year.
year_slider = alt.param(
    name='year_sel',
    bind=alt.binding_range(
        name='Show up to Year: ',
        min=int(df_year['Year'].min()),
        max=int(df_year['Year'].max()),
        step=1
    ),
    value=int(df_year['Year'].min())
)

# Bubble chart: Year vs Unit Labor Costs, bubble size = Labor Productivity
# Productivity is encoded twice (size and color); only the size legend is shown.
bubble = alt.Chart(df_year).transform_filter("datum.Year <= year_sel").mark_circle().encode(
    x=alt.X('Year:O', title='Year'),
    y=alt.Y('Unit labor costs:Q', title='Unit Labor Costs (Index, 2017=100)', scale=alt.Scale(domain=[80, 140])),
    size=alt.Size('Labor productivity:Q', title='Labor Productivity (Index, 2017=100)', scale=alt.Scale(range=[100, 1000])),
    color=alt.Color('Labor productivity:Q', scale=alt.Scale(scheme='blues'), legend=None),  # hide the color legend
    tooltip=[
        alt.Tooltip('Year:O'),
        alt.Tooltip('Unit labor costs:Q', title='Unit Labor Costs', format='.2f'),
        alt.Tooltip('Labor productivity:Q', title='Labor Productivity', format='.2f')
    ]
).properties(
    width=600,
    height=200,
    title='Bubble Chart: Labor Productivity and Unit Labor Costs'
)



# Line chart for growth %
# Fixed year domain so the x axis does not rescale as years are revealed.
years = list(range(2013, 2025))
base_growth = (
    alt.Chart(df_growth)
       .transform_filter("datum.Year <= year_sel")
       .encode(
           x=alt.X('Year:O', title='Year', scale=alt.Scale(domain=years)),
           y=alt.Y('GrowthPct:Q', title='Annual Growth Rate (%)', scale=alt.Scale(domain=[0, 4])),
           tooltip=[
               alt.Tooltip('Year:O'),
               alt.Tooltip('GrowthPct:Q', title='Growth Rate (%)')
           ]
       )
)
line_growth = base_growth.mark_line(color='darkgreen', strokeWidth=2)
points_growth = base_growth.mark_point(color='darkgreen', size=80)
growth_chart = (
    (line_growth + points_growth)
    .properties(
        height=100,
        width=600,
        title='Private Establishment Annual Growth Rate'
    )
)

# Vertical bar chart with annotation; the y axis is hidden and the value is
# printed above the bar instead.
bar_base = alt.Chart(df_estb_annual).transform_filter("datum.Year == year_sel")

# The data is filtered to a single year, so without a fixed domain the y
# scale is refit to that one value on every slider move and the bar height
# barely changes between years. Pinning the domain keeps the height
# proportional to the value. [0, 420000] is the domain the other versions of
# this bar use. NOTE(review): confirm it still covers the data maximum.
bar = bar_base.mark_bar(color='teal').encode(
    x=alt.X('Year:O', title=None, axis=alt.Axis(ticks=False, labels=True, labelAngle=0)),
    y=alt.Y('Average_Establishments:Q', title=None, axis=None, scale=alt.Scale(domain=[0, 420000])),
)

bar_text = bar_base.mark_text(
    align='center', baseline='bottom', dy=-5
).encode(
    x=alt.X('Year:O', axis=alt.Axis(labelAngle=0)),
    y='Average_Establishments:Q',
    text=alt.Text('Average_Establishments:Q', format=',')
)

bar_chart = (bar + bar_text).properties(
    width=120,
    height=320,
    title='Establishments (Selected Year)'
)

# Final layout: bubble chart + growth chart stacked, bar chart to the right
# The slider is attached once at the top level so every view can read it.
chart = alt.hconcat(
    alt.vconcat(bubble, growth_chart),
    bar_chart
).add_params(
    year_slider
).properties(
    title='Labor Efficiency, Growth, and Establishments'
).configure_title(anchor='middle')

chart