In [1]:
import pandas as pd
import altair as alt

In [2]:
# Load the training-status workbook and keep only the rows whose
# 'Title Group' is "Associate" and whose 'Region' is "U.S.".
df = (
    pd.read_excel("training_status_directory.xlsx")
    .loc[lambda people: people['Title Group'].eq("Associate") & people['Region'].eq("U.S.")]
)

In [3]:
# One row per (Department, Group, Trained) combination present in df,
# with the number of matching rows stored in 'Count'.
grouped = (
    df
    .groupby(['Department', 'Group', 'Trained'])
    .size()
    .reset_index(name='Count')
)

In [4]:
# Reshape the long counts into one row per (Department, Group) with a
# separate count column for each value of 'Trained'.
#
# NOTE(review): the columns are renamed by position, so this assumes 'Trained'
# has exactly two distinct values and that the first one (in pivot column
# order) means "untrained" -- confirm against the source data.
pivot = (
    grouped
    .pivot(index=['Department', 'Group'], columns='Trained', values='Count')
    .reset_index()
    .set_axis(['Department', 'Group', 'UntrainedCount', 'TrainedCount'], axis=1)
    # A status missing for a given (Department, Group) shows up as NaN; count it as 0.
    .fillna(0)
    # Head count per (Department, Group) row.
    .assign(Total=lambda counts: counts['TrainedCount'] + counts['UntrainedCount'])
)

In [5]:
# Long-format copy of `pivot` for the stacked bars: one row per
# (Department, Group, TrainingStatus) carrying that status' Count and the row Total.
stacked_data_group = (
    pivot
    .melt(
        id_vars=['Department', 'Group', 'Total'],
        value_vars=['TrainedCount', 'UntrainedCount'],
        var_name='TrainingStatus',
        value_name='Count',
    )
    .assign(
        # Replace the column-name labels with display labels. No `categories=`
        # is passed, so pandas infers the category order; pass an explicit
        # `categories=[...]` here to change the categorical order.
        TrainingStatus=lambda long_counts: pd.Categorical(
            long_counts['TrainingStatus'].map({
                'TrainedCount': 'Trained',
                'UntrainedCount': 'Untrained',
            })
        )
    )
)

In [6]:
# Prepare the data for the department-level chart: sum the per-group
# counts in `pivot` up to one row per Department.
department_data = (
    pivot
    .groupby('Department')[['Total', 'TrainedCount', 'UntrainedCount']]
    .sum()
    .reset_index()
)

# Long-format version for the stacked bars: one row per (Department, TrainingStatus).
stacked_data_department = (
    department_data
    .melt(
        id_vars=['Department', 'Total'],
        value_vars=['TrainedCount', 'UntrainedCount'],
        var_name='TrainingStatus',
        value_name='Count',
    )
    .assign(
        # Same relabelling as the group-level data; category order is again
        # inferred by pandas because no `categories=` is passed.
        TrainingStatus=lambda long_counts: pd.Categorical(
            long_counts['TrainingStatus'].map({
                'TrainedCount': 'Trained',
                'UntrainedCount': 'Untrained',
            })
        )
    )
)

In [7]:
def create_department_chart(department):
    """Build the per-group stacked bar chart for a single department.

    Reads the notebook-level ``stacked_data_group`` frame and keeps the rows
    whose 'Department' equals ``department``.

    Args:
        department: Value to match against the 'Department' column; also
            used in the chart title.

    Returns:
        A 300x200 Altair bar chart with 'Group' on the x-axis, stacked
        'Count' on the y-axis, and bars coloured by 'TrainingStatus'
        (Untrained = red, Trained = green).
    """
    department_rows = stacked_data_group[stacked_data_group['Department'] == department]

    group_axis = alt.X('Group:N', title='Group')

    count_axis = alt.Y(
        'Count:Q',
        title='Number of People',
        stack=True,
        sort="descending",
        # Explicitly not reversed, so the y-axis grows upward.
        scale=alt.Scale(reverse=False),
    )

    # Fixed status-to-colour mapping so every chart uses the same colours.
    status_color = alt.Color(
        'TrainingStatus:N',
        scale=alt.Scale(domain=['Untrained', 'Trained'], range=['red', 'green']),
        title='Training Status',
    )

    hover_fields = [
        alt.Tooltip(shorthand, title=label)
        for shorthand, label in [
            ('Department:N', 'Department'),
            ('Group:N', 'Group'),
            ('Total:Q', 'Total People'),
            ('TrainingStatus:N', 'Status'),
            ('Count:Q', 'Count'),
        ]
    ]

    bars = alt.Chart(department_rows).mark_bar().encode(
        x=group_axis,
        y=count_axis,
        color=status_color,
        tooltip=hover_fields,
    )
    return bars.properties(title=f'{department} Department', width=300, height=200)

In [8]:
# Department-level summary: one stacked bar per department, split by training status.
department_chart = (
    alt.Chart(stacked_data_department)
    .mark_bar()
    .encode(
        x=alt.X('Department:N', title='Department'),
        y=alt.Y(
            'Count:Q',
            title='Number of People',
            stack=True,
            sort="descending",
            # Explicitly not reversed, so the y-axis grows upward.
            scale=alt.Scale(reverse=False),
        ),
        color=alt.Color(
            'TrainingStatus:N',
            # Fixed status-to-colour mapping: Untrained = red, Trained = green.
            scale=alt.Scale(domain=['Untrained', 'Trained'], range=['red', 'green']),
            title='Training Status',
        ),
        tooltip=[
            alt.Tooltip(shorthand, title=label)
            for shorthand, label in [
                ('Department:N', 'Department'),
                ('Total:Q', 'Total People'),
                ('TrainingStatus:N', 'Status'),
                ('Count:Q', 'Count'),
            ]
        ],
    )
    .properties(
        title='Department-Level Summary: Training Progress',
        width=600,
        height=300,
    )
)

In [9]:
# Build one group-level chart per department, in the order the departments
# first appear in stacked_data_group.
departments = stacked_data_group['Department'].unique()
charts = list(map(create_department_chart, departments))

In [10]:
# Lay the per-department charts out side by side under a shared title.
group_charts = alt.hconcat(*charts)
group_charts = group_charts.properties(
    title='Group-Level Detail: Training Progress by Department'
)

In [11]:
# Final combined chart: department-level summary on top,
# per-department group detail underneath.
final_chart = alt.vconcat(department_chart, group_charts)
final_chart = final_chart.properties(title='Training Progress Overview')

In [12]:
# Render the combined chart as this cell's output.
final_chart

In [13]:
# Sanity check: number of rows per department in the filtered frame.
df['Department'].value_counts()

Department
Business & Finance    219
Litigation            216
Name: count, dtype: int64

In [14]:
# Write the combined chart to "us.png" in the current working directory.
# NOTE(review): PNG export presumably needs an Altair image-export backend
# installed in this environment -- confirm before relying on Run All.
final_chart.save("us.png")