In [2]:
import pandas as pd
import numpy as np
import plotly.express as px

In [3]:
df = pd.read_csv("../data/Processed/GdpByInd.csv")
df.head()

Unnamed: 0,Industries,Group,Subgroup,1997,1998,1999,2000,2001,2002,2003,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,Farms,Primary Industries,"Agriculture, forestry, fishing, and hunting",219380,207822,199949,204313,212334,202363,227873,...,442202,403163,379793,395529,395074,388056,389475,472905,574223,555343
1,"Forestry, fishing, and related activities",Primary Industries,"Agriculture, forestry, fishing, and hunting",38500,41946,45364,39279,39016,39510,41425,...,56379,54733,51122,53382,53053,50994,53806,61457,65499,66345
2,Oil and gas extraction,Primary Industries,Mining,98754,72923,85516,142980,142297,116711,165971,...,445747,241538,199796,253994,346639,316004,210508,421929,652942,478748
3,"Mining, except oil and gas",Primary Industries,Mining,56374,55520,53050,54675,55091,54443,57393,...,116134,104439,94841,101852,106906,106096,96316,115877,135150,136909
4,Support activities for mining,Primary Industries,Mining,22101,23409,17542,20888,29204,24830,29532,...,165762,135473,73078,106929,132025,135631,78964,70194,87563,103802


In [4]:
# Clean up column names just in case
df.columns = df.columns.str.strip()

# Identify actual year columns (assuming they are 4-digit years)
year_columns = [col for col in df.columns if col.isdigit() and len(col) == 4]

# Melt the data safely
df_long = df.melt(
    id_vars=["Group", "Subgroup"],
    value_vars=year_columns,
    var_name="Year",
    value_name="Value"
)

# Convert year to integer
df_long["Year"] = df_long["Year"].astype(int)


In [5]:
df_grouped = df_long.groupby(["Group", "Year"], as_index=False)["Value"].sum()


In [6]:
import altair as alt

# Ensure proper types
df_long["Year"] = df_long["Year"].astype(int)
df_grouped["Year"] = df_grouped["Year"].astype(int)
latest_year = df_grouped["Year"].max()
df_latest = df_grouped[df_grouped["Year"] == latest_year]

# Selection object
group_select = alt.selection_single(
    fields=["Group"],
    bind="legend",
    name="Select",
    empty="all"
)

# --- Top: Bar chart of group outputs ---
bar = alt.Chart(df_latest).mark_bar().encode(
    x=alt.X('Value:Q', title='Output'),
    y=alt.Y('Group:N', sort='-x'),
    color='Group:N',
    tooltip=['Group', 'Value']
).add_selection(
    group_select
).properties(
    title=f"Group Output in {latest_year}",
    height=300
)

# --- Line chart: group-level (default view) ---
group_line = alt.Chart(df_grouped).mark_line().encode(
    x='Year:O',
    y='Value:Q',
    color='Group:N',
    tooltip=['Group', 'Year', 'Value']
).transform_filter(
    group_select
)

# --- Line chart: subgroup-level (when selected) ---
subgroup_line = alt.Chart(df_long).mark_line(point=True).encode(
    x='Year:O',
    y='Value:Q',
    color=alt.Color('Subgroup:N', legend=alt.Legend(title="Subgroups")),
    tooltip=['Subgroup', 'Year', 'Value']
).transform_filter(
    group_select
)

# Overlay subgroup if selected, group view if not
line_chart = alt.layer(
    subgroup_line,
    group_line
).resolve_scale(
    color='independent'
).properties(
    title="Output Over Time",
    height=300
)

# --- Combine & Export ---
combined = alt.vconcat(bar, line_chart).configure_title(fontSize=18)
combined.save("../img/group_output_explorer.html")


Deprecated since `altair=5.0.0`. Use selection_point instead.
  group_select = alt.selection_single(
Deprecated since `altair=5.0.0`. Use add_params instead.
  ).add_selection(


In [7]:
import plotly.express as px

# Step 1: Filter for 2013–2024
df_grouped_filtered = df_grouped[df_grouped["Year"].between(2013, 2024)].copy()

# Step 2: Add a column for custom coloring
df_grouped_filtered["Color"] = df_grouped_filtered["Group"].apply(
    lambda g: "Manufacturing" if g == "Manufacturing" else "Other"
)

# Step 3: Define color map (blue for Manufacturing, gray for others)
color_map = {
    "Manufacturing": "steelblue",
    "Other": "lightgray"
}

# Step 4: Build the animated bar chart
fig = px.bar(
    df_grouped_filtered,
    x="Value",
    y="Group",
    color="Color",
    animation_frame="Year",
    orientation='h',
    color_discrete_map=color_map,
    title="Group-Level Output Race Over Time (2013–2024)"
)

fig.update_layout(
    showlegend=False,
    xaxis=dict(range=[0, 12500000]),  # optional: adjust max
    margin=dict(t=40, l=100, r=40, b=40)
)

fig.show()
# fig.write_html("../img/gdp_barcharts_2013_2024.html")


In [8]:
import plotly.express as px

# Filter for 2013–2024
df_grouped_filtered = df_grouped[df_grouped["Year"].between(2013, 2024)].copy()

# Convert from millions to billions
df_grouped_filtered["Value_Trillions"] = df_grouped_filtered["Value"] / 1000000

# Step 2: Add a column for custom coloring
df_grouped_filtered["Color"] = df_grouped_filtered["Group"].apply(
    lambda g: "Manufacturing" if g == "Manufacturing" else "Other"
)

# Step 3: Define color map (blue for Manufacturing, gray for others)
color_map = {
    "Manufacturing": "#28293D",
    "Other": "#9997bc"
}

fig = px.bar(
    df_grouped_filtered,
    x="Value_Trillions",
    y="Group",
    color="Color",
    animation_frame="Year",
    orientation='h',
    color_discrete_map=color_map,
    hover_data={"Color": False}  
)

# Set custom tooltip on static traces (initial frame)
for trace in fig.data:
    trace.hovertemplate = "<b>%{y}</b><br>Value (Trillion $): %{x:.2f}<extra></extra>"

# Set custom tooltip on animated frames
for frame in fig.frames:
    for trace in frame.data:
        trace.hovertemplate = "<b>%{y}</b><br>Value (Trillion $): %{x:.2f}<extra></extra>"

fig.update_layout(
    showlegend=False,
    xaxis=dict(
        title="Output (Trillions $)",
        tickformat=".2f",
        range=[0, 12.5],
        showgrid=False,  # Turn off vertical grid lines
        zeroline=False   # Turn off the thick zero line
    ),
    yaxis=dict(
        title = "Sectors",
        showgrid=False,  # Turn off horizontal grid lines
        zeroline=False
    ),
    plot_bgcolor='white',   # Plot area background
    paper_bgcolor='white',  # Entire figure background
    margin=dict(t=40, l=100, r=40, b=40)
)

fig.show()


In [13]:
fig.write_html("../img/gdp_barcharts_2013_2024.html")