In [64]:
import pandas as pd
import numpy as np
import plotly.express as px

In [65]:
df = pd.read_csv("../data/Processed/GdpByInd.csv")
df.head()

Unnamed: 0,Industries,Group,Subgroup,1997,1998,1999,2000,2001,2002,2003,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,Farms,Primary Industries,"Agriculture, forestry, fishing, and hunting",219380,207822,199949,204313,212334,202363,227873,...,442202,403163,379793,395529,395074,388056,389475,472905,574223,555343
1,"Forestry, fishing, and related activities",Primary Industries,"Agriculture, forestry, fishing, and hunting",38500,41946,45364,39279,39016,39510,41425,...,56379,54733,51122,53382,53053,50994,53806,61457,65499,66345
2,Oil and gas extraction,Primary Industries,Mining,98754,72923,85516,142980,142297,116711,165971,...,445747,241538,199796,253994,346639,316004,210508,421929,652942,478748
3,"Mining, except oil and gas",Primary Industries,Mining,56374,55520,53050,54675,55091,54443,57393,...,116134,104439,94841,101852,106906,106096,96316,115877,135150,136909
4,Support activities for mining,Primary Industries,Mining,22101,23409,17542,20888,29204,24830,29532,...,165762,135473,73078,106929,132025,135631,78964,70194,87563,103802


In [66]:
# Clean up column names just in case
df.columns = df.columns.str.strip()

# Identify actual year columns (assuming they are 4-digit years)
year_columns = [col for col in df.columns if col.isdigit() and len(col) == 4]

# Melt the data safely
df_long = df.melt(
    id_vars=["Group", "Subgroup"],
    value_vars=year_columns,
    var_name="Year",
    value_name="Value"
)

# Convert year to integer
df_long["Year"] = df_long["Year"].astype(int)


In [67]:
df_grouped = df_long.groupby(["Group", "Year"], as_index=False)["Value"].sum()


In [68]:
import numpy as np

unique_groups = df_grouped["Group"].unique()
group_positions = {
    group: (np.cos(i * 2 * np.pi / len(unique_groups)) * 3,
            np.sin(i * 2 * np.pi / len(unique_groups)) * 3)
    for i, group in enumerate(unique_groups)
}

# Assign positions
def assign_coords(row):
    gx, gy = group_positions[row["Group"]]
    return pd.Series({"x": gx, "y": gy})

coords = df_grouped.apply(assign_coords, axis=1)
df_bubbles = pd.concat([df_grouped, coords], axis=1)


In [69]:
import plotly.express as px

fig = px.scatter(
    df_bubbles,
    x="x",
    y="y",
    size="Value",
    color="Group",
    animation_frame="Year",
    hover_name="Group",
    size_max=100,
    title="Animated Economic Output by Group"
)

fig.update_layout(
    xaxis=dict(visible=False),
    yaxis=dict(visible=False),
    margin=dict(t=40, l=0, r=0, b=0),
    height=650
)

fig.show()


In [79]:
import plotly.express as px

fig = px.bar(
    df_grouped,
    x="Value",
    y="Group",
    color="Group",
    animation_frame="Year",
    orientation='h',
    title="Group-Level Output Race Over Time"
)

# Optional: cap x-axis to a fixed upper bound (e.g., 5 million)
fig.update_layout(
    showlegend=False,
    xaxis=dict(range=[0, 12500000]),  # adjust this based on your data scale
    margin=dict(t=40, l=100, r=40, b=40)
)

fig.show()


In [78]:
import altair as alt

# Ensure proper types
df_long["Year"] = df_long["Year"].astype(int)
df_grouped["Year"] = df_grouped["Year"].astype(int)
latest_year = df_grouped["Year"].max()
df_latest = df_grouped[df_grouped["Year"] == latest_year]

# Selection object
group_select = alt.selection_single(
    fields=["Group"],
    bind="legend",
    name="Select",
    empty="all"
)

# --- Top: Bar chart of group outputs ---
bar = alt.Chart(df_latest).mark_bar().encode(
    x=alt.X('Value:Q', title='Output'),
    y=alt.Y('Group:N', sort='-x'),
    color='Group:N',
    tooltip=['Group', 'Value']
).add_selection(
    group_select
).properties(
    title=f"Group Output in {latest_year}",
    height=300
)

# --- Line chart: group-level (default view) ---
group_line = alt.Chart(df_grouped).mark_line().encode(
    x='Year:O',
    y='Value:Q',
    color='Group:N',
    tooltip=['Group', 'Year', 'Value']
).transform_filter(
    group_select
)

# --- Line chart: subgroup-level (when selected) ---
subgroup_line = alt.Chart(df_long).mark_line(point=True).encode(
    x='Year:O',
    y='Value:Q',
    color=alt.Color('Subgroup:N', legend=alt.Legend(title="Subgroups")),
    tooltip=['Subgroup', 'Year', 'Value']
).transform_filter(
    group_select
)

# Overlay subgroup if selected, group view if not
line_chart = alt.layer(
    subgroup_line,
    group_line
).resolve_scale(
    color='independent'
).properties(
    title="Output Over Time",
    height=300
)

# --- Combine & Export ---
combined = alt.vconcat(bar, line_chart).configure_title(fontSize=18)
combined.save("../img/group_output_explorer.html")



the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.

