In [1]:
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
df = pd.read_csv("../data/Processed/GdpByInd.csv")
df.head()

Unnamed: 0,Industries,Group,Subgroup,1997,1998,1999,2000,2001,2002,2003,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,Farms,Primary Industries,"Agriculture, forestry, fishing, and hunting",219380,207822,199949,204313,212334,202363,227873,...,442202,403163,379793,395529,395074,388056,389475,472905,574223,555343
1,"Forestry, fishing, and related activities",Primary Industries,"Agriculture, forestry, fishing, and hunting",38500,41946,45364,39279,39016,39510,41425,...,56379,54733,51122,53382,53053,50994,53806,61457,65499,66345
2,Oil and gas extraction,Primary Industries,Mining,98754,72923,85516,142980,142297,116711,165971,...,445747,241538,199796,253994,346639,316004,210508,421929,652942,478748
3,"Mining, except oil and gas",Primary Industries,Mining,56374,55520,53050,54675,55091,54443,57393,...,116134,104439,94841,101852,106906,106096,96316,115877,135150,136909
4,Support activities for mining,Primary Industries,Mining,22101,23409,17542,20888,29204,24830,29532,...,165762,135473,73078,106929,132025,135631,78964,70194,87563,103802


In [3]:
# Clean up column names just in case
df.columns = df.columns.str.strip()

# Identify actual year columns (assuming they are 4-digit years)
year_columns = [col for col in df.columns if col.isdigit() and len(col) == 4]

# Melt the data safely
df_long = df.melt(
    id_vars=["Group", "Subgroup"],
    value_vars=year_columns,
    var_name="Year",
    value_name="Value"
)

# Convert year to integer
df_long["Year"] = df_long["Year"].astype(int)


In [4]:
df_grouped = df_long.groupby(["Group", "Year"], as_index=False)["Value"].sum()


In [5]:
import numpy as np

unique_groups = df_grouped["Group"].unique()
group_positions = {
    group: (np.cos(i * 2 * np.pi / len(unique_groups)) * 3,
            np.sin(i * 2 * np.pi / len(unique_groups)) * 3)
    for i, group in enumerate(unique_groups)
}

# Assign positions
def assign_coords(row):
    gx, gy = group_positions[row["Group"]]
    return pd.Series({"x": gx, "y": gy})

coords = df_grouped.apply(assign_coords, axis=1)
df_bubbles = pd.concat([df_grouped, coords], axis=1)


In [69]:
import plotly.express as px

fig = px.scatter(
    df_bubbles,
    x="x",
    y="y",
    size="Value",
    color="Group",
    animation_frame="Year",
    hover_name="Group",
    size_max=100,
    title="Animated Economic Output by Group"
)

fig.update_layout(
    xaxis=dict(visible=False),
    yaxis=dict(visible=False),
    margin=dict(t=40, l=0, r=0, b=0),
    height=650
)

fig.show()


In [5]:
import plotly.express as px

fig = px.bar(
    df_grouped,
    x="Value",
    y="Group",
    color="Group",
    animation_frame="Year",
    orientation='h',
    title="Group-Level Output Race Over Time"
)

# Optional: cap x-axis to a fixed upper bound (e.g., 5 million)
fig.update_layout(
    showlegend=False,
    xaxis=dict(range=[0, 12500000]),  # adjust this based on your data scale
    margin=dict(t=40, l=100, r=40, b=40)
)

fig.show()
#fig.write_html("../img/gdp_barcharts.html")

In [7]:
fig_all.write_html("../img/gdp_barcharts.html")

NameError: name 'fig_all' is not defined

In [78]:
import altair as alt

# Ensure proper types
df_long["Year"] = df_long["Year"].astype(int)
df_grouped["Year"] = df_grouped["Year"].astype(int)
latest_year = df_grouped["Year"].max()
df_latest = df_grouped[df_grouped["Year"] == latest_year]

# Selection object
group_select = alt.selection_single(
    fields=["Group"],
    bind="legend",
    name="Select",
    empty="all"
)

# --- Top: Bar chart of group outputs ---
bar = alt.Chart(df_latest).mark_bar().encode(
    x=alt.X('Value:Q', title='Output'),
    y=alt.Y('Group:N', sort='-x'),
    color='Group:N',
    tooltip=['Group', 'Value']
).add_selection(
    group_select
).properties(
    title=f"Group Output in {latest_year}",
    height=300
)

# --- Line chart: group-level (default view) ---
group_line = alt.Chart(df_grouped).mark_line().encode(
    x='Year:O',
    y='Value:Q',
    color='Group:N',
    tooltip=['Group', 'Year', 'Value']
).transform_filter(
    group_select
)

# --- Line chart: subgroup-level (when selected) ---
subgroup_line = alt.Chart(df_long).mark_line(point=True).encode(
    x='Year:O',
    y='Value:Q',
    color=alt.Color('Subgroup:N', legend=alt.Legend(title="Subgroups")),
    tooltip=['Subgroup', 'Year', 'Value']
).transform_filter(
    group_select
)

# Overlay subgroup if selected, group view if not
line_chart = alt.layer(
    subgroup_line,
    group_line
).resolve_scale(
    color='independent'
).properties(
    title="Output Over Time",
    height=300
)

# --- Combine & Export ---
combined = alt.vconcat(bar, line_chart).configure_title(fontSize=18)
combined.save("../img/group_output_explorer.html")



the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.



In [6]:
import plotly.express as px

# Step 1: Filter for 2013–2024
df_grouped_filtered = df_grouped[df_grouped["Year"].between(2013, 2024)].copy()

# Step 2: Add a column for custom coloring
df_grouped_filtered["Color"] = df_grouped_filtered["Group"].apply(
    lambda g: "Manufacturing" if g == "Manufacturing" else "Other"
)

# Step 3: Define color map (blue for Manufacturing, gray for others)
color_map = {
    "Manufacturing": "steelblue",
    "Other": "lightgray"
}

# Step 4: Build the animated bar chart
fig = px.bar(
    df_grouped_filtered,
    x="Value",
    y="Group",
    color="Color",
    animation_frame="Year",
    orientation='h',
    color_discrete_map=color_map,
    title="Group-Level Output Race Over Time (2013–2024)"
)

fig.update_layout(
    showlegend=False,
    xaxis=dict(range=[0, 12500000]),  # optional: adjust max
    margin=dict(t=40, l=100, r=40, b=40)
)

fig.show()
# fig.write_html("../img/gdp_barcharts_2013_2024.html")


In [7]:
# Total GDP by year
df_total = df_grouped_filtered.groupby("Year", as_index=False)["Value"].sum().rename(columns={"Value": "TotalGDP"})

# Merge and compute % contribution
df_mfg = df_grouped_filtered[df_grouped_filtered["Group"] == "Manufacturing"].copy()
df_mfg = df_mfg.merge(df_total, on="Year")
df_mfg["Percent"] = (df_mfg["Value"] / df_mfg["TotalGDP"]) * 100

# Plot using Plotly
import plotly.express as px
fig = px.line(
    df_mfg,
    x="Year",
    y="Percent",
    markers=True,
    title="Manufacturing's % Contribution to Total GDP (2013–2024)",
    labels={"Percent": "Share of GDP (%)"}
)
fig.update_layout(yaxis=dict(ticksuffix="%"))
fig.show()


In [18]:
import plotly.express as px

# Step 1: Pivot data for 2013, 2018, 2024
years = [2013, 2018, 2023]
df_pc = df_grouped_filtered[df_grouped_filtered["Year"].isin(years)].copy()
df_pivot = df_pc.pivot(index="Group", columns="Year", values="Value").reset_index()

# Step 2: Create parallel coordinates plot
fig = px.parallel_coordinates(
    df_pivot,
    dimensions=[2013, 2018, 2023],
    color=2013,  # or color='Group' if you want consistent coloring
    labels={2013: "2013", 2018: "2018", 2024: "2024"},
    title="GDP Output by Group (Parallel Coordinates: 2013 → 2018 → 2024)"
)

fig.update_layout(coloraxis_colorbar=dict(title="2013 Output"))
fig.show()


In [23]:
import plotly.express as px

# Select years
selected_years = [2013, 2015, 2017, 2019, 2021, 2023]

# Pivot the data
df_pc = df_grouped_filtered[df_grouped_filtered["Year"].isin(selected_years)].copy()
df_pivot = df_pc.pivot(index="Group", columns="Year", values="Value").reset_index()

# Fill any missing years
for y in selected_years:
    if y not in df_pivot.columns:
        df_pivot[y] = float('nan')

# Add 'Group' column as a visible dimension (first axis)
df_pivot.insert(0, "Industry", df_pivot["Group"])

# Plot
fig = px.parallel_coordinates(
    df_pivot,
    dimensions=["Industry"] + selected_years,  # Include Group name as visible dimension
    color=2013,
    labels={str(y): str(y) for y in selected_years},
    title="GDP Output by Group (Parallel Coordinates: 2013–2023)"
)

fig.update_layout(
    coloraxis_colorbar=dict(title="2013 Output"),
    margin=dict(l=80, r=80, t=60, b=60)
)

fig.show()


In [8]:
import plotly.express as px

# Step 1: Prepare years
years = [2013, 2018, 2023]
df_pc = df_grouped_filtered[df_grouped_filtered["Year"].isin(years)].copy()
df_pivot = df_pc.pivot(index="Group", columns="Year", values="Value").reset_index()

# Step 2: Add hover column (industry name)
df_pivot["Industry"] = df_pivot["Group"]

# Step 3: Create parallel coordinates plot (no color scale)
fig = px.parallel_coordinates(
    df_pivot,
    dimensions=years,
    color_continuous_scale=[[0, 'gray'], [1, 'gray']],  # set constant color
    color=df_pivot[years[0]],  # dummy input to override
    labels={year: str(year) for year in years},
    title="GDP Output by Group (Parallel Coordinates: 2013 → 2018 → 2023)"
)

# Step 4: Add hover with industry name
fig.update_traces(customdata=df_pivot[["Industry"]])
fig.update_traces(
    hovertemplate="<b>%{customdata[0]}</b><br>" +
                  "2013: %{dimensions[0].value}<br>" +
                  "2018: %{dimensions[1].value}<br>" +
                  "2023: %{dimensions[2].value}<br><extra></extra>"
)

fig.update_layout(
    coloraxis_showscale=False,
    margin=dict(l=80, r=80, t=60, b=60)
)

fig.show()


ValueError: Invalid property specified for object of type plotly.graph_objs.Parcoords: 'hovertemplate'

Did you mean "customdata"?

    Valid properties:
        customdata
            Assigns extra data each datum. This may be useful when
            listening to hover, click and selection events. Note
            that, "scatter" traces also appends customdata items in
            the markers DOM elements
        customdatasrc
            Sets the source reference on Chart Studio Cloud for
            `customdata`.
        dimensions
            The dimensions (variables) of the parallel coordinates
            chart. 2..60 dimensions are supported.
        dimensiondefaults
            When used in a template (as
            layout.template.data.parcoords.dimensiondefaults), sets
            the default property values to use for elements of
            parcoords.dimensions
        domain
            :class:`plotly.graph_objects.parcoords.Domain` instance
            or dict with compatible properties
        ids
            Assigns id labels to each datum. These ids for object
            constancy of data points during animation. Should be an
            array of strings, not numbers or any other type.
        idssrc
            Sets the source reference on Chart Studio Cloud for
            `ids`.
        labelangle
            Sets the angle of the labels with respect to the
            horizontal. For example, a `tickangle` of -90 draws the
            labels vertically. Tilted labels with "labelangle" may
            be positioned better inside margins when
            `labelposition` is set to "bottom".
        labelfont
            Sets the font for the `dimension` labels.
        labelside
            Specifies the location of the `label`. "top" positions
            labels above, next to the title "bottom" positions
            labels below the graph Tilted labels with "labelangle"
            may be positioned better inside margins when
            `labelposition` is set to "bottom".
        legend
            Sets the reference to a legend to show this trace in.
            References to these legends are "legend", "legend2",
            "legend3", etc. Settings for these legends are set in
            the layout, under `layout.legend`, `layout.legend2`,
            etc.
        legendgrouptitle
            :class:`plotly.graph_objects.parcoords.Legendgrouptitle
            ` instance or dict with compatible properties
        legendrank
            Sets the legend rank for this trace. Items and groups
            with smaller ranks are presented on top/left side while
            with "reversed" `legend.traceorder` they are on
            bottom/right side. The default legendrank is 1000, so
            that you can use ranks less than 1000 to place certain
            items before all unranked items, and ranks greater than
            1000 to go after all unranked items. When having
            unranked or equal rank items shapes would be displayed
            after traces i.e. according to their order in data and
            layout.
        legendwidth
            Sets the width (in px or fraction) of the legend for
            this trace.
        line
            :class:`plotly.graph_objects.parcoords.Line` instance
            or dict with compatible properties
        meta
            Assigns extra meta information associated with this
            trace that can be used in various text attributes.
            Attributes such as trace `name`, graph, axis and
            colorbar `title.text`, annotation `text`
            `rangeselector`, `updatemenues` and `sliders` `label`
            text all support `meta`. To access the trace `meta`
            values in an attribute in the same trace, simply use
            `%{meta[i]}` where `i` is the index or key of the
            `meta` item in question. To access trace `meta` in
            layout attributes, use `%{data[n[.meta[i]}` where `i`
            is the index or key of the `meta` and `n` is the trace
            index.
        metasrc
            Sets the source reference on Chart Studio Cloud for
            `meta`.
        name
            Sets the trace name. The trace name appears as the
            legend item and on hover.
        rangefont
            Sets the font for the `dimension` range values.
        stream
            :class:`plotly.graph_objects.parcoords.Stream` instance
            or dict with compatible properties
        tickfont
            Sets the font for the `dimension` tick values.
        uid
            Assign an id to this trace, Use this to provide object
            constancy between traces during animations and
            transitions.
        uirevision
            Controls persistence of some user-driven changes to the
            trace: `constraintrange` in `parcoords` traces, as well
            as some `editable: true` modifications such as `name`
            and `colorbar.title`. Defaults to `layout.uirevision`.
            Note that other user-driven trace attribute changes are
            controlled by `layout` attributes: `trace.visible` is
            controlled by `layout.legend.uirevision`,
            `selectedpoints` is controlled by
            `layout.selectionrevision`, and `colorbar.(x|y)`
            (accessible with `config: {editable: true}`) is
            controlled by `layout.editrevision`. Trace changes are
            tracked by `uid`, which only falls back on trace index
            if no `uid` is provided. So if your app can add/remove
            traces before the end of the `data` array, such that
            the same trace has a different index, you can still
            preserve user-driven changes if you give each trace a
            `uid` that stays with it as it moves.
        unselected
            :class:`plotly.graph_objects.parcoords.Unselected`
            instance or dict with compatible properties
        visible
            Determines whether or not this trace is visible. If
            "legendonly", the trace is not drawn, but can appear as
            a legend item (provided that the legend itself is
            visible).
        
Did you mean "customdata"?

Bad property path:
hovertemplate
^^^^^^^^^^^^^

In [10]:
import altair as alt
import pandas as pd

# Step 1: Prepare the data
years = [2013, 2015, 2017, 2019, 2021, 2023]
df_pc = df_grouped_filtered[df_grouped_filtered["Year"].isin(years)].copy()

# Step 2: Ensure all years exist
df_pivot = df_pc.pivot(index="Group", columns="Year", values="Value").reset_index()

# Step 3: Melt to long format for Altair
df_melted = df_pivot.melt(id_vars="Group", var_name="Year", value_name="Value")
df_melted["Year"] = df_melted["Year"].astype(str)  # for ordinal x-axis

# Step 4: Create the line chart
chart = alt.Chart(df_melted).mark_line(point=True).encode(
    x=alt.X("Year:N", title="Year"),
    y=alt.Y("Value:Q", title="Output (Billion $)"),
    color=alt.Color("Group:N", legend=None),
    detail="Group:N",
    tooltip=["Group", "Year", "Value"]
).properties(
    width=700,
    height=400,
    title="GDP Output by Group (Simulated Parallel Coordinates: 2013–2023)"
).interactive()

chart



the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.



In [11]:
import altair as alt
import pandas as pd

# Step 1: Compute total GDP per year
df_total = df_grouped_filtered.groupby("Year", as_index=False)["Value"].sum().rename(columns={"Value": "TotalGDP"})

# Step 2: Merge total and compute percent contribution
df_percent = df_grouped_filtered.merge(df_total, on="Year")
df_percent["Percent"] = (df_percent["Value"] / df_percent["TotalGDP"]) * 100

# Step 3: Compute rank per year
df_percent["Rank"] = df_percent.groupby("Year")["Percent"].rank(ascending=False, method="first")

# Step 4: Filter for selected years (optional)
years = [2013, 2015, 2017, 2019, 2021, 2023]
df_bump = df_percent[df_percent["Year"].isin(years)].copy()
df_bump["Year"] = df_bump["Year"].astype(str)

# Step 5: Optional - Keep only top N industries per year
top_n = 10
top_groups = df_bump[df_bump["Rank"] <= top_n]["Group"].unique()
df_bump = df_bump[df_bump["Group"].isin(top_groups)]

# Step 6: Plot bump chart
bump_chart = alt.Chart(df_bump).mark_line(point=True).encode(
    x=alt.X("Year:N", title="Year"),
    y=alt.Y("Rank:Q", title="Rank (1 = Top Contributor)", sort="ascending"),
    color=alt.Color("Group:N", legend=alt.Legend(title="Group")),
    detail="Group:N",
    tooltip=["Group", "Year", alt.Tooltip("Percent:Q", format=".2f"), alt.Tooltip("Rank:Q", format=".0f")]
).properties(
    width=700,
    height=400,
    title="Rank of Industry Contribution to GDP Over Time (Bump Chart)"
).interactive()

bump_chart



the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.

