In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import pypsa
import seaborn as sns
from pypsa.statistics import StatisticsAccessor, get_bus_and_carrier

In [None]:
def get_node_carrier_emissions_timeseries(n: pypsa.Network) -> pd.DataFrame:
    """
    Gets timeseries emissions by bus and carrier.

    Scales the primary energy use from ``get_primary_energy_use`` by the
    ``co2_emissions`` factor of the matching carrier.

    Parameters
    ----------
    n : pypsa.Network
        Network whose ``carriers`` table provides the ``nice_name`` and
        ``co2_emissions`` columns.

    Returns
    -------
    pd.DataFrame
        The primary energy use table with every row multiplied by the
        emission factor of its ``carrier`` index level.
    """
    energy = get_primary_energy_use(n)
    # Emission factors keyed by nice name (presumably the labels carried by the
    # "carrier" level of `energy` -- verify against the statistics grouper).
    # Selecting the column directly always yields a Series; the previous
    # `[[...]].squeeze()` collapsed a one-carrier table into a bare scalar,
    # which silently disabled the level-based alignment below.
    co2 = n.carriers.set_index("nice_name")["co2_emissions"]
    return energy.mul(co2, level="carrier", axis=0)


def get_primary_energy_use(n: pypsa.Network) -> pd.DataFrame:
    """
    Gets timeseries primary energy use by bus and carrier.

    Generator supply is multiplied by the inverse generator efficiency and
    stacked with the withdrawal of links, stores and storage units; the result
    is summed per (bus, carrier) pair.
    """
    stats = StatisticsAccessor(n)

    # Generators: supply per individual generator, scaled by 1 / efficiency.
    generator_supply = stats.supply(
        aggregate_time=False,
        comps=["Generator"],
        groupby=pypsa.statistics.get_name_bus_and_carrier,
    ).droplevel("component")
    inverse_efficiency = 1 / n.get_switchable_as_dense("Generator", "efficiency")
    # Transposed so the multiplication can align on the "name" level.
    generator_input = generator_supply.T.mul(inverse_efficiency, axis=0, level="name").T
    generator_input = generator_input.droplevel("name")

    # Links, stores and storage units: their withdrawal is used as-is.
    converter_withdrawal = stats.withdrawal(
        comps=["Link", "Store", "StorageUnit"],
        aggregate_time=False,
        groupby=get_bus_and_carrier,
    ).droplevel("component")

    stacked = pd.concat([generator_input, converter_withdrawal])
    # Group on the index levels directly: an intermediate `.reset_index()` was
    # removed because it appeared to break multi-horizon indexing.
    return stacked.groupby(["bus", "carrier"]).sum()


def get_node_emissions_timeseries(n: pypsa.Network) -> pd.DataFrame:
    """
    Gets timeseries emissions per node.

    Sums the bus/carrier emissions over all carriers of a bus and transposes
    the result, so that buses end up on the columns.
    """
    by_bus_and_carrier = get_node_carrier_emissions_timeseries(n)
    # Grouping on the remaining "bus" index level; no `.reset_index()` here
    # (dropped as a fix for multi-horizon networks).
    by_bus = by_bus_and_carrier.droplevel("carrier").groupby("bus").sum()
    return by_bus.T

In [None]:
# Base folders of the validation outputs for the two model runs.
# Both keep a trailing "/" because file names are appended to them directly
# inside f-strings when the CSVs are read.
# NOTE(review): wholesale_folder has no "lv1.0_Ep_E/" component, unlike
# pudl_folder -- confirm the wholesale CSVs really live one level higher.
pudl_folder = "validation_usa_pudl/figures/s300_cluster_133m/lv1.0_Ep_E/"
wholesale_folder = "validation_usa_wholesale/figures/s300_cluster_133m/"

# State Emissions Deviation Plot

In [None]:
# Load the state-level emissions validation tables of both model runs.
df_em = pd.read_csv(f"{pudl_folder}val_bar_state_emissions.csv")
df_em_ws = pd.read_csv(f"{wholesale_folder}val_bar_state_emissions.csv")

# From the wholesale run only the optimized rows are kept and relabelled.
# `.assign` returns a new frame, so no value is written onto a `.loc` slice
# (the previous attribute assignment triggered SettingWithCopyWarning).
df_em_ws = df_em_ws.loc[df_em_ws["variable"] == "Optimized"].assign(variable="Wholesale")

# The optimized rows of the PUDL run are the "Fuel Receipts" case; every other
# row of that file keeps its original label.
df_em.loc[df_em["variable"] == "Optimized", "variable"] = "Fuel Receipts"

# Stack both runs into one long table; a fresh index avoids duplicate row
# labels coming from the two source files.
df_em = pd.concat([df_em, df_em_ws], ignore_index=True)
df_em

In [None]:
# National total per data source, shown in the narrow top panel.
df_sum = df_em.groupby("variable", as_index=False)["value"].sum()

# One colour per data source, shared by both panels so the bars match.
unique_variables = df_em["variable"].unique()
palette = sns.color_palette("muted", len(unique_variables))
color_mapping = dict(zip(unique_variables, palette))

# Two stacked panels: a thin one for the totals, a tall one for the states.
fig, (ax_sum, ax) = plt.subplots(2, 1, figsize=(8, 10), gridspec_kw={"height_ratios": [0.15, 3]}, sharex=False)

# Top panel: national totals.
sns.barplot(
    data=df_sum,
    x="value",
    y="variable",
    orient="horizontal",
    ax=ax_sum,
    palette=color_mapping,
)
ax_sum.set_ylabel("")  # Remove y-axis label for a cleaner look
ax_sum.set_xlabel("")  # Remove x-axis label to unify flow
ax_sum.set_title("U.S. Total Emissions", fontsize=14)
# Scale the axis to the largest national total plus 10 % padding. The limit
# used to be the sum over *all* sources, which left the bars filling only a
# fraction of the panel.
ax_sum.set_xlim(0, df_sum["value"].max() * 1.1)
ax_sum.tick_params(axis="y", labelsize=10)  # Smaller y-axis labels for a compact look

# Bottom panel: state-level values, one bar per data source.
sns.barplot(
    data=df_em,
    y="state",
    x="value",
    hue="variable",
    orient="horizontal",
    ax=ax,
    palette=color_mapping,
)
ax.set_ylabel("")  # Remove y-axis label for consistency
ax.legend(title="")  # Keep the legend inside the axes, without a title
ax.tick_params(axis="y", labelsize=10)  # Smaller y-axis labels for a compact look

# Add a single title above both plots
fig.suptitle("Simulated CO2 Emissions Comparison with Historical Observation", fontsize=16)

# Set the x-axis label on the bottom subplot only
ax.set_xlabel("CO2 Emissions [MMtCO2]", fontsize=14)

# Adjust layout to fit everything neatly
plt.tight_layout(rect=[0, 0, 1, 1])
plt.show()

# Statistics for Paper Text

In [None]:
# Index by state so the per-source series align on state when combined.
df_em_ = df_em.set_index("state")

# One value series per data source.
wholesale = df_em_.loc[df_em_["variable"].eq("Wholesale"), "value"]
pudl = df_em_.loc[df_em_["variable"].eq("Fuel Receipts"), "value"]
historical = df_em_.loc[df_em_["variable"].eq("Historical"), "value"]

# Absolute deviation of each model run from the historical observation.
ws_error = wholesale.sub(historical)
pudl_error = pudl.sub(historical)

# The same deviation, expressed in percent of the historical value.
ws_error_pct = ws_error.div(historical).mul(100)
pudl_error_pct = pudl_error.div(historical).mul(100)

In [None]:
# Summary statistics of the absolute emissions error of each model run.
for _label, _errors in (("PUDL Stats: ", pudl_error), ("WS stats: ", ws_error)):
    print(_label, _errors.abs().describe())

In [None]:
# Matplotlib's boxplot does not skip missing values: a single NaN (a state
# absent from one source) or +/-inf (zero historical emissions) spoils the
# quartiles of the whole box, so both are dropped before plotting.
_inf = float("inf")
box_data = [
    pct.replace([_inf, -_inf], float("nan")).dropna()
    for pct in (ws_error_pct, pudl_error_pct)
]

fig_err, ax_err = plt.subplots()
ax_err.boxplot(box_data, labels=["Wholesale", "PUDL"])
ax_err.set_ylabel("Percentage Error")
ax_err.set_title("Percentage Error of CO2 Emissions")
# Outliers beyond +/-100 % are clipped from view by this limit.
ax_err.set_ylim(-100, 100)
plt.show()

In [None]:
# Summary statistics of the absolute percentage error of each model run.
for _label, _errors in (("PUDL Stats: ", pudl_error_pct), ("WS stats: ", ws_error_pct)):
    print(_label, _errors.abs().describe())

In [None]:
# Absolute-error summary again (same statistics as printed before the boxplot).
for _label, _errors in (("PUDL Stats: ", pudl_error), ("WS stats: ", ws_error)):
    print(_label, _errors.abs().describe())

## State Generation Deviation Table and Plot

In [None]:
# Solved wholesale operations network, used below for the carrier colours and
# the link data. The path is relative to the notebook folder, like the CSV
# folders used elsewhere in this notebook, instead of a user-specific
# absolute path.
# NOTE(review): this is the s150 network while the figure folders point at
# s300 outputs -- confirm that the mismatch is intended.
n = pypsa.Network(
    "validation_usa_wholesale/networks/elec_s150_c133m_ec_lv1.0_Ep_E_operations.nc"
)

In [None]:
# Generation table of the wholesale run, indexed by its first column. The
# plotting cell below reads the columns ending in "_historical" and
# "_optimized" from it.
df = pd.read_csv(f"{wholesale_folder}val_state_generation_deviation.csv", index_col=0)

In [None]:
# Split the table into historical and optimized generation and strip the
# suffixes, so that both frames share the same carrier column names.
historical_gen = df.filter(regex="_historical$")
historical_gen.columns = historical_gen.columns.str.replace("_historical", "")

optimized = df.filter(regex="_optimized$")
optimized.columns = optimized.columns.str.replace("_optimized", "")

# State-level deviation in percent of the state's total historical generation.
# A value missing on one side counts as zero generation (`fill_value=0`), which
# is how the U.S. total row below treats it as well. Subtracting first and
# filling afterwards used to zero out the whole deviation whenever either side
# was missing.
diff_total = (
    optimized.sub(historical_gen, fill_value=0)
    .fillna(0)  # both sides missing -> no deviation
    .div(historical_gen.sum(axis=1), axis=0)
    .mul(1e2)
    .round(1)
)

# U.S. total deviation per carrier, in percent of total historical generation
us_total_historical = historical_gen.sum()
us_total_optimized = optimized.sum()
us_diff_total = ((us_total_optimized - us_total_historical) / us_total_historical.sum()).mul(1e2).round(1)

# Convert `us_diff_total` to a DataFrame with a single row
us_diff_total_df = pd.DataFrame([us_diff_total], index=["U.S. Total"])

# Carrier colours from the network; two entries are re-keyed to the column
# names used in the table ("natural gas" and "other").
colors = n.carriers.color.to_dict()
colors["natural gas"] = colors.pop("CCGT")
colors["other"] = colors.pop("load")

# Create a figure with two subplots for U.S. total and state-level comparison
fig, (ax_us, ax_state) = plt.subplots(2, 1, figsize=(8, 10), gridspec_kw={"height_ratios": [0.08, 3]}, sharex=True)

# Plot the U.S. total deviation as a stacked horizontal bar in the upper subplot
us_diff_total_df.plot(
    kind="barh", stacked=True, color=[colors[carrier] for carrier in us_diff_total.index], ax=ax_us, legend=False
)
ax_us.set_title("", fontsize=12)
ax_us.set_xlabel("")  # Remove x-axis label for a unified flow
ax_us.set_ylabel("")  # No y-axis label to keep the layout clean
ax_us.tick_params(axis="y", labelsize=12)
ax_us.axvline(0, color="black", lw=0.3, ls="--")

# Plot the state-level deviation in the lower subplot with stacked bars
diff_total.plot(kind="barh", stacked=True, ax=ax_state, color=colors)
ax_state.set_xlabel("Production Deviation [% of Total Generation]", fontsize=14)
ax_state.set_ylabel("")
ax_state.tick_params(axis="y", labelsize=12)
ax_state.invert_yaxis()

# add vertical line at 0
ax_state.axvline(0, color="black", lw=0.3, ls="--")

# Add a single title above both plots
fig.suptitle("Generation Deviation by State and Carrier", fontsize=16)

# Adjust layout to fit everything neatly
plt.tight_layout(rect=[0, 0, 1, 1])
plt.show()

# PUDL fuel cost data 

In [None]:
# Links whose name contains "exp" (presumably the transmission expansion
# candidates -- verify the naming convention), with the capital cost
# normalised by link length. `.assign` builds a new frame, so nothing is
# written onto a slice of `n.links` (the previous column assignment raised
# SettingWithCopyWarning).
extendable_links = n.links.loc[n.links.index.str.contains("exp")].assign(
    cost_per_length=lambda links: links.capital_cost / links.length
)
extendable_links.cost_per_length.plot(kind="hist", bins=100)
# NOTE(review): the label states miles; confirm the unit of `length`.
plt.xlabel("Capital Cost [USD/MW-mi-yr]")
plt.ylabel("Frequency")
plt.title("Histogram of Transmission Expansion Capital Costs")
plt.show()

In [None]:
# Display the selected links, including the derived cost_per_length column.
extendable_links

In [None]:
# Fossil carriers kept for the fuel cost comparison.
carriers = ["coal", "CCGT", "OCGT", "oil"]

# Power plant table, indexed by generator name and restricted to the carriers
# above. The path is relative to the notebook folder (workflow/notebooks/
# PaperFigures) rather than a user-specific absolute path. The final `.copy()`
# makes `ppl` an independent frame, so columns can be added to it later
# without SettingWithCopyWarning.
ppl = (
    pd.read_csv("../../resources/ca_reeds/powerplants.csv")
    .set_index("generator_name")
    .loc[lambda plants: plants["carrier"].isin(carriers)]
    .copy()
)
ppl

In [None]:
# PUDL fuel cost table, indexed by its first column. The path is relative to
# the notebook folder, like `pudl_folder`, instead of a user-specific absolute
# path.
df_pudl_fc = pd.read_csv(
    "validation_usa_pudl/pudl_fuel_costs.csv",
    index_col=0,
)

In [None]:
# Average of every fuel cost column (presumably one column per plant, named
# like the generator_name index of `ppl` -- verify against the CSV).
plant_avg = df_pudl_fc.mean()

# Attach the average to each plant through its index label. `.assign` returns
# a new frame instead of writing a column onto the filtered `ppl` slice, which
# raised SettingWithCopyWarning; plants without a match get NaN.
ppl = ppl.assign(fuel_cost_temp=ppl.index.map(plant_avg))

In [None]:
# Plants located in Delaware and Maryland, with the gap between marginal cost
# and fuel cost. The column is added through `.assign`, so it is not written
# onto a slice of `ppl` (previously a SettingWithCopyWarning).
# NOTE(review): this uses the `fuel_cost` column, not the `fuel_cost_temp`
# column derived from the PUDL averages -- confirm which one is intended.
ppl_de = ppl.loc[ppl["state"].isin(["DE", "MD"])].assign(
    mc_diff=lambda plants: plants["marginal_cost"] - plants["fuel_cost"]
)
ppl_de.to_csv("ppl_de.csv")

In [None]:
# Histogram of marginal cost per carrier; the view is limited to 0-100.
cost_grid = sns.displot(
    data=ppl,
    x="marginal_cost",
    hue="carrier",
    bins=500,
)
cost_grid.ax.set_xlim(0, 100)

In [None]:
# Marginal cost spread of each fossil carrier, split by NERC region.
ax_mc = sns.boxplot(
    data=ppl,
    x="marginal_cost",
    y="carrier",
    hue="nerc_region",
    width=0.6,
    flierprops={"marker": "o", "markersize": 3},
)
ax_mc.set_title("Distributions of Marginal Cost by NERC Region and Fossil Carrier in 2019")
ax_mc.set_xlabel("Marginal Cost [USD/MWh]")
ax_mc.set_ylabel("")
ax_mc.legend(title="NERC Region")
# Anything above 300 is cut from view.
ax_mc.set_xlim(0, 300)

In [None]:
# Efficiency spread of each fossil carrier, split by NERC region.
sns.boxplot(
    data=ppl,
    x="efficiency",
    y="carrier",
    hue="nerc_region",
    width=0.6,
    flierprops={"marker": "o", "markersize": 3},
)

In [None]:
# Mean fuel cost per carrier: the columns of `df_pudl_fc` are grouped by the
# carrier that `ppl` assigns to the matching index label. The frame is
# transposed for the grouping because `groupby(..., axis=1)` is deprecated
# since pandas 2.1; the result has the same layout as before.
df_avg = df_pudl_fc.T.groupby(ppl.carrier).mean().T

In [None]:
# Line plot of the carrier-average fuel costs, one line per carrier column.
df_avg.plot()