## Evaluation of Future Energy Scenarios

#### Country Code & Network

In [None]:
"""
Setup environment and load the base PyPSA-Earth network for a specified country.
"""

import os
import sys
import warnings
import pypsa
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import geopandas as gpd
import fiona
import warnings
from pathlib import Path
import requests
import shutil
import pypsa


import numpy as np
from matplotlib.colors import LinearSegmentedColormap, to_hex

#from scripts._helpers import (two_2_three_digits_country, two_digits_2_name_country)

# ---------------------------------------------------------------------------
# Warning configuration
# ---------------------------------------------------------------------------
# Silence FutureWarning/UserWarning chatter so the notebook output stays
# readable. One filter per category is sufficient.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# ---------------------------------------------------------------------------
# Environment setup
# ---------------------------------------------------------------------------
# The notebook must run from the directory that contains 'pypsa-earth'.
# If that folder is not visible from here, move two levels up.
if not os.path.isdir("pypsa-earth"):
    os.chdir("../..")

# Add PyPSA-Earth scripts to the import path. Validate first so that a wrong
# working directory fails loudly before anything is reported as "added"
# (a plain assert would be stripped under `python -O`).
scripts_path = os.path.join(os.getcwd(), "pypsa-earth", "scripts")
if not os.path.isdir(scripts_path):
    raise FileNotFoundError(f"Path not found: {scripts_path}")
if scripts_path not in sys.path:  # re-running the cell must not add duplicates
    sys.path.append(scripts_path)

print(f"Scripts path added: {scripts_path}")

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Define country parameters
country_code = "EC"        # ISO 2-letter code (e.g., 'GH' for Ghana, 'CO' for Colombia)
country_name = "Ecuador"   # Country name
country_gadm = "ECU"       # ISO 3-letter GADM code

# ---------------------------------------------------------------------------
# Load network
# ---------------------------------------------------------------------------
# Define network file location
network_dir = os.path.join(os.getcwd(), "pypsa-earth", "networks")
network_file = "base.nc"
network_path = os.path.join(network_dir, network_file)

# Fail with an explicit message when the base network has not been built yet
if not os.path.isfile(network_path):
    raise FileNotFoundError(f"Network file not found: {network_path}")

# Load the PyPSA network
network = pypsa.Network(network_path)

print(f"Network loaded successfully from: {network_path}")


#### Mapping buses and places of interest

In [None]:
"""
Plot Ecuadorian transmission buses/lines colored by nominal voltage.

Notes:
- Ecuador uses 500 kV, 230 kV, 138 kV, and 69 kV per:
  https://www.ambienteyenergia.gob.ec/wp-content/uploads/2020/01/5.-PLAN-DE-EXPANSION-DE-LA-TRANSMISION.pdf
- Validate voltage levels later with TTDO data.
- Optionally suppress buses without connected lines/transformers (TBD).
"""

from pathlib import Path
import os
import shutil
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import requests
from matplotlib.colors import LinearSegmentedColormap, to_hex

# ---------------------------------------------------------------------
# Colors for bus/line voltages
# ---------------------------------------------------------------------
uniques = np.sort(network.buses.v_nom.unique())
n_unique = len(uniques)

if n_unique == 1:
    colors = [to_hex("#006400")]  # dark green fallback
else:
    # Light green (lowest voltage) -> dark blue (highest voltage)
    cmap = LinearSegmentedColormap.from_list("green_to_blue", ["#34F034", "#00008B"])
    colors = [to_hex(c) for c in cmap(np.linspace(0, 1, n_unique))]

# Map v_nom -> color
vnom_to_color = dict(zip(uniques, colors))

# Optional: export transformers for inspection. The index holds the
# transformer names, so it is kept in the file.
network.transformers.to_csv("ecuador_transformers.csv")

# ---------------------------------------------------------------------
# GADM boundary data (expects `country_gadm` to be defined upstream, e.g., "ECU")
# ---------------------------------------------------------------------
GADM_filename = f"gadm41_{country_gadm}"
GADM_url = f"https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/{GADM_filename}.gpkg"
GADM_inputfile_gpkg = os.path.join(os.getcwd(), "pypsa-earth", "data", "gadm", f"{GADM_filename}.gpkg")

gadm_file = Path(GADM_inputfile_gpkg)
if not gadm_file.is_file():
    gadm_file.parent.mkdir(parents=True, exist_ok=True)
    # Download to a temporary name and rename on success: an interrupted
    # download must not leave a truncated file that later passes is_file().
    partial_file = gadm_file.with_name(gadm_file.name + ".part")
    with requests.get(GADM_url, stream=True, timeout=300) as resp:
        resp.raise_for_status()
        resp.raw.decode_content = True  # undo gzip/deflate transfer encoding, if any
        with open(partial_file, "wb") as f:
            shutil.copyfileobj(resp.raw, f)
    os.replace(partial_file, gadm_file)

# ---------------------------------------------------------------------
# Plot
# ---------------------------------------------------------------------
fig, ax = plt.subplots(figsize=(10, 8), subplot_kw={"projection": ccrs.PlateCarree()})
ax.set_title("Buses by Nominal Voltage")

# Admin boundaries
adm1 = gpd.read_file(GADM_inputfile_gpkg, layer="ADM_ADM_1")
adm1.boundary.plot(ax=ax, linewidth=0.2, color="black")

# Line widths proportional to voltage
v_to_width = {v: v * 0.004 + 0.5 for v in uniques}

network.plot(
    ax=ax,
    bus_colors=network.buses.v_nom.map(vnom_to_color),
    line_colors=network.lines.v_nom.map(vnom_to_color),
    line_widths=network.lines.v_nom.map(v_to_width),
    bus_sizes=0.01 / 5,
    color_geomap=True,
)

# Legend: one marker per voltage level ("230 kV" rather than "230.0 kV")
handles = [
    plt.Line2D([0], [0], marker="o", linestyle="none",
               markerfacecolor=color, markeredgecolor="none",
               markersize=10, label=f"{v:g} kV")
    for v, color in vnom_to_color.items()
]
ax.legend(
    handles=handles,
    title="Nominal Voltage",
    loc="upper left",
    bbox_to_anchor=(1.1, 1),
    borderaxespad=0,
)

plt.show()


In [None]:
# Display the bus identifiers of the loaded network
network.buses.index

# Plotting all the generation points

Data was manually prepared from
 
https://www.ambienteyenergia.gob.ec/wp-content/uploads/2020/01/5.-PLAN-DE-EXPANSION-DE-LA-TRANSMISION.pdf



In [None]:
import os
import pandas as pd
import unicodedata

# === Step 0: Make sure the input workbook exists ===
generation_path = os.path.join(os.getcwd(), "documentation", "ecuador_data")
generation_file = "generation_filtered.xlsx"
full_generation_path = os.path.join(generation_path, generation_file)
# Explicit exception instead of assert (asserts are stripped under `python -O`)
if not os.path.isfile(full_generation_path):
    raise FileNotFoundError(f"❌ Generation file not found: {full_generation_path}")

# === Step 1: Load the Excel file ===
input_file = full_generation_path
df = pd.read_excel(input_file)

# === Step 2: Define the new structure ===
# Column layout of the power plant CSV written at the end of this cell
columns = [
    "Name", "Fueltype", "Technology", "Set", "Country", "Capacity",
    "Efficiency", "Duration", "Volume_Mm3", "DamHeight_m",
    "StorageCapacity_MWh", "DateIn", "DateRetrofit", "DateOut",
    "lat", "lon", "EIC", "projectID"
]
# Share the source index so the column assignments below align row by row
new_df = pd.DataFrame(index=df.index, columns=columns)

# === Step 3: Map available data (Spanish source column -> target column) ===
new_df["Name"] = df["Central"]
new_df["Fueltype"] = df["Tipo de Central"]
new_df["Technology"] = df["Subtipo de Central"]
new_df["Set"] = df["Sistema"]
new_df["Country"] = "Ecuador"
new_df["Capacity"] = df["Potencia Efectiva (MW)"]
new_df["lat"] = df["Latitud"]
new_df["lon"] = df["Longitud"]
# === Step 4: Clean strings ===
def clean_text(value):
    """Return *value* as an ASCII identifier-like string.

    Missing values (None/NaN) become "". Otherwise the value is converted to
    str, accents are removed, surrounding whitespace is trimmed and the
    remaining spaces are replaced by underscores.
    """
    if pd.isna(value):
        return ""
    value = str(value)
    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('utf-8')
    # Trim BEFORE replacing spaces; otherwise padding turns into leading or
    # trailing underscores and the strip has nothing left to remove.
    return value.strip().replace(" ", "_")

# Normalise every text (object-dtype) column, one column at a time
object_columns = new_df.select_dtypes(include=['object']).columns
for column_name in object_columns:
    new_df[column_name] = new_df[column_name].map(clean_text)

# === Step 5: Ensure unique names ===
def make_unique(series):
    """Return the names of *series* as a list with duplicates made unique.

    The first occurrence of a name is kept; later occurrences get the suffix
    _2, _3, ... A suffix is skipped when the resulting name is already taken
    (e.g. the input itself contains "a_2"), so the result is always unique.
    """
    counts = {}
    used = set()
    unique_names = []
    for name in series:
        candidate = name
        if candidate in used:
            suffix = counts.get(name, 1)
            while candidate in used:
                suffix += 1
                candidate = f"{name}_{suffix}"
            counts[name] = suffix
        used.add(candidate)
        unique_names.append(candidate)
    return unique_names

if new_df["Name"].duplicated().any():
    print("⚠️ Duplicate names detected — renaming...")
    new_df["Name"] = make_unique(new_df["Name"])

assert new_df["Name"].is_unique, "❌ Duplicate names still exist after renaming."

# Temporary commissioning year so every existing plant is included by default
# in the evaluation
new_df["DateIn"] = 2000

# === Step 6: Export cleaned file ===
ppl_path = os.path.join(os.getcwd(), "pypsa-earth", "data", "ppl", "EC")
os.makedirs(ppl_path, exist_ok=True)
ppl_file = os.path.join(ppl_path, "powerplants_existing.csv")

new_df.to_csv(ppl_file, index=False, encoding="utf-8")

# `copy` is kept importable here because the future-plants cell calls
# copy.deepcopy without importing the module itself.
import copy
# Independent snapshot: `new_df` is rebuilt from scratch by the next dataset
ppl_existent = new_df.copy()

print(f"✅ New CSV file created and cleaned: {ppl_file}")
print(f"✅ All {len(new_df)} plant names are unique.")


In [None]:
"""
Plot Ecuadorian power plants by technology and capacity (English labels).

Uses:
- Color: Technology type (translated to English)
- Bubble size: Capacity (log scale)
Requires `new_df` DataFrame with columns:
  ['Name', 'Technology', 'Capacity', 'lat', 'lon']


import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
from matplotlib.cm import get_cmap
from pathlib import Path
import os

# ---------------------------------------------------------------------
# Load generator data
# ---------------------------------------------------------------------
# Assuming new_df is already defined from the previous script
# new_df = pd.read_csv("powerplants_temp.csv")

plot_df = new_df.dropna(subset=["lat", "lon", "Capacity"]).copy()
plot_df = plot_df[plot_df["Capacity"].astype(float) > 0]

# ---------------------------------------------------------------------
# Translate Technology names (Spanish ‚Üí English)
# ---------------------------------------------------------------------
tech_translation = {
    "Pasada": "Run-of-river hydro",
    "Embalse": "Reservoir hydro",
    "E√≥lica": "Wind",
    "Eolica": "Wind",
    "Fotovoltaica": "Solar PV",
    "MCI": "Internal combustion",
    "Turbog√°s": "Gas turbine",
    "Turbovapor": "Steam turbine",
    "Biomasa": "Biomass",
    "Biog√°s": "Biogas",
}

plot_df["Technology_Eng"] = plot_df["Technology"].map(tech_translation).fillna(plot_df["Technology"])

# ---------------------------------------------------------------------
# Load Ecuador boundary (GADM)
# ---------------------------------------------------------------------
country_gadm = "ECU"
GADM_filename = f"gadm41_{country_gadm}.gpkg"
GADM_inputfile_gpkg = os.path.join(
    os.getcwd(), "pypsa-earth", "data", "gadm", GADM_filename
)
assert Path(GADM_inputfile_gpkg).is_file(), f"GADM file not found: {GADM_inputfile_gpkg}"

ecuador_shape = gpd.read_file(GADM_inputfile_gpkg, layer="ADM_ADM_1")

# ---------------------------------------------------------------------
# Color map by technology (English)
# ---------------------------------------------------------------------
technologies = plot_df["Technology_Eng"].unique()
n_colors = len(technologies)
cmap = get_cmap("tab10") if n_colors <= 10 else get_cmap("tab20")
tech_to_color = {tech: cmap(i / n_colors) for i, tech in enumerate(technologies)}

# ---------------------------------------------------------------------
# Compute bubble size (log-scaled by capacity)
# ---------------------------------------------------------------------
cap = plot_df["Capacity"].astype(float)
plot_df["size"] = np.log10(cap + 1) * 40  # adjust multiplier if needed

# ---------------------------------------------------------------------
# Plot setup
# ---------------------------------------------------------------------
fig, ax = plt.subplots(figsize=(10, 8), subplot_kw={"projection": ccrs.PlateCarree()})
ax.set_title("Ecuador Power Plants by Technology", fontsize=14)

# Plot boundaries
ecuador_shape.boundary.plot(ax=ax, linewidth=0.4, color="black", alpha=0.7)

# Plot generators
for tech, group in plot_df.groupby("Technology_Eng"):
    ax.scatter(
        group["lon"].astype(float),
        group["lat"].astype(float),
        s=group["size"],
        color=tech_to_color[tech],
        label=tech,
        alpha=0.7,
        transform=ccrs.PlateCarree(),
        edgecolors="black",
        linewidth=0.3,
    )

# ---------------------------------------------------------------------
# Legend
# ---------------------------------------------------------------------
ax.legend(
    title="Technology Type",
    loc="upper left",
    bbox_to_anchor=(1.05, 1),
    borderaxespad=0,
)
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")

plt.tight_layout()
plt.show()
"""

# Newer power plants
New power plants that, according to the master plan, are scheduled to come into operation.



In [None]:
import os
import pandas as pd
import unicodedata
import numpy as np

# === Paths ===
generation_path = os.path.join(os.getcwd(), "documentation", "ecuador_data")
input_file = os.path.join(generation_path, "generation_future.xlsx")
# Explicit exception instead of assert (asserts are stripped under `python -O`)
if not os.path.isfile(input_file):
    raise FileNotFoundError(f"❌ Future generation file not found: {input_file}")


# === Load ===
df = pd.read_excel(input_file, sheet_name="Future")

# Expected input columns (for clarity):
# "Año de entrada en operación","Proyecto / Central","Empresa / Institución","Estado",
# "Inversión pública o privada","Tipo","Potencia [MW]","Energía media [GWh/año]",
# "Provincia","Cantón","Latitud","Longitud"

# === Target structure ===
columns = [
    "Name",
    "Fueltype",
    "Technology",
    "Set",
    "Country",
    "Capacity",
    "Efficiency",
    "Duration",
    "Volume_Mm3",
    "DamHeight_m",
    "StorageCapacity_MWh",
    "DateIn",
    "DateRetrofit",
    "DateOut",
    "lat",
    "lon",
    "EIC",
    "projectID",
]
new_df = pd.DataFrame(columns=columns)

# === Map available data ===
# Numeric fields are coerced: unparsable cells become NaN instead of raising
new_df["Name"] = df["Proyecto / Central"]
new_df["Fueltype"] = df["Tipo"]  # keep as provided (normalized below)
new_df["Technology"] = ""  # not provided in this sheet
new_df["Set"] = "S.N.I."  # tag to distinguish dataset
new_df["Country"] = "Ecuador"
new_df["Capacity"] = pd.to_numeric(df["Potencia [MW]"], errors="coerce")
# The header must be spelled with its real accents to match the workbook
new_df["DateIn"] = pd.to_numeric(df["Año de entrada en operación"], errors="coerce")
new_df["lat"] = pd.to_numeric(df["Latitud"], errors="coerce")
new_df["lon"] = pd.to_numeric(df["Longitud"], errors="coerce")

# Empty optional fields
for c in [
    "Efficiency",
    "Duration",
    "Volume_Mm3",
    "DamHeight_m",
    "StorageCapacity_MWh",
    "DateRetrofit",
    "DateOut",
    "EIC",
    "projectID",
]:
    new_df[c] = np.nan


# === String normalization (remove accents, spaces->underscores) ===
def clean_text(value):
    """Return *value* as an ASCII identifier-like string.

    Missing values (None/NaN) become "". Otherwise the value is converted to
    str, accents are removed, surrounding whitespace is trimmed and the
    remaining spaces are replaced by underscores.
    """
    if pd.isna(value):
        return ""
    value = str(value)
    value = (
        unicodedata.normalize("NFKD", value).encode("ascii", "ignore").decode("utf-8")
    )
    # Trim BEFORE replacing spaces; otherwise padding turns into leading or
    # trailing underscores and the strip has nothing left to remove.
    return value.strip().replace(" ", "_")


# Normalise the text columns (accent-free, underscores instead of spaces)
text_columns = ("Name", "Fueltype", "Technology", "Set", "Country", "EIC", "projectID")
for column_name in text_columns:
    new_df[column_name] = new_df[column_name].map(clean_text)


# === Ensure unique plant names ===
def make_unique(series):
    """Return the names of *series* as a list with duplicates made unique.

    The first occurrence of a name is kept; later occurrences get the suffix
    _2, _3, ... A suffix is skipped when the resulting name is already taken
    (e.g. the input itself contains "a_2"), so the result is always unique.
    """
    counts = {}
    used = set()
    out = []
    for name in series:
        candidate = name
        if candidate in used:
            suffix = counts.get(name, 1)
            while candidate in used:
                suffix += 1
                candidate = f"{name}_{suffix}"
            counts[name] = suffix
        used.add(candidate)
        out.append(candidate)
    return out


if new_df["Name"].duplicated().any():
    print("⚠️ Duplicate names detected — renaming...")
    # The rename has to run: with it commented out, any duplicate made the
    # assertion below fail right after announcing the renaming.
    new_df["Name"] = make_unique(new_df["Name"])

assert new_df["Name"].is_unique, "❌ Duplicate names still exist after renaming."

# Default technology per (accent-free) fuel type of the future projects
tech = {
    "Hidroelectrico": "Embalse",
    "Termoelectrico": "MCI",
    "Eolico": "Eolica",
    "ERNC": "Fotovoltaica",
}

# Report every unmapped fuel type at once instead of failing on the first row
unknown_fueltypes = sorted(set(new_df["Fueltype"]) - set(tech))
if unknown_fueltypes:
    raise KeyError(f"No technology mapping for fuel type(s): {unknown_fueltypes}")
new_df["Technology"] = new_df["Fueltype"].map(tech)

# === Export ===

# === Step 6: Export cleaned file ===
ppl_path = os.path.join(os.getcwd(), "pypsa-earth", "data", "ppl", "EC")
os.makedirs(ppl_path, exist_ok=True)
out_file = os.path.join(ppl_path, "powerplants_future.csv")

# Use UTF-8 (PyPSA-friendly). If you want Excel-safe accents, use encoding='utf-8-sig'.
new_df.to_csv(out_file, index=False, encoding="utf-8")
# DataFrame.copy() is a deep copy by default and, unlike copy.deepcopy, does
# not depend on `copy` having been imported by an earlier cell.
ppl_future = new_df.copy()
print(f"✅ Future CSV created: {out_file}")
print(f"✅ Rows: {len(new_df)} | Unique names: {new_df['Name'].is_unique}")

#### Power Plants

In [None]:
# Add powerplants
import powerplantmatching as pm
from pathlib import Path
import pandas as pd


# print default dataset (data from Europe, currently not working)
#pm.powerplants()

In [None]:
# Load config file
ppmatching = os.path.join(
    os.getcwd(), "pypsa-earth", "configs", "powerplantmatching_config.yaml"
)
config = pm.get_config(ppmatching)

# Select target countries
config["target_countries"] = ["EC"]

In [None]:
import os
import pandas as pd

# === Step 1: Define the output path ===
ppl_path = os.path.join(os.getcwd(), "pypsa-earth", "data", "ppl", "EC")
os.makedirs(ppl_path, exist_ok=True)
out_file = os.path.join(ppl_path, "powerplants_all.csv")

# === Step 2: Concatenate vertically ===
# (both DataFrames must have the same columns)
combined_df = pd.concat([ppl_existent, ppl_future], axis=0, ignore_index=True)

# === Step 3: Verify structure ===
print(f"🧩 Combined shape: {combined_df.shape}")
print(f"📋 Columns: {list(combined_df.columns)}")

# Names are only unique within each dataset. The first CSV column (Name) is
# used as the index when this file is read back, so flag names that occur in
# both datasets.
shared_names = combined_df.loc[combined_df["Name"].duplicated(keep=False), "Name"]
if not shared_names.empty:
    print(f"⚠️ Plant names used more than once: {sorted(shared_names.unique())}")

# === Step 4: Export to CSV ===
# Use utf-8-sig for Excel compatibility if needed
combined_df.to_csv(out_file, index=False, encoding="utf-8-sig")

print(f"✅ Combined power plants file saved at:\n   {out_file}")
print(f"✅ Total plants: {len(combined_df)}")


In [None]:

import logging

ppl_path = os.path.join(os.getcwd(), "pypsa-earth", "data", "ppl", "EC")
ppl_file = os.path.join(ppl_path, "powerplants_all.csv")

if Path(ppl_file).is_file():
    # Curated national dataset: the first CSV column becomes the index
    ppl = pd.read_csv(ppl_file, index_col=0)
    logging.info("Powerplant file found: %s, loading existing data.", ppl_file)
else:
    # Create the data directory if it does not exist yet
    if not os.path.exists(ppl_path):
        Path(ppl_path).mkdir(parents=True, exist_ok=True)
        logging.info("Created directory for powerplant data: %s", ppl_path)

    # Fall back to powerplantmatching (solar and wind included)
    ppl = (
        pm.powerplants(from_url=False, update=True, config_update=config)
        .powerplant.fill_missing_decommissioning_years()
    )
    # Nothing is written to disk in this branch, so the message must not
    # claim the data was saved.
    logging.info(
        "Powerplant data built with powerplantmatching (not written to %s).",
        ppl_file,
    )
    # Variant that drops renewable power plants:
    # ppl = pm.powerplants(from_url=False, update=True, config_update=config).powerplant.fill_missing_decommissioning_years().query('Fueltype not in ["Solar", "Wind"]')


#### Information about the Power Plants

In [None]:
# Update ppl to match pypsa network
# Each step goes through the `.powerplant` accessor of powerplantmatching.
# NOTE(review): later cells read 'carrier', 'p_nom' and 'component' from
# `ppl_p`, so to_pypsa_names() presumably renames Fueltype/Capacity/Set --
# confirm against the powerplantmatching documentation.
ppl_f = ppl.powerplant.fill_missing_commissioning_years()
ppl_p = ppl_f.powerplant.to_pypsa_names()
# Drop columns that contain no data at all
ppl_p = ppl_p.dropna(axis=1, how="all")



In [None]:
# Filter power plants by grid connection, commissioning year and size
ppl_connected = ppl_p[ppl_p["component"] == "S.N.I."]  # plants tagged as part of the S.N.I. system
# Plants without a commissioning year (NaN) fall into neither of the two groups
ppl_connected_current = ppl_connected[ppl_connected["DateIn"] <= 2017]  # commissioned in or before 2017
ppl_connected_future = ppl_connected[ppl_connected["DateIn"] > 2017]  # commissioned after 2017
# Explicit copy: later cells add a "bus" column, which must not write into a
# slice of `ppl_p` (SettingWithCopyWarning / silently lost assignment).
ppl_filtered = ppl_connected[ppl_connected["p_nom"] >= 10].copy()  # capacity >= 10 MW

total_capacity = ppl_p["p_nom"].sum()
total_connected = ppl_connected["p_nom"].sum()
total_connected_current = ppl_connected_current["p_nom"].sum()
total_filtered = ppl_filtered["p_nom"].sum()
# Capacities come from the "Potencia ... (MW)" source columns, hence MW
logging.info(
    "\nTotal capacity: %s MW,\nConnected capacity: %s MW,"
    "\nConnected current capacity: %s MW,\nFiltered capacity: %s MW",
    total_capacity,
    total_connected,
    total_connected_current,
    total_filtered,
)



#### Attach Powerplants to the Nearest Bus

In [None]:
from scipy.spatial import cKDTree as KDTree
import numpy as np
import pandas as pd

# Choose substations (example: low-voltage)
substation_i = network.buses.query("substation_lv").index
if substation_i.empty:
    # An empty tree would only fail later with an opaque IndexError
    raise ValueError("No low-voltage substation buses found in the network.")

# Build KDTree on bus coordinates
kdtree = KDTree(network.buses.loc[substation_i, ["x", "y"]].values)

# Match plants to nearest bus: query() returns (distances, positions) and
# only the positions within `substation_i` are needed.
_, tree_i = kdtree.query(ppl_filtered[["lon", "lat"]].values)
ppl_filtered["bus"] = substation_i[tree_i].values

# Keep the mapping on disk for inspection
ppl_filtered.to_csv("power_plants_all_mapped.csv")

#### Printing of Total Capacity

In [None]:
# Distinct carrier labels (still Spanish), trimmed and lower-cased.
# Missing values are dropped BEFORE the string conversion; afterwards they
# would already be the literal text "nan" and dropna() would keep them.
unique_carriers_es = (
    ppl_p["carrier"]
    .dropna()
    .astype(str).str.strip().str.lower()
    .unique()
)
unique_carriers_es

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 1) Find unique carriers (Spanish); drop missing values before the string
#    conversion, otherwise they survive as the text "nan"
unique_carriers_es = (
    ppl_p["carrier"]
    .dropna()
    .astype(str).str.strip().str.lower()
    .unique()
)

# 2) Translate to English (keys are lower-case, with and without accents)
carrier_map_es2en = {
    "hidraulica": "Hydro",
    "hidráulica": "Hydro",
    "hidroelectrico": "Hydro",
    "termica": "Thermal",
    "térmica": "Thermal",
    "termoelectrico": "Thermal",
    "biomasa": "Biomass",
    "fotovoltaica": "Solar PV",
    "eolica": "Wind",
    "eólica": "Wind",
    "eolico": "Wind",
    "biogas": "Biogas",
    "biogás": "Biogas",
    "ernc": "Other ER",
}

# Normalise and map; fall back to title-cased original if not in mapping.
# Plants without a carrier are grouped under "Unknown" instead of "Nan".
carrier_text = ppl_p["carrier"].fillna("Unknown").astype(str).str.strip()
ppl_p["carrier_norm"] = carrier_text.str.lower()
ppl_p["carrier_en"] = ppl_p["carrier_norm"].map(carrier_map_es2en).fillna(
    carrier_text.str.title()
)

# Ensure capacity numeric (handles things like "1'500,00")
ppl_p["p_nom"] = (
    ppl_p["p_nom"].astype(str)
    .str.replace("'", "", regex=False)
    .str.replace(",", ".", regex=False)
)
ppl_p["p_nom"] = pd.to_numeric(ppl_p["p_nom"], errors="coerce").fillna(0.0)

# Aggregate by translated carrier
capacity_by_carrier = (
    ppl_p.groupby("carrier_en")["p_nom"]
    .sum()
    .sort_values(ascending=False)
)

total_capacity = capacity_by_carrier.sum()

# Plot
colors = plt.cm.tab20.colors[:len(capacity_by_carrier)]
plt.figure(figsize=(12, 6))
bars = plt.bar(capacity_by_carrier.index, capacity_by_carrier.values, color=colors)

# Labels with MW and % (share is 0 when there is no capacity at all)
label_offset = 0.01 * capacity_by_carrier.max()
for bar, val in zip(bars, capacity_by_carrier.values):
    share = val / total_capacity * 100 if total_capacity else 0.0
    plt.text(
        bar.get_x() + bar.get_width() / 2,
        bar.get_height() + label_offset,
        f"{val:.0f} MW\n({share:.1f}%)",
        ha="center", va="bottom", fontsize=10
    )

plt.ylabel("Installed Capacity (MW)")
plt.xlabel("Fuel Type")
plt.title("Total Installed Capacity by Fuel Type in Ecuador")
plt.xticks(rotation=45, ha="right")
plt.tight_layout()
plt.show()


#### Buses and Powerplants

In [None]:
import matplotlib.pyplot as plt

# Coordinate Reference Systems (EPSG codes)
# 4326: general geographic projection (standard of OSM and Google Maps), not
#       meant for metric measures.
# 6933: projection for area measurements only; Global Mollweide "ESRI:54009"
#       would be an alternative but is not supported by atlite.
# 3857 (distance measurements) is currently not used.
geo_crs = 4326
area_crs = 6933

# Point geometries from the coordinates of the power plants and of the buses
ppl_geometry = gpd.points_from_xy(x=ppl_filtered['lon'], y=ppl_filtered['lat'])
buses_geometry = gpd.points_from_xy(x=network.buses['lon'], y=network.buses['lat'])

# Geo-referenced copies of both tables in the geographic CRS
ppl_capacities = gpd.GeoDataFrame(
    ppl_filtered,
    geometry=ppl_geometry,
    crs=geo_crs,
)
buses_capacities = gpd.GeoDataFrame(
    network.buses,
    geometry=buses_geometry,
    crs=geo_crs,
)


#### Attach Powerplants to the Nearest Bus

In [None]:
from scipy.spatial import cKDTree as KDTree
import numpy as np

# Candidate buses: those flagged as low-voltage substations
substation_i = network.buses.query("substation_lv").index
# Spatial index over the (x, y) coordinates of the candidate buses
kdtree = KDTree(network.buses.loc[substation_i, ["x", "y"]].values)
ppl_i = ppl_filtered.index

# query() returns (distances, positions); [1] keeps the positions of the
# nearest candidate bus for every plant
tree_i = kdtree.query(ppl_filtered.loc[ppl_i, ["lon", "lat"]].values)[1]
# A NaN entry is appended to the bus index so that the position
# len(substation_i) resolves to NaN instead of raising.
# NOTE(review): presumably meant for queries with a distance bound, where
# that position marks "no neighbour found" -- confirm; no bound is set here.
ppl_filtered.loc[ppl_i, "bus"] = substation_i.append(pd.Index([np.nan]))[tree_i]

#### Add Powerplants to the Network

In [None]:
# One generator per filtered plant, named after the plant index; bus, nominal
# power and carrier are taken from the corresponding columns.
generator_attributes = {
    "bus": ppl_filtered.bus,
    "p_nom": ppl_filtered.p_nom,
    "carrier": ppl_filtered.carrier,
}
network.madd("Generator", ppl_filtered.index, **generator_attributes)

network.generators

In [None]:
# Display the generator table of the network
network.generators  

In [None]:
import matplotlib.pyplot as plt

# Define the time horizon (2025-2040 inclusive, matching the ordering year
# and the plot title below)
years = range(2025, 2041)

# Build a dataframe with capacity per carrier per year
capacity_by_year_carrier = pd.DataFrame(index=years)

for year in years:
    # Plants active at this year (no commissioning year = always active).
    # `ppl_p` is used because it carries the PyPSA column names 'carrier' and
    # 'p_nom'; the raw `ppl` table read from CSV does not have them.
    active_ppl = ppl_p[(ppl_p["DateIn"].isna()) | (ppl_p["DateIn"] <= year)]
    # Sum capacity by carrier
    capacity_by_carrier = active_ppl.groupby("carrier")["p_nom"].sum()
    capacity_by_year_carrier.loc[year, capacity_by_carrier.index] = capacity_by_carrier.values

# Replace NaNs with 0
capacity_by_year_carrier = capacity_by_year_carrier.fillna(0)

# Order carriers by their capacity in the final year of the horizon
final_year = years[-1]
order = capacity_by_year_carrier.loc[final_year].sort_values().index
capacity_by_year_carrier = capacity_by_year_carrier[order]

# Plot as stacked area
plt.figure(figsize=(12,7))
capacity_by_year_carrier.plot.area(ax=plt.gca(), alpha=0.85)

plt.title(f"Installed Capacity in Ecuador by Carrier ({years[0]}–{final_year})")
plt.xlabel("Year")
plt.ylabel("Installed Capacity (MW)")
plt.grid(True, linestyle="--", alpha=0.5)
plt.legend(title="Carrier", loc="upper left", bbox_to_anchor=(1,1))
plt.tight_layout()
plt.show()




In [None]:
import pandas as pd

# Export generators to Excel
# The file is written relative to the current working directory.
output_file = "network_generators.xlsx"
network.generators.to_excel(output_file)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# --- Sample Data ---
# Synthetic power plant table used only to demonstrate the interpolation.
# NOTE(review): this rebinds `ppl` and thereby discards the real power plant
# table loaded earlier in the notebook; re-run the loading cells before using
# `ppl` again. The carriers and the "Ghana" title look like leftovers from
# another country study -- confirm whether this cell is still needed.
ppl = pd.DataFrame({
    "carrier": [
        "solar", "solar", "solar", "natural_gas", "natural_gas",
        "nuclear", "hydro", "oil"
    ],
    "p_nom": [
        300, 450, 275, 5500, 3000,
        200, 1000, 500
    ],
    "DateIn": [
        2025, 2026, 2028, 2025, 2027,
        2028, None, 2029
    ]
})

# Define the time horizon
years = range(2025, 2041)

# Capacity per carrier per year from the plant list alone
capacity_by_year_carrier = pd.DataFrame(index=years)

for year in years:
    # Plants active up to this year (no commissioning year = always active)
    active_ppl = ppl[(ppl["DateIn"].isna()) | (ppl["DateIn"] <= year)]
    # Sum capacity for all carriers
    capacity_by_carrier = active_ppl.groupby("carrier")["p_nom"].sum()
    capacity_by_year_carrier.loc[year, capacity_by_carrier.index] = capacity_by_carrier.values

# Replace NaNs with 0
capacity_by_year_carrier = capacity_by_year_carrier.fillna(0)

# --- Interpolation Logic ---
# Target capacities for 2035 and 2040; values between 2030 and 2040 are
# interpolated linearly for these carriers.
interpolated_carriers = ['solar', 'natural_gas', 'nuclear']
target_values = {
    2035: {'solar': 1020, 'natural_gas': 11050, 'nuclear': 600},
    2040: {'solar': 1665, 'natural_gas': 14650, 'nuclear': 1200}
}

# Temporary DataFrame holding the interpolated trajectories
interp_data = pd.DataFrame(index=years)

for carrier in interpolated_carriers:
    # Anchor points: the plant-based value in 2030 plus the two targets
    values = pd.Series(index=years, dtype=float)
    values.loc[2030] = capacity_by_year_carrier.loc[2030, carrier]
    values.loc[2035] = target_values[2035][carrier]
    values.loc[2040] = target_values[2040][carrier]

    # Forward-only interpolation: years before 2030 stay NaN and therefore
    # leave the plant-based values untouched in update() below
    interp_data[carrier] = values.interpolate(method='linear', limit_direction='forward')

# Merge the interpolated data back into the main DataFrame. The years
# 2031-2040 are blanked first so they are fully replaced.
capacity_by_year_carrier.loc[2031:2040, interpolated_carriers] = np.nan
capacity_by_year_carrier.update(interp_data)

# Re-order carriers for consistent plotting
order = capacity_by_year_carrier.loc[2040].sort_values().index
capacity_by_year_carrier = capacity_by_year_carrier[order]

# Plot as stacked area
plt.figure(figsize=(12,7))
capacity_by_year_carrier.plot.area(ax=plt.gca(), alpha=0.85)

plt.title("Installed Capacity in Ghana by Carrier (2025–2040)")
plt.xlabel("Year")
plt.ylabel("Installed Capacity (MW)")
plt.grid(True, linestyle="--", alpha=0.5)
plt.legend(title="Carrier", loc="upper left", bbox_to_anchor=(1,1))
plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# --- Sample Data ---
# Synthetic power plant table used only to demonstrate the interpolation.
# NOTE(review): this rebinds `ppl` and thereby discards the real power plant
# table loaded earlier in the notebook. The carriers and the "Ghana" title
# look like leftovers from another country study -- confirm.
ppl = pd.DataFrame({
    "carrier": [
        "solar", "solar", "solar", "natural_gas", "natural_gas",
        "nuclear", "hydro", "oil"
    ],
    "p_nom": [
        300, 450, 275, 5500, 3000,
        200, 1000, 500
    ],
    "DateIn": [
        2025, 2026, 2028, 2025, 2027,
        2028, None, 2029
    ]
})

# Define the time horizon
years = range(2025, 2041)

# Capacity per carrier per year from the plant list alone
capacity_by_year_carrier = pd.DataFrame(index=years)

for year in years:
    # Plants active up to this year (no commissioning year = always active)
    active_ppl = ppl[(ppl["DateIn"].isna()) | (ppl["DateIn"] <= year)]
    capacity_by_carrier = active_ppl.groupby("carrier")["p_nom"].sum()
    capacity_by_year_carrier.loc[year, capacity_by_carrier.index] = capacity_by_carrier.values

capacity_by_year_carrier = capacity_by_year_carrier.fillna(0)

# --- Interpolation Logic ---
# Linear interpolation between the 2030 value and the 2035/2040 targets
interpolated_carriers = ['solar', 'natural_gas', 'nuclear']
target_values = {
    2035: {'solar': 1020, 'natural_gas': 11050, 'nuclear': 600},
    2040: {'solar': 1665, 'natural_gas': 14650, 'nuclear': 1200}
}

interp_data = pd.DataFrame(index=years)
for carrier in interpolated_carriers:
    values = pd.Series(index=years, dtype=float)
    values.loc[2030] = capacity_by_year_carrier.loc[2030, carrier]
    values.loc[2035] = target_values[2035][carrier]
    values.loc[2040] = target_values[2040][carrier]
    # Years before 2030 stay NaN, so update() below leaves them untouched
    interp_data[carrier] = values.interpolate(method='linear', limit_direction='forward')

capacity_by_year_carrier.loc[2031:2040, interpolated_carriers] = np.nan
capacity_by_year_carrier.update(interp_data)

# Re-order carriers
order = capacity_by_year_carrier.loc[2040].sort_values().index
capacity_by_year_carrier = capacity_by_year_carrier[order]

# --- CAPEX values ---
# Amounts per carrier for the annotated years; the labels below print them
# in billions with a "$" prefix.
capex_data = {
    2030: {'nuclear': 2790000000, 'natural_gas': 2621700000, 'solar': 582750000},
    2040: {'nuclear': 8370000000, 'natural_gas': 7339500000, 'solar': 992250000}
}

# --- Plot ---
plt.figure(figsize=(12,7))
ax = capacity_by_year_carrier.plot.area(ax=plt.gca(), alpha=0.85)

plt.title("Installed Capacity in Ghana by Carrier (2025–2040)")
plt.xlabel("Year")
plt.ylabel("Installed Capacity (MW)")
plt.grid(True, linestyle="--", alpha=0.5)
plt.legend(title="Carrier", loc="upper left", bbox_to_anchor=(1,1))

# Add CAPEX annotations
for year, carriers in capex_data.items():
    # The chart is stacked: a carrier's band ends at the cumulative sum of
    # the columns up to and including it, not at its own capacity.
    stack_tops = capacity_by_year_carrier.loc[year].cumsum()
    for carrier, capex in carriers.items():
        y_val = capacity_by_year_carrier.loc[year, carrier]
        if y_val > 0:  # annotate only if capacity exists
            band_top = stack_tops[carrier]
            ax.annotate(
                f"${capex/1e9:.2f}B",  # format in billions
                xy=(year, band_top),
                xytext=(year, band_top + 1000),
                ha="center",
                fontsize=12,  # bigger text
                fontweight="bold",  # bold text
                arrowprops=dict(arrowstyle="->", color="black", lw=1.2)  # thicker arrow
            )

plt.tight_layout()
plt.show()


In [None]:
# Cost table for the selected country (CSV created with PyPSA-Earth)
filename = f"resources/{country_code}/costs_nuclear.csv"

#TODO: Calculate annuity for capital cost, check marginal cost with variable maintenance
# The first CSV column is used as the index of the cost table
costs = pd.read_csv(filename, index_col=0)
costs

In [None]:
# Remove the generators' current cost columns so the merge below can re-add them from `costs`.
network.generators.drop(['marginal_cost', 'capital_cost'], axis=1, inplace=True)

# Add cost data from 'costs' to 'network.generators'
# Each generator is matched to the `costs` row whose index equals the generator's 'carrier'.
# NOTE(review): pd.merge defaults to an inner join, so a generator whose carrier is missing
# from `costs` would be dropped here - confirm that every carrier is covered.
network.generators = pd.merge(network.generators, costs, left_on='carrier', right_index=True)
# Display the columns needed to check the result.
network.generators[['bus', 'p_nom', 'carrier', 'marginal_cost', 'capital_cost']]

In [None]:
# TODO: Create a function to calculate the demand profile
# Hourly demand profile per bus (the CSV file has been created with PyPSA-Earth)
load_profile_file = f'resources/{country_code}/demand_profiles.csv'
load_profile = pd.read_csv(load_profile_file, index_col=0, parse_dates=True)

# The peak load for Colombia is less than the installed hydropower capacity,
# so its demand is scaled up by 40%; every other country is left unchanged.
load_factor = 1.4 if country_code == 'CO' else 1
load_profile = load_profile * load_factor
load_profile

#### Ghana Demand

In [None]:
# Add the load profile to the network
# One load is created per column of `load_profile`; each load is named after its column,
# attached to the bus of the same name and uses that column as its p_set time series.
network.madd('Load', load_profile.columns, bus=load_profile.columns, p_set=load_profile)
# Display the resulting load time series.
network.loads_t.p_set

In [None]:
# Peak load check: sum all loads per snapshot, take the maximum over time,
# divide by 1000 and round to 1 decimal place.
total_load = network.loads_t.p_set.T.sum()
peak_load_gw = round(total_load.max() / 1000, 1)
print('Peak load: ', peak_load_gw, 'GW') # GW

#### Region Cluster

In [None]:
def get_node_by_region(region_node):
    """Add the centroid of every region polygon as node position.

    Each region is treated as one node located at the centroid of its polygon.
    The coordinates are written to the columns ``x`` and ``y`` of
    ``region_node`` itself (the input is modified in place) and the same
    object is returned.

    Centroids computed directly on a geographic CRS (unit: degree, e.g.
    EPSG:4326 or EPSG:4674) are inaccurate and make geopandas emit
    "Geometry is in a geographic CRS. Results from 'centroid' are likely
    incorrect". For such input the geometries are therefore reprojected to
    EPSG:4087 for the centroid calculation and the resulting points are
    converted back, so ``x``/``y`` stay in the CRS of ``region_node``.
    See https://gis.stackexchange.com/questions/372564/userwarning-when-trying-to-get-centroid-from-a-polygon-geopandas

    Parameters
    ----------
    region_node : geopandas.GeoDataFrame
        Region polygons.

    Returns
    -------
    geopandas.GeoDataFrame
        ``region_node`` with the additional columns ``x`` and ``y``.
    """
    source_crs = getattr(region_node, 'crs', None)
    if source_crs is not None and source_crs.is_geographic:
        # Centroid in a projected CRS, expressed again in the original CRS.
        centroids = region_node.to_crs('EPSG:4087').centroid.to_crs(source_crs)
    else:
        # Already projected, or no CRS information to reproject from.
        centroids = region_node.centroid

    # The centroids are computed once and reused for both coordinates.
    region_node['x'] = centroids.x
    region_node['y'] = centroids.y
    return region_node

# Add the centroid coordinates to the region polygons. `shapefile` is passed in directly,
# so `region_node` is that same object with the 'x' and 'y' columns added.
region_node = get_node_by_region(shapefile)
region_node.head()

In [None]:
from shapely.geometry import Point

# Attach node to region
def attach_node_to_region(gdf, longitude, latitude):
    """Return the region that contains a point together with its node position.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Region polygons with the columns ``NAME_1``, ``x`` and ``y``.
    longitude, latitude : float
        Coordinates of the point, assumed to be in WGS84 (EPSG:4326).

    Returns
    -------
    tuple
        ``(region, region_x, region_y)`` taken from the first polygon that
        contains the point.

    Raises
    ------
    IndexError
        If no polygon contains the point.
    """
    # Define the point (longitude, latitude)
    point = Point(longitude, latitude)
    # Ensure the CRS of GeoDataFrame and point match
    if gdf.crs != 'EPSG:4326': # Assuming the point is in WGS84 (EPSG:4326)
        gdf = gdf.to_crs('EPSG:4326')
    # Run the point-in-polygon test once and reuse the result for all three columns.
    matches = gdf[gdf.contains(point)]
    if matches.empty:
        # Same exception type as the former `.values[0]` lookup, with a usable message.
        raise IndexError(f'no region contains the point ({longitude}, {latitude})')
    region = matches['NAME_1'].values[0]
    region_x = matches['x'].values[0]
    region_y = matches['y'].values[0]
    return region, region_x, region_y

In [None]:
# Attach region to buses using the attach_node_to_region function
def attach_region_to_buses(gdf, buses):
    """Assign every bus to the region polygon that contains it.

    ``buses`` is modified in place: the columns ``region``, ``region_x`` and
    ``region_y`` are added. A bus for which the lookup fails keeps the region
    ``'NA'`` and NaN coordinates; the error and the bus index are printed and
    processing continues with the next bus.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Region polygons as expected by ``attach_node_to_region``.
    buses : pandas.DataFrame
        Bus table with the columns ``lon`` and ``lat``.

    Returns
    -------
    pandas.DataFrame
        The same ``buses`` object with the region columns filled.
    """
    # Reproject the regions once here; otherwise the per-bus lookup would
    # repeat the same reprojection for every single bus.
    crs = getattr(gdf, 'crs', None)
    if crs is not None and crs != 'EPSG:4326':
        gdf = gdf.to_crs('EPSG:4326')

    # Create all result columns up front so they exist even if no bus matches.
    buses['region'] = 'NA'
    buses['region_x'] = float('nan')
    buses['region_y'] = float('nan')

    for i in buses.index:
        lon = buses.at[i, 'lon']
        lat = buses.at[i, 'lat']
        try:
            region, region_x, region_y = attach_node_to_region(gdf, lon, lat)
        except Exception as e:
            # Best effort: report the bus and leave its defaults in place.
            print('Error:', e)
            print('Index:', i)
            continue
        buses.loc[i, 'region'] = region
        buses.loc[i, 'region_x'] = region_x
        buses.loc[i, 'region_y'] = region_y
    return buses

In [None]:
# create busmap
# The busmap is the 'region' column returned by attach_region_to_buses: one region name per
# bus of the network. network.buses is passed in directly and receives the region columns too.
busmap = attach_region_to_buses(region_node, network.buses)['region']
busmap

In [None]:
# Restrict the line table to the attribute names listed for the 'Line' component
# (all entries of the attribute table except the first one).
network.lines = network.lines.reindex(columns=network.components['Line']['attrs'].index[1:])
# NOTE(review): presumably clears the line type so that the lines' own electrical
# parameters are used - confirm that NaN is the intended placeholder.
network.lines['type'] = np.nan
# Same column restriction for the bus table.
# NOTE(review): this presumably removes the helper columns added while building the busmap - confirm.
network.buses = network.buses.reindex(columns=network.components['Bus']['attrs'].index[1:])
# Every bus gets the same value in the 'frequency' column.
network.buses['frequency'] = 50

In [None]:
# Cluster the network based on the busmap
from pypsa.clustering.spatial import get_clustering_from_busmap
# Only the `.network` attribute of the clustering result is kept.
clustered = get_clustering_from_busmap(network, busmap).network
clustered

In [None]:
# Display the generator table of the clustered network
clustered.generators

In [None]:
# Display the bus table of the clustered network
clustered.buses

In [None]:
# Display the line table of the clustered network
clustered.lines

In [None]:
# Display the load table of the clustered network
clustered.loads

In [None]:
import atlite

# Open the weather cutout stored for the selected country
cutout_file = f'cutouts/{country_code}/cutout-2013-era5.nc'
cutout = atlite.Cutout(cutout_file)
cutout

In [None]:
import matplotlib.pyplot as plt
import cartopy.crs as ccrs

# Time averages over the cutout: wind field `wnd100m` and total influx (direct + diffuse)
wnd100m = cutout.data.wnd100m.mean(dim="time")
influx = cutout.data['influx_direct'].mean(dim="time") + cutout.data['influx_diffuse'].mean(dim="time")

# Get the total bounds of the shapefile
minx, miny, maxx, maxy = shapefile.total_bounds

# Plot the wind and influx
# TODO: check the units and print them on the legend

# Create a figure with two subplots
fig, ax = plt.subplots(1, 2, subplot_kw={'projection': ccrs.PlateCarree()}, figsize=(15, 7))

# Limit both maps to the extent of the country
ax[0].set_extent([minx, maxx, miny, maxy], crs=ccrs.PlateCarree())
ax[1].set_extent([minx, maxx, miny, maxy], crs=ccrs.PlateCarree())

# Plot data on the first subplot
wnd100m.plot(ax=ax[0], cmap="Blues")
shapefile.to_crs(geo_crs).plot(ax=ax[0], edgecolor="k", color="none")

# Plot data on the second subplot
influx.plot(ax=ax[1], cmap="Reds")
shapefile.to_crs(geo_crs).plot(ax=ax[1], edgecolor="k", color="none")

# Set the titles only now: DataArray.plot puts its own title on the axes, so a
# title set before plotting is overwritten and never shows up.
ax[0].set_title("Mean Wind Potential in m/s", fontsize=14, pad=20)
ax[1].set_title("Mean Influx in W/m2", fontsize=14)

# Adjust layout to prevent overlap (the top 5% of the figure is left free)
plt.tight_layout(rect=[0, 0, 1, 0.95])

# Display the figure
plt.show()

In [None]:
# Display the region table
shapefile

In [None]:
import xarray as xr

# Area of every grid cell of the cutout, measured after reprojecting the grid
# to `area_crs` and divided by 1e6 (km2, assuming `area_crs` uses metres - TODO confirm)
cell_area = cutout.grid.to_crs(area_crs).area / 1e6

# Arrange the flat values on the cutout's (y, x) grid
area = xr.DataArray(
    cell_area.values.reshape(cutout.shape),
    [cutout.coords["y"], cutout.coords["x"]],
)

area

In [None]:
# Define renewable resources
# 'method' names the cutout conversion function, 'resource' names the profile file
# and the carrier, 'capacity_per_sqkm' is the assumed installable density; every
# other key is passed on to the conversion function as an argument.
resources = [
    {'method': 'wind', 'turbine': 'Vestas_V112_3MW', 'capacity_per_sqkm': 4.6, 'resource': 'onwind'},
    {'method': 'pv', 'panel': 'CSi', 'orientation': 'latitude_optimal', 'capacity_per_sqkm': 2, 'resource': 'solar'},
]
correction_factor = 1

# Create new geodataframe for the capacities
# (a real copy, so the potential columns are not written into `shapefile` itself)
capacities = shapefile.copy()
datasets = {}
shape = shapefile.set_index("NAME_1")

# Keys of a resource entry that steer this loop and are not conversion arguments
control_keys = ('method', 'capacity_per_sqkm', 'resource')

# Calculate capacities per resource
# TODO: check the algorithm for correctness and efficiency
for resource in resources:
    method = resource['method']
    res = resource['resource']
    profile_path = 'resources/' + country_code + '/renewable_profiles/profile_' + res + '.nc'
    if Path(profile_path).is_file():
        # Reuse the stored profile
        print('Profile found:', res)
        ds = xr.open_dataset(profile_path)
        profile = ds['profile']
        capacity = ds['capacities']
    else:
        # Compute the profile from the cutout and store it for the next run
        print('Profile not found:', res)
        cap_per_sqkm = resource['capacity_per_sqkm']
        print(method)
        # Build the conversion arguments without modifying the entry in `resources`
        conversion_kwargs = {k: v for k, v in resource.items() if k not in control_keys}
        print(conversion_kwargs)
        func = getattr(cutout, method)
        capacity_factor = correction_factor * func(capacity_factor=True, **conversion_kwargs)
        # Installable capacity per grid cell: capacity factor x cell area x density
        layout = capacity_factor * area * cap_per_sqkm
        profile, capacity = func(shapes=shape, per_unit=True, return_capacity=True, layout=layout, **conversion_kwargs)
        ds = xr.Dataset({
            'profile': profile.rename({'NAME_1': 'bus'}),
            'capacities': capacity.rename({'NAME_1': 'bus'})
        })
        # The target folder may not exist yet on a first run
        Path(profile_path).parent.mkdir(parents=True, exist_ok=True)
        ds.to_netcdf(profile_path)
    # NOTE(review): the values are assigned by position, which assumes the regions in
    # `capacity` are in the same order as the rows of `capacities` - confirm for stored profiles.
    capacities[method] = capacity
    datasets[res] = ds

datasets

In [None]:
# Region area, measured in `area_crs` and divided by 1e6 (convert to km2), rounded to 1 decimal
capacities['area'] = capacities.to_crs(area_crs).area / 1e6
capacities['area'] = capacities['area'].round(1)

# Round both potentials to 1 decimal
for tech_column in ('wind', 'pv'):
    capacities[tech_column] = capacities[tech_column].round(1)

# Potential per unit of region area
for tech_column in ('wind', 'pv'):
    capacities[tech_column + '_per_sqkm'] = capacities[tech_column] / capacities['area']

capacities

In [None]:
# Plot wind and solar potential in subplots
# Row 0: wind, row 1: PV. Columns: total potential, potential per km2, mean resource map.
# TODO: check units
ncols = 3
nrows = 2
fig, ax = plt.subplots(nrows=nrows, ncols=ncols, subplot_kw={'projection': ccrs.PlateCarree()}, figsize=(20, 15))

# Limit every map to the extent of the country
for axis in ax.flat:
    axis.set_extent([minx, maxx, miny, maxy], ccrs.PlateCarree())


def _plot_potential(axis, column, cmap, title):
    """Draw one choropleth of `capacities[column]` and label every region with its name."""
    capacities.plot(ax=axis, column=column, legend=True, cmap=cmap)
    for name, geom in zip(capacities['NAME_1'], capacities.geometry):
        axis.annotate(text=name, xy=geom.centroid.coords[0], fontsize=10, ha='center')
    axis.set_title(title)


# Wind
_plot_potential(ax[0, 0], "wind", "Blues", "Wind Potential in MW")
_plot_potential(ax[0, 1], "wind_per_sqkm", "Blues", "Wind Potential in MW per km\u00b2")
wnd100m.plot(ax=ax[0, 2], cmap="Blues")
# `wnd100m` is the field titled "Mean Wind Potential in m/s" in the map cell above,
# so the unit here is m/s as well (the title previously said W/m2).
ax[0, 2].set_title("Wind Resource in m/s")
shapefile.to_crs(geo_crs).plot(ax=ax[0, 2], edgecolor="k", color="none")

# PV
_plot_potential(ax[1, 0], "pv", "Reds", "PV Potential in MW")
_plot_potential(ax[1, 1], "pv_per_sqkm", "Reds", "PV Potential in MW per km\u00b2")
influx.plot(ax=ax[1, 2], cmap="Reds")
ax[1, 2].set_title("Solar Resource in W/m\u00b2")
shapefile.to_crs(geo_crs).plot(ax=ax[1, 2], edgecolor="k", color="none")
plt.show()

In [None]:
# Load the emissions data; the CSV index is turned back into a regular column
carriers_file = f'resources/{country_code}/carriers.csv'
carriers = pd.read_csv(carriers_file, index_col=0).reset_index()

# Combine CCGT and OCGT to Natural Gas:
# the OCGT row is relabelled and the CCGT row is removed.
is_ocgt = carriers.Carrier == 'OCGT'
carriers.loc[is_ocgt, 'Carrier'] = 'natural gas'
is_open_cycle = carriers.nice_name == 'Open-Cycle Gas'
carriers.loc[is_open_cycle, 'nice_name'] = 'Natural Gas'

# Drop CCGT and set the index to the carrier name
carriers = carriers.loc[carriers.Carrier != 'CCGT'].set_index('Carrier')

# Add carriers to the network
clustered.carriers = carriers
clustered.carriers

In [None]:
# Create a new network with renewables
clustered_re = clustered.copy()

# Buses that receive renewable generators: every clustered bus except the
# placeholder 'NA'. errors='ignore' makes the drop a no-op when there is no
# such bus, which replaces the former bare `except:` fallback.
# TODO: Check why there is a 'NA' bus in Colombia
region_buses = clustered.buses.index.drop('NA', errors='ignore')

# One extendable generator per technology and bus
for tech, ds in datasets.items():
    # Filter out non-existing buses
    ds = ds.sel(bus=region_buses)
    # Add renewable generators, named "<bus> <tech>"
    clustered_re.madd(
        "Generator",
        ds.indexes["bus"],
        " " + tech,
        bus=ds.indexes["bus"],
        carrier=tech,
        p_nom=0,
        p_nom_extendable=True,
        p_nom_min=0,
        p_nom_max=ds["capacities"].to_pandas(),  # upper bound: potential per bus
        p_max_pu=ds["profile"].transpose("time", "bus").to_pandas(),  # availability time series
        marginal_cost=costs.at[tech, "marginal_cost"],
        capital_cost=costs.at[tech, "capital_cost"],
    )



#### Solve Network

In [None]:
# Optimize the old network
# Every line gets the same values in the 'x' and 'r' columns before solving.
# NOTE(review): presumably placeholder impedances for the clustered lines - confirm the values.
clustered.lines['x'] = 0.1
clustered.lines['r'] = 0.01
# `solver` is currently unused: the solver_name argument is commented out in the call below.
solver = 'gurobi'
clustered.optimize()#solver_name=solver)

#### Example: New Shares

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


# Current share of each sector; multiplied with `total_energy` below to get
# today's energy per sector.
current_shares = {
    "Industry": 0.32,
    "Residential": 0.47,
    "Transport": 0.0003,
    "Agriculture": 0.0003,
    "Service": 0.212
}

current_capacity = {}
for sector, share in current_shares.items():
    current_capacity[sector] = share * total_energy

# 2040 target total (named and reported as MWh below)
target_total_mwh = total_energy2040 

# User-provided target shares (note: they sum to 106%)
raw_target_shares = {
    "Industry": 0.48,
    "Residential": 0.40,
    "Transport": 0.01,
    "Agriculture": 0.01,
    "Service": 0.16
}

raw_sum = sum(raw_target_shares.values())
print(f"Sum of raw target shares: {raw_sum*100:.2f}%")

# Rescale the target shares so that they add up to 1.0 (100%)
target_shares = {}
for sector, raw_share in raw_target_shares.items():
    target_shares[sector] = raw_share / raw_sum

print("Normalized target shares (used to compute MWh):")
for sector, share in target_shares.items():
    print(f"  {sector}: {share*100:.2f}%")

# 2040 energy per sector
target_capacity = {}
for sector, share in target_shares.items():
    target_capacity[sector] = share * target_total_mwh

# Required additions per sector: target minus current, never below zero
additional_capacity = {}
for sector, target_value in target_capacity.items():
    gap = target_value - current_capacity.get(sector, 0.0)
    additional_capacity[sector] = max(gap, 0.0)

# Summary table with one row per sector, used for printing and plotting
summary_columns = {
    "current_mwh": pd.Series(current_capacity),
    "target_mwh": pd.Series(target_capacity),
    "additional_mwh": pd.Series(additional_capacity),
    "current_share_%": pd.Series(current_shares).mul(100),
    "target_share_%": pd.Series(target_shares).mul(100)
}
df = pd.DataFrame(summary_columns).fillna(0.0)

print("\nSummary (MWh and shares):")
print(df.round(2))

# Grouped bar chart: for every sector one bar for today and one for the 2040 target
sectors = df.index.tolist()
x = np.arange(len(sectors))
width = 0.35

colors = plt.cm.Set2.colors  # palette with distinct colors
sector_colors = colors[:len(sectors)]

fig, ax = plt.subplots(figsize=(11,6))
# Current values sit left of the tick, target values (lighter, hatched) right of it
bars_curr = ax.bar(x - width/2, df["current_mwh"].values, width, label="Current (MWh)", color=sector_colors, alpha=0.9)
bars_targ = ax.bar(x + width/2, df["target_mwh"].values, width, label="Target 2040 (MWh)", color=sector_colors, alpha=0.45, hatch='//')

def autolabel(bars, values_share_pct):
    """Write the bar height (MWh) and the matching share (%) above every bar.

    Draws on the module-level axes `ax`; the gap between bar and text is 1%
    of the tallest value in the current/target columns of `df`.
    """
    tallest = max(df["target_mwh"].max(), df["current_mwh"].max())
    gap = 0.01 * tallest
    for bar, share in zip(bars, values_share_pct):
        height = bar.get_height()
        x_center = bar.get_x() + bar.get_width()/2
        label = f"{height:,.0f} MWh\n({share:.1f}%)"
        ax.text(x_center, height + gap, label, ha='center', va='bottom', fontsize=9)

# Current labels use current_share_%, target labels use target_share_%
autolabel(bars_curr, df["current_share_%"].values)
autolabel(bars_targ, df["target_share_%"].values)

ax.set_ylabel("Energy (MWh)")
ax.set_title("Current vs Target (2040) Energy by Sector\nTarget total (normalized shares)")
ax.set_xticks(x)
ax.set_xticklabels(sectors, rotation=25, ha="right")
ax.legend()
plt.tight_layout()
plt.show()
