# PCA + k-Means on ERA5 Data

## Results

- Data loading takes ~5min
- Data preprocessing takes ~4min
- PCA with 500 PCs takes ~8min
- PCA (reduced PCs), Transforming and clustering (2 x k-Means) takes ~2min
- kPCA with 500 PCs takes ~2min
- kPCA (reduced PCs), Transforming and clustering (2 x k-Means) takes ~3min
- PCA:
  - N_pcs=24 cover 0.7 of the variance
  - N_pcs=80 cover 0.8 of the variance
  - N_pcs=171 cover 0.85 of the variance
  - N_pcs=421 cover 0.9 of the variance
- kPCA:
  - N_pcs=1 cover 0.4 of the variance
  - N_pcs=2 cover 0.5 of the variance
  - N_pcs=3 cover 0.6 of the variance
  - N_pcs=6 cover 0.7 of the variance
  - N_pcs=15 cover 0.8 of the variance
  - N_pcs=54 cover 0.9 of the variance

In [None]:
%load_ext autoreload
%autoreload 2
import datetime
import pathlib
import joblib

import sklearn.cluster
import sklearn.decomposition
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import torch
import pandas as pd

import a6
import a6.plotting._colors as _colors

# Directory that receives the figures saved by the plotting cells.
plots = pathlib.Path("/p/project/deepacf/emmerich1/plots")

# ERA5 NetCDF file that is opened with xarray in the following cells.
path = pathlib.Path(
    "/p/project/deepacf/emmerich1/data/ecmwf_era5/era5_pl_1964_2023_12.nc"
)

In [None]:
%%time

# a6 helper objects; `coordinates` is handed to the a6 functions used in the
# following cells.
coordinates = a6.datasets.coordinates.Coordinates()
variables = a6.datasets.variables.Model()

# Open the ERA5 file referenced by `path`.
ds = xr.open_dataset(path)

# Last expression of the cell: display the dataset summary.
ds

In [None]:
%%time

# Replace NaNs via a6's `set_nans_to_mean` before the features are built.
masked = a6.datasets.methods.mask.set_nans_to_mean(ds, coordinates=coordinates)

# Individual preprocessing steps, chained with `>>` below.
weighting = a6.features.methods.weighting.weight_by_latitudes(
    latitudes=coordinates.latitude,
    use_sqrt=True,
)
reshaping = a6.features.methods.reshape.xarray.reshape_spatio_temporal_data(
    # Set to None to avoid memory excess in function
    time_coordinate=None,
)
normalization = a6.features.methods.standardization.normalize_features()

# Apply weighting -> reshaping -> normalization and materialize the result.
data = (weighting >> reshaping >> normalization).apply_to(masked).compute()

# Drop the inputs and the step objects; only `data` is needed afterwards.
del ds, masked
del weighting, reshaping, normalization

In [None]:
%%time

# To (re-)create the cached file from the preprocessing cell above, run:
# data.to_netcdf("/p/project/deepacf/emmerich1/data/ecmwf_era5/era5_pl_1964_2023_12_preprocssed_for_pca.nc")
# del data

# Load the cached feature matrix as a plain NumPy array. The context manager
# closes the file as soon as the values are in memory instead of leaving that
# to garbage collection.
# NOTE: "preprocssed" is spelled exactly as in the save call above; the name
# has to match the file on disk.
with xr.open_dataset(
    "/p/project/deepacf/emmerich1/data/ecmwf_era5/era5_pl_1964_2023_12_preprocssed_for_pca.nc"
) as preprocessed:
    data = preprocessed.to_dataarray().values[0]

# Do not keep the (closed) dataset object around; only `data` is needed.
del preprocessed

In [None]:
%%time

# The fitted model is restored from disk. To refit instead of loading, use:
# pca = sklearn.decomposition.PCA(n_components=500).fit(data)
pca = joblib.load("/p/project/deepacf/emmerich1/data/pca_500_pcs.joblib")

# Running total of the explained variance ratio over the components.
cum_evr = pca.explained_variance_ratio_.cumsum()

In [None]:
%%time

# Cumulative explained variance against the (1-based) number of components.
plt.plot(list(range(1, pca.n_components_ + 1)), cum_evr)
for var in np.arange(0, 1.0, 0.1):
    # `cum_evr[i]` is the variance covered by the first `i + 1` components, so
    # the 0-based index of the first exceedance has to be shifted by one to
    # obtain the number of components.
    n_pcs = np.where(cum_evr > var)[0][0] + 1
    print(f"N_pcs={n_pcs} cover {var} of the variance")

# Smallest number of components whose cumulative variance exceeds 80 %.
n_pcs = np.where(cum_evr > 0.80)[0][0] + 1
transformed = sklearn.decomposition.PCA(n_components=n_pcs).fit_transform(data)

# Cluster the reduced data twice, with 40 and with 30 clusters.
kmeans_pca_40 = sklearn.cluster.KMeans(n_clusters=40).fit(transformed)
kmeans_pca_30 = sklearn.cluster.KMeans(n_clusters=30).fit(transformed)

kmeans_pca_40_labels = kmeans_pca_40.labels_
kmeans_pca_30_labels = kmeans_pca_30.labels_

In [None]:
%%time
# Persist the PCA and both K-Means models fitted on the PCA-transformed data.
for model, target in (
    (pca, "/p/project/deepacf/emmerich1/data/pca_500_pcs.joblib"),
    (
        kmeans_pca_40,
        "/p/project/deepacf/emmerich1/data/kmeans_pca_40.joblib",
    ),
    (
        kmeans_pca_30,
        "/p/project/deepacf/emmerich1/data/kmeans_pca_30.joblib",
    ),
):
    joblib.dump(model, target)

# Remove every reference (the loop variables included) so that the objects
# can be garbage collected.
del model, target
del pca, transformed, kmeans_pca_40, kmeans_pca_30

Kernel PCA: Gaussian radial basis function with $\sigma = 200$ (see http://dx.doi.org/10.1016/j.procs.2011.08.043)

In [None]:
%%time

# Fit a kernel PCA with 500 components on the preprocessed feature matrix.
# NOTE(review): the markdown cell above announces a Gaussian RBF kernel with
# sigma = 200, but neither `kernel` nor `gamma` is passed here, so
# scikit-learn's default kernel applies (documented default: "linear").
# TODO confirm which kernel is intended before trusting the kPCA results.
kpca = sklearn.decomposition.KernelPCA(
    n_components=500,
    # Forwarded unchanged to scikit-learn's `copy_X` option.
    copy_X=False,
).fit(data)

In [None]:
%%time
# Eigenvalues normalized by their sum serve as the explained variance ratio
# of the kernel PCA (relative to the retained components only).
cum_evr = np.cumsum(kpca.eigenvalues_ / np.sum(kpca.eigenvalues_))
plt.plot(list(range(1, len(kpca.eigenvalues_) + 1)), cum_evr)
for var in np.arange(0, 1.0, 0.1):
    # `cum_evr[i]` is the variance covered by the first `i + 1` components, so
    # the 0-based index of the first exceedance has to be shifted by one to
    # obtain the number of components.
    n_pcs = np.where(cum_evr > var)[0][0] + 1
    print(f"N_pcs={n_pcs} cover {var} of the variance")

# Smallest number of components whose cumulative variance exceeds 80 %.
n_pcs = np.where(cum_evr > 0.80)[0][0] + 1
transformed = sklearn.decomposition.KernelPCA(
    n_components=n_pcs, copy_X=False
).fit_transform(data)

# Cluster the reduced data twice, with 40 and with 30 clusters.
kmeans_kpca_40 = sklearn.cluster.KMeans(n_clusters=40).fit(transformed)
kmeans_kpca_30 = sklearn.cluster.KMeans(n_clusters=30).fit(transformed)

# Labels have to come from the kPCA-based models. The PCA-based models that
# were referenced here before are deleted at the end of the PCA cells.
kmeans_kpca_40_labels = kmeans_kpca_40.labels_
kmeans_kpca_30_labels = kmeans_kpca_30.labels_

In [None]:
%%time

# Persist the kernel PCA and both K-Means models fitted on its output.
for model, target in (
    (kpca, "/p/project/deepacf/emmerich1/data/kpca_500_pcs.joblib"),
    (
        kmeans_kpca_40,
        "/p/project/deepacf/emmerich1/data/kmeans_kpca_40.joblib",
    ),
    (
        kmeans_kpca_30,
        "/p/project/deepacf/emmerich1/data/kmeans_kpca_30.joblib",
    ),
):
    joblib.dump(model, target)

# Remove every reference (the loop variables included) so that the objects
# can be garbage collected.
del model, target
del kpca, transformed, kmeans_kpca_40, kmeans_kpca_30

In [None]:
%%time

# Re-open the ERA5 file: the handle from the loading cell was deleted after
# preprocessing, and its time steps are required below.
ds = xr.open_dataset(path)

# Restore the four fitted K-Means models from disk.
kmeans_pca_30 = joblib.load(
    "/p/project/deepacf/emmerich1/data/kmeans_pca_30.joblib"
)
kmeans_pca_40 = joblib.load(
    "/p/project/deepacf/emmerich1/data/kmeans_pca_40.joblib"
)
kmeans_kpca_30 = joblib.load(
    "/p/project/deepacf/emmerich1/data/kmeans_kpca_30.joblib"
)
kmeans_kpca_40 = joblib.load(
    "/p/project/deepacf/emmerich1/data/kmeans_kpca_40.joblib"
)

# Add +1 to every label to be consistent with the GWL labels.
kmeans_pca_30_labels = kmeans_pca_30.labels_ + 1
kmeans_pca_40_labels = kmeans_pca_40.labels_ + 1
kmeans_kpca_30_labels = kmeans_kpca_30.labels_ + 1
kmeans_kpca_40_labels = kmeans_kpca_40.labels_ + 1

# Dates need to be converted to YYYY-MM-DD to be consistent with the time
# stamps of the GWL dataset.
times = a6.utils.times.get_time_steps_as_dates(ds, coordinates=coordinates)

# Per method: one (k=30, k=40) label pair for every time step.
label_pairs = {
    "PCA": list(zip(kmeans_pca_30_labels, kmeans_pca_40_labels)),
    "kPCA": list(zip(kmeans_kpca_30_labels, kmeans_kpca_40_labels)),
}

results = xr.Dataset(
    data_vars={
        method: (["time", "k"], pairs) for method, pairs in label_pairs.items()
    },
    coords={"time": times, "k": [30, 40]},
    attrs={
        "description": "Resulting LSWR labels from K-Means clustering on PCA and kernel PCA of the ERA5 data (1964-2023)",
    },
)
results.to_netcdf(
    "/p/project/deepacf/emmerich1/data/pca_kpca_kmeans_lswrs_30_40.nc"
)

# Last expression of the cell: display the dataset summary.
results

## Plotting

In [None]:
# Labels from the PCA/kPCA + K-Means cells; only k=40 is compared below.
results = xr.open_dataset(
    "/p/project/deepacf/emmerich1/data/pca_kpca_kmeans_lswrs_30_40.nc"
)
results_40 = results.sel(k=40)

# GWL reference labels.
gwl = xr.open_dataset(
    "/p/home/jusers/emmerich1/juwels/code/a6/src/tests/data/gwl.nc"
)

# DCv2 assignments, wrapped into a DataArray on the time axis of `results`.
# NOTE: `torch.load` unpickles the file; only load files from trusted sources.
dcv2 = xr.DataArray(
    # Need to add +1 to be consistent with the GWL labels
    torch.load(
        "/p/scratch/deepacf/emmerich1/dcv2/multi-level-all-fields-1964-2023/results/tensors/epoch-799-assignments.pt",
        map_location=torch.device("cpu"),
    ).numpy()[0]
    + 1,
    name="DCv2",
    dims=["time"],
    coords={"time": results["time"]},
)
dcv2.to_netcdf("/p/project/deepacf/emmerich1/data/dcv2-lswrs.nc")

# All label series that are compared in the plots below.
lswrs = [gwl["GWL"], results_40["PCA"], results_40["kPCA"], dcv2]

In [None]:
import matplotlib.pyplot as plt

# Restore both fitted decompositions.
kpca = joblib.load("/p/project/deepacf/emmerich1/data/kpca_500_pcs.joblib")
pca = joblib.load("/p/project/deepacf/emmerich1/data/pca_500_pcs.joblib")

# PCA: scikit-learn provides the explained variance ratio directly.
pca_cum_evr = pca.explained_variance_ratio_.cumsum()

# kPCA: eigenvalues normalized by their sum are used as the ratio.
kpca_evr = kpca.eigenvalues_ / kpca.eigenvalues_.sum()
kpca_cum_evr = kpca_evr.cumsum()

## Explained Variance Ratio Plots

In [None]:
fig, ax1 = plt.subplots(figsize=(6, 4))

ax1.set_xlabel("PCs")

# 1-based number of principal components.
x = list(range(1, pca.n_components + 1))

# Plot cumulative variance on first axis
ax1_color = "green"
ax1.set_ylabel("cumulative explained variance", color=ax1_color)
ax1.plot(x, pca_cum_evr, color=ax1_color, linestyle="-", label="PCA")
ax1.plot(x, kpca_cum_evr, color=ax1_color, linestyle="-.", label="kPCA")

# Create right axis.
ax2 = ax1.twinx()

# Plot the explained variance ratios.
ax2_color = "blue"
ax2.set_ylabel("explained variance ratio", color=ax2_color)
ax2.plot(
    x,
    pca.explained_variance_ratio_,
    color=ax2_color,
    linestyle="-",
    label="PCA",
)
ax2.plot(x, kpca_evr, color=ax2_color, linestyle="-.", label="kPCA")

for ax, color in [(ax1, ax1_color), (ax2, ax2_color)]:
    # Set log scale.
    ax.set(xscale="log", yscale="log")
    # Set left xlim such that the first tick disappears.
    ax.set_xlim(0.91, None)
    # Color the ticks.
    ax.tick_params(axis="y", colors=color, which="both")


# Plot vertical line indicating variance excess.
variance = 0.8
for cum_evr, title in [(pca_cum_evr, "PCA"), (kpca_cum_evr, "kPCA")]:
    # The x axis counts components from 1, whereas `np.where` yields the
    # 0-based index of the first exceedance; shift by one so that line and
    # annotation show the number of components.
    n_pcs = np.where(cum_evr > variance)[0][0] + 1
    # Dashed line indicating the threshold. It spans the full axes height by
    # default (`ymin`/`ymax` are axes fractions).
    ax2.axvline(
        n_pcs,
        linestyle="dashed",
        color="grey",
    )
    ax2.text(
        1.04 * n_pcs,
        0.001,
        f"$N_{{PCs}} = {n_pcs}$ ({title})",
        rotation=90,
        color="grey",
    )
ax1.set_title("Explained Variances for PCA and kPCA")
ax2.legend()

fig.tight_layout()

plt.savefig(plots / "explained-variance-pca-kpca.pdf")

## LSWR Analysis

In [None]:
def plot_abundance(
    assignments: list[xr.DataArray],
) -> None:
    """Plot one histogram of the LSWR labels per method and save the figure.

    The label range (and with it the bins and bar colors) is derived from the
    first element of ``assignments`` and reused for all subplots. The figure
    is written to ``plots / "lswrs-abundance-comparison.pdf"``.

    Args:
        assignments: Label series to compare; ``name`` of each element is
            used as subplot title.

    Raises:
        RuntimeError: If a histogram has a different number of bars than
            there are label colors.
    """
    n_subplots = len(assignments)
    labels = np.arange(
        int(assignments[0].min()), int(assignments[0].max()) + 1, 1, dtype=int
    )
    # Bin edges are centered on the integer labels.
    x_lims = labels.min() - 0.5, labels.max() + 0.5
    bins = np.arange(x_lims[0], x_lims[1] + 1.0, 1.0)
    colors = _colors.create_colors_for_labels(labels)

    fig, axs = plt.subplots(
        figsize=(6, 2 * n_subplots),
        nrows=n_subplots,
        ncols=1,
        sharex=True,
        sharey=False,
        # Always return an array of axes, also for a single subplot.
        squeeze=False,
    )
    axs = axs[:, 0]

    # Figure-level title: `plt.title` would target the last subplot only and
    # be overwritten by that subplot's own title below.
    fig.suptitle("Abundance of LSWRs")

    for ax, element in zip(axs, assignments):
        # NOTE(review): with `density=True` and unit-width bins the bar
        # heights are fractions in [0, 1], although the y label says [%].
        _, _, patches = ax.hist(element, bins=bins, density=True)

        if len(colors) != len(patches):
            raise RuntimeError(
                "Length of colors does not match number of patches in histogram"
            )

        for color, patch in zip(colors, patches):
            patch.set_facecolor(color)

        ax.set_title(element.name)
        ax.set_ylabel("Abundance [%]")
        ax.set_xlim(*x_lims)
        ax.set_xticks(labels)
        ax.set_xticklabels(labels, rotation=90)
        ax.yaxis.grid(True)

    # The x axis is shared, so only the bottom subplot gets a label.
    axs[-1].set_xlabel("LSWR")

    fig.tight_layout()
    plt.savefig(plots / "lswrs-abundance-comparison.pdf")


plot_abundance(lswrs)

In [None]:
%%time

import seaborn as sns
import itertools


def plot_transition_matrix_heatmap(
    assignments: list[xr.DataArray],
) -> None:
    """Draw the transition matrices of all methods side by side.

    Every element of ``assignments`` gets its own heatmap; all heatmaps use
    the same color range and a single colorbar in an extra, narrow axes. The
    figure is saved to
    ``plots / "lswrs-transition-probabilities-comparison.pdf"``.
    """
    n_panels = len(assignments)

    # One panel per method plus one narrow axes reserved for the colorbar.
    fig, axs = plt.subplots(
        figsize=(6 * n_panels, 6),
        nrows=1,
        ncols=n_panels + 1,
        gridspec_kw={"width_ratios": [1] * n_panels + [0.08]},
    )
    *panels, colorbar_ax = axs

    # Link the y axes of neighbouring heatmaps.
    for left, right in itertools.pairwise(panels):
        left.sharey(right)

    matrices = np.array(
        [
            a6.plotting.transitions._calculate_markov_transition_matrix(series)
            for series in assignments
        ]
    )
    # Common color range over all methods.
    lowest, highest = matrices.min(), matrices.max()

    last = n_panels - 1
    for index, (panel, series) in enumerate(zip(panels, assignments)):
        if index < last:
            colorbar_options = {"cbar": False}
        else:
            # Only the last heatmap draws the (shared) colorbar.
            colorbar_options = {
                "cbar_ax": colorbar_ax,
                "cbar_kws": {"label": "probability"},
            }
        sns.heatmap(
            matrices[index],
            ax=panel,
            cmap="Reds",
            vmin=lowest,
            vmax=highest,
            **colorbar_options,
        )
        panel.set_title(series.name)

    fig.tight_layout()
    plt.savefig(plots / "lswrs-transition-probabilities-comparison.pdf")


plot_transition_matrix_heatmap(lswrs)

In [None]:
def plot_modes_durations(
    assignments: list[xr.DataArray],
) -> tuple[plt.Figure, plt.Axes]:
    """Plot the mode mean durations and standard deviation.

    One bar chart per element of ``assignments`` is drawn; the label range
    is derived from the modes of the first element. The figure is saved to
    ``plots / "lswrs-durations-comparison.pdf"``.

    Args:
        assignments: Label series to compare; ``name`` of each element is
            used as subplot title.

    Returns:
        The figure and the array of its subplot axes (one per element).
    """
    n_subplots = len(assignments)
    modes = [
        a6.modes.methods.determine_lifetimes_of_modes(a) for a in assignments
    ]
    labels = np.arange(1, modes[0].size + 1, 1, dtype=int)

    x_lims = labels.min() - 0.5, labels.max() + 0.5
    colors = _colors.create_colors_for_labels(labels)

    fig, axs = plt.subplots(
        figsize=(6, 2 * n_subplots),
        nrows=n_subplots,
        ncols=1,
        sharex=True,
        sharey=False,
        # Always return an array of axes, also for a single subplot.
        squeeze=False,
    )
    axs = axs[:, 0]

    # Figure-level title: `plt.title` would target the last subplot only and
    # be overwritten by that subplot's own title below.
    fig.suptitle("Duration of LSWRs")

    for ax, assignment, element in zip(axs, assignments, modes):
        (
            durations,
            stds,
        ) = a6.plotting.modes.statistics._calculate_mean_durations_and_standard_deviations(
            element
        )
        ax.bar(
            labels,
            durations,
            yerr=stds,
            width=1.0,  # removes gaps between the bars
            color=colors,
            align="center",
            alpha=1,
            ecolor="black",
            capsize=3,
        )

        ax.set_title(assignment.name)
        ax.set_ylabel("Mean duration [days]")
        ax.set_xlim(*x_lims)
        ax.set_xticks(labels)
        ax.set_xticklabels(labels, rotation=90)
        ax.yaxis.grid(True)

    # The x axis is shared, so only the bottom subplot gets a label.
    axs[-1].set_xlabel("LSWR")

    fig.tight_layout()
    plt.savefig(plots / "lswrs-durations-comparison.pdf")
    return fig, axs


plot_modes_durations(lswrs)

In [None]:
%%time


def calculate_mean_and_std(data: xr.Dataset, assignments: xr.DataArray):
    """Compute per-mode mean and std fields of ``z_h`` at level 500.

    ``data`` is reduced to level 500 and passed through a6's geopotential
    height calculation. For every mode found in ``assignments`` the ``z_h``
    fields at the mode's dates (nearest time step) are averaged over time.

    Returns:
        Tuple of ``(name, modes, means, means_min, means_max, stds,
        stds_min, stds_max)`` where the min/max values are taken over all
        modes.
    """
    pipeline = (
        a6.datasets.methods.select.select_levels(levels=500)
        >> a6.features.methods.geopotential.calculate_geopotential_height()
    )
    z_h = pipeline.apply_to(data)["z_h"]

    modes = a6.modes.methods.determine_lifetimes_of_modes(assignments)

    means = []
    stds = []
    for mode in modes:
        mode_dates = list(mode.get_dates())
        z_h_of_mode = z_h.sel(time=mode_dates, method="nearest")
        means.append(z_h_of_mode.mean("time"))
        stds.append(z_h_of_mode.std("time"))

    # Global extrema over all modes, e.g. for a common color range.
    means_min = min(field.min() for field in means)
    means_max = max(field.max() for field in means)
    stds_min = min(field.min() for field in stds)
    stds_max = max(field.max() for field in stds)

    return (
        assignments.name,
        modes,
        means,
        means_min,
        means_max,
        stds,
        stds_min,
        stds_max,
    )


# One result tuple per method, in the order of `lswrs`.
means_stds = [
    calculate_mean_and_std(data=ds, assignments=lswr) for lswr in lswrs
]

In [None]:
%%time
import cartopy.crs as ccrs


def round_to_decade(value: xr.DataArray) -> int:
    """Return ``value`` rounded to the nearest multiple of ten as ``int``."""
    rounded = np.round(value.values, decimals=-1)
    return int(rounded)


def plot_geopotential_mean_and_std(
    name: str,
    modes,
    means: list[xr.DataArray],
    means_min: float,
    means_max: float,
    stds: list[xr.DataArray],
    stds_min: float,
    stds_max: float,
) -> tuple[plt.Figure, plt.Axes]:
    """Plot mean contours on top of a std heatmap for every LSWR.

    The arguments are expected in the order returned by
    ``calculate_mean_and_std``. One map per mode is drawn on a fixed 8 x 5
    grid and the figure is saved to
    ``plots / f"lswrs-geopotential-heights-{name}.pdf"``.

    Args:
        name: Method name; used as figure title and in the file name.
        modes: Modes of the method; only ``modes.size`` is read.
        means: Mean field per mode (drawn as contour lines).
        means_min: Minimum over all mean fields (lower contour level bound).
        means_max: Maximum over all mean fields (upper contour level bound).
        stds: Standard deviation field per mode (drawn as heatmap).
        stds_min: Lower bound of the common heatmap color range.
        stds_max: Upper bound of the common heatmap color range.

    Returns:
        The figure and the 2D array of its subplot axes.

    Raises:
        ValueError: If the number of modes differs from the 40 grid cells.
    """
    n_rows = 8
    n_cols = 5

    if n_rows * n_cols != modes.size:
        raise ValueError("Number of plots not equal to number of LSWRs")

    height = n_rows * 2
    width = n_cols * 2.5

    fig, axs = plt.subplots(
        figsize=(width, height),
        nrows=n_rows,
        ncols=n_cols,
        subplot_kw=a6.plotting.coastlines.create_projection(),
    )
    # Common contour levels for all maps, in steps of 5 between the rounded
    # global extrema of the mean fields.
    levels = list(
        range(
            round_to_decade(means_min),
            round_to_decade(means_max),
            5,
        )
    )

    for i, ax in enumerate(axs.flatten()):
        # NOTE(review): the heatmap is drawn without `transform`, the
        # contours with `ccrs.PlateCarree()` -- confirm that this is
        # intended for the projection in use.
        stds[i].plot(
            ax=ax,
            cmap="RdBu",
            vmin=stds_min,
            vmax=stds_max,
            add_colorbar=False,
        )
        means[i].plot.contour(
            ax=ax,
            levels=levels,
            cmap="cool",
            transform=ccrs.PlateCarree(),
            linewidths=1.0,
        )
        ax.coastlines(alpha=0.5)
        # Modes are numbered starting at 1.
        ax.set_title(f"{i + 1}")

    fig.suptitle(name)
    # TODO: add colorbars for the heatmap and the contour levels.
    fig.tight_layout()
    plt.savefig(plots / f"lswrs-geopotential-heights-{name}.pdf")
    return fig, axs


for mean_std in means_stds:
    plot_geopotential_mean_and_std(*mean_std)

## Relation of LSWRs to Power Production

In [None]:
%%time

"""
Preprocess turbine data. This takes ~22 min, so prefer loading (cell below)
if no changes to the preprocessing are required.
"""

# Recursively collect all NetCDF files of the raw production data.
production_dir = pathlib.Path("/p/home/jusers/emmerich1/juwels/data/production")
paths = list(production_dir.glob("**/*.nc"))
print(paths)


def remove_outliers(data: xr.Dataset) -> xr.Dataset:
    """Drop all entries with implausible production values.

    The power rating is parsed from the first token of the
    ``"power rating"`` attribute (and printed). An entry is kept only if its
    production is strictly positive and its absolute value is below 105 % of
    the power rating.
    """
    power_rating = float(data.attrs["power rating"].split()[0])
    print(power_rating)

    production = data["production"]
    # |P| < 1.05 * power_rating, i.e. 5 % tolerance above the rating
    below_limit = abs(production) < 1.05 * power_rating
    # P > 0
    is_positive = production > 0

    return data.where(below_limit & is_positive, drop=True)


# Maps the file name of every turbine (including the ".nc" suffix) to its
# production data with outliers removed.
turbines: dict[str, xr.Dataset] = {
    path.name: remove_outliers(xr.open_dataset(path)) for path in paths
}

cleaned_dir = pathlib.Path(
    "/p/home/jusers/emmerich1/juwels/data/production-cleaned-for-analysis"
)
# `cleaned` instead of `data` as loop variable to not overwrite the global
# `data` of the earlier cells.
for name, cleaned in turbines.items():
    # `name` already ends in ".nc"; appending the suffix once more would
    # create "<turbine>.nc.nc" files.
    # NOTE: files with the doubled suffix written by earlier runs should be
    # removed, otherwise the loading cell picks up both variants.
    cleaned.to_netcdf(cleaned_dir / name)

In [None]:
# Open preprocessed turbine data if available
cleaned_dir = pathlib.Path(
    "/p/home/jusers/emmerich1/juwels/data/production-cleaned-for-analysis"
)
paths = list(cleaned_dir.glob("**/*.nc"))

# Maps the file name of every turbine to its (cleaned) production dataset.
turbines: dict[str, xr.Dataset] = {
    path.name: xr.open_dataset(path) for path in paths
}

In [None]:
%%time

import dataclasses
import datetime


def _empty_list_field():
    """Return a dataclass field that defaults to a new, empty list."""
    return dataclasses.field(default_factory=list)


@dataclasses.dataclass
class PowerPerMode:
    """Production statistics collected for a single LSWR (mode)."""

    # Label of the mode.
    label: int
    # Number of contributing days, one entry per turbine.
    measurements: list[int] = _empty_list_field()
    # Daily production sums of all turbines.
    sum: list[float] = _empty_list_field()
    # Daily production means / standard deviations of all turbines.
    mean: list[float] = _empty_list_field()
    std: list[float] = _empty_list_field()
    # Same as `mean` / `std`, divided by the turbine's power rating.
    normalized_mean: list[float] = _empty_list_field()
    normalized_std: list[float] = _empty_list_field()


def get_power_per_lswr(data: xr.DataArray) -> dict[int, PowerPerMode]:
    """Collect the daily production statistics of all turbines per LSWR.

    For every turbine in the module-level ``turbines`` mapping the
    production is resampled to daily sum, mean and standard deviation. For
    every mode found in ``data`` the days on which both the mode occurred
    and production data exist are selected, and their statistics are
    appended to the ``PowerPerMode`` entry of that mode.

    Args:
        data: Label series of one method; its ``name`` is used for the
            progress output.

    Returns:
        Mapping from mode label to the accumulated statistics.
    """
    modes = a6.modes.methods.determine_lifetimes_of_modes(data)
    # The dates of a mode do not depend on the turbine, hence the lookup sets
    # are built once instead of once per turbine and mode.
    dates_per_mode = [
        set(xr.DataArray(list(mode.get_dates())).values) for mode in modes
    ]

    power_per_mode = {
        mode.label: PowerPerMode(label=mode.label) for mode in modes
    }

    for i, turbine in enumerate(turbines.values()):
        print(f"{data.name}: {i}/{len(turbines)}", end="\r")
        power_rating = float(turbine.attrs["power rating"].split()[0])

        # Resample to daily production and calculate sum, relative mean and std
        resampled = turbine["production"].resample({"time": "1d"}, skipna=True)
        daily_sum = resampled.sum(skipna=True)
        daily_mean = resampled.mean(skipna=True)
        daily_mean_normalized = daily_mean / power_rating
        daily_std = resampled.std(skipna=True)
        daily_std_normalized = daily_std / power_rating

        # Days for which this turbine has production data.
        production_days = set(daily_sum.time.values)

        for mode, mode_dates in zip(modes, dates_per_mode, strict=True):
            mode_power = power_per_mode[mode.label]

            # Get time steps of production where LSWR appeared
            intersection = sorted(production_days & mode_dates)

            # Count number of days that contribute to the results
            mode_power.measurements.append(len(intersection))

            # Append the statistics of the selected days to the mode's lists.
            for collected, daily in (
                (mode_power.sum, daily_sum),
                (mode_power.mean, daily_mean),
                (mode_power.normalized_mean, daily_mean_normalized),
                (mode_power.std, daily_std),
                (mode_power.normalized_std, daily_std_normalized),
            ):
                selected = daily.sel(time=intersection)
                collected.extend(selected.values.flatten().tolist())
    return power_per_mode


power_per_method = {lswr.name: get_power_per_lswr(lswr) for lswr in lswrs}

In [None]:
import pandas as pd

latex_code = []


def power_mean_with_std_as_string(stats: list[float]) -> str:
    """Format ``stats`` as LaTeX math ``$mean \\pm std$`` in percent.

    Mean and standard deviation ignore NaNs, are multiplied by 100 and
    printed with two decimals.
    """
    # Raw f-string: `\p` is not a valid escape sequence in a normal string.
    return rf"${np.nanmean(stats) * 100:.2f} \pm {np.nanstd(stats) * 100:.2f}$"


# Table columns: LaTeX header -> function deriving one value per LSWR from
# the `{label: PowerPerMode}` mapping of a single method.
# The headers are raw strings: `\m` and `\%` are not valid escape sequences
# in normal string literals.
columns = {
    "$N$": lambda per_mode: [
        np.nansum(stats.measurements) for stats in per_mode.values()
    ],
    r"$P_{\mathrm{total}}$ [kW]": lambda per_mode: [
        f"{int(np.nansum(stats.sum)):d}" for stats in per_mode.values()
    ],
    r"$P^{\mathrm{mean}}_{\mathrm{normalized}}$ [\%]": lambda per_mode: [
        power_mean_with_std_as_string(stats.normalized_mean)
        for stats in per_mode.values()
    ],
    r"$P^{\mathrm{std}}_{\mathrm{normalized}}$ [\%]": lambda per_mode: [
        power_mean_with_std_as_string(stats.normalized_std)
        for stats in per_mode.values()
    ],
}

# One DataFrame column per (method, table column) pair.
reform = {
    (name, column): func(method)
    for name, method in power_per_method.items()
    for column, func in columns.items()
}

df = pd.DataFrame.from_dict(reform)

# Add 1 to start indexing at 1 to be consistent with the LSWR labels
df.index += 1

code = df.to_latex(
    float_format="%.2f",
    label="production-per-lswr-per-method",
    caption="Power production for the resulting LSWRs.",
)


with open(
    "/p/home/jusers/emmerich1/juwels/code/a6/notebooks/power-production-table.tex",
    "w",
    encoding="utf-8",
) as f:
    f.write(code)

# Last expression of the cell: display the table.
df