### Inspect the batch times of the GridION barcoded experiment and the PromethION barcoded experiment

Uses the old format of readfish logs - <>R/<>S

Runs were simulated using Icarust 0.0.7, commit cf27f12071f7c9b515883d5e3bf645aad4609831 using the `config_dnar10_5Khz_human_barcoded.toml`, a copy of which has been included in this repository.
They were each run for two hours using the command:
```bash
cargo run -r -- -s Profile_tomls/config_dnar10_5khz_human_barcoded.toml -c config_grid.ini -v -p 
```

Where the number of channels and the break_read_chunks were changed to 512 and 0.8 for the GridION and 3000 and 1.0 for the PromethION.

The runs were performed on a PromethION P48 Beta tower. The basecalling server was dorado, started using the following command:

```bash
sudo ./bin/dorado_basecall_server --log_path /var/log/dorado -d /data/projects/rory_says_hi/ont-dorado-server/data/ --config dna_r10.4.1_e8.2_400bps_5khz_fast_prom.cfg --ipc_threads 16 --port /tmp/.guppy/5556 --dorado_download_path /opt/ont/dorado-models --device cuda:all --num_alignment_threads 24
```

The profiling files were gathered using a special fork of readfish 2024.2.0, commit cd20ff16c5f3a5f54124515fe58aabde3dc8df3a
https://github.com/LooseLab/readfish/tree/profiling

The readfish logs for the experiments in the paper comparing NA12878 vs. 22Rv1 and NB4 are also included and they are analysed at the start of this notebook.

In [None]:
import argparse
import lzma
import re
import sys
from itertools import pairwise
from pathlib import Path
from string import ascii_uppercase

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.ticker import EngFormatter, FuncFormatter, ScalarFormatter

In [None]:
# Suppress pandas' SettingWithCopyWarning, which is triggered inside the
# facetted plotting function further down
pd.set_option("mode.chained_assignment", None)

# Notebook-wide matplotlib defaults: high resolution figures on a white
# background with grid lines, and a (practically) unlimited size for embedded
# animations
_RC_OVERRIDES = {
    "figure.dpi": 300,
    "figure.facecolor": "w",
    "axes.grid": True,
    "savefig.facecolor": "w",
    "animation.embed_limit": 2**128,
}
mpl.rcParams.update(_RC_OVERRIDES)

### Analyse the original logs.

In [None]:
def reader(fn):
    """Parse an old-format readfish log into a DataFrame of batch timings.

    Only lines containing a ``<reads>R/<seconds>s`` batch summary are kept;
    every other line of the log is ignored.

    Parameters
    ----------
    fn : str or os.PathLike
        Path to the log file. Files whose name ends in ``xz`` are opened with
        ``lzma``, anything else with the builtin ``open``.

    Returns
    -------
    pandas.DataFrame
        One row per batch with the columns ``Time`` (datetime), ``Reads``,
        ``Duration (s)`` and ``Per read (s)`` (duration divided by reads).
    """
    # The decimal point of the duration is escaped so that it only matches a
    # literal "." rather than any character
    pat = re.compile(r"(.*?) [\w\.]+ (\d+)R\/(\d+\.\d+)s")

    def _reader(fh):
        """Yield ``(time, reads, duration)`` string tuples for matching lines."""
        for line in fh:
            if m := pat.search(line):
                yield m.groups()

    # Accept pathlib.Path objects as well as plain strings
    fn = str(fn)
    handler = lzma.open if fn.endswith("xz") else open
    with handler(fn, "rt") as fh:
        df = pd.DataFrame(_reader(fh), columns=["Time", "Reads", "Duration (s)"])
    df["Time"] = pd.to_datetime(df["Time"], format="%Y-%m-%d %H:%M:%S,%f")
    df[["Reads", "Duration (s)"]] = df[["Reads", "Duration (s)"]].astype("float")
    df["Per read (s)"] = df["Duration (s)"] / df["Reads"]
    return df


def plot(data, filename, threshold, name=None, y="Duration (s)", ax=None):
    """Scatter a per-batch metric against time and save the figure.

    Points are drawn red when the batch ``Duration (s)`` is greater than
    ``threshold`` and blue otherwise, and are sized by the square root of the
    number of reads in the batch. A rug plot of the same data is drawn on the
    same axes.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``Time``, ``Duration (s)``, ``Reads`` and ``y``.
    filename : str or path-like
        Where the figure is written (passed to ``Figure.savefig``).
    threshold : float
        Duration above which a batch is coloured red.
    name : str, optional
        Figure title; no title is set when ``None``.
    y : str
        Column plotted on the y axis.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on. A new 18x6 figure is created when omitted.

    Returns
    -------
    matplotlib.axes.Axes
        The axes that were drawn on.
    """
    data = data.sort_values("Time", ascending=True)
    colour = np.where(data["Duration (s)"].gt(threshold), "r", "b")
    if ax is None:
        fig, ax = plt.subplots(figsize=(18, 6))
    else:
        # A caller supplied the axes, so title/save the figure that owns them
        fig = ax.figure
    sns.scatterplot(
        data=data,
        x="Time",
        y=y,
        c=colour,
        alpha=0.25,
        linewidths=0,
        edgecolor=None,
        s=data["Reads"].pow(1 / 2),
        ax=ax,
    )
    # Bind the rug to the same axes rather than whatever pyplot considers current
    sns.rugplot(data=data, x="Time", y=y, lw=1, alpha=0.005, ax=ax)
    if name is not None:
        fig.suptitle(name)
    fig.tight_layout()
    fig.savefig(filename)
    return ax

In [None]:
# df_prom = pd.concat([reader(f) for f in ["ru_prom_mappyrs.log.xz"]], ignore_index=True)

In [None]:
# fig = plot(df_prom, "test.pdf", 1, "test")

In [None]:
# ax = plot(df_prom, "test.pdf", 1, "test", y="Per read (s)")
# plt.yscale("log")
# ax.set_ylim((0.0001, 1))

### Profiling, using the sexy new branch of readfish
Ran against Icarust which was simulating either HG38 simple or HG38 simple chr1.


In [None]:
# Column names of the (header-less) profiling TSV files
PROFILE_COLUMNS = [
    "Loop",
    "Stage",
    "CPU Usage (%)",
    "Memory Usage (MB)",
    "Time (s)",
    "Cum. Time (s)",
    "Num. Chunks",
    "Yield",
    "Base",
    "Al",
]

dfs = []
# Sort the matches so the files are always loaded in the same order
for f in sorted(Path().glob("profile_log*.tsv.xz")):
    print(f)
    df = pd.read_csv(f, sep="\t", header=None, names=PROFILE_COLUMNS)
    # The sequencer and aligner are encoded in the file name; "mappy_rs" has to
    # be tried before "mappy" so the longer name wins
    match = re.search(r"(prom|gridion)_(mappy_rs|mappy|built_in)", f.stem)
    if match is None:
        raise ValueError(f"Cannot determine sequencer/aligner from file name: {f}")
    sequencer, aligner = match.groups()
    df["Sequencer"] = sequencer
    df["Aligner"] = aligner
    df["Sequencer/Aligner"] = f"{sequencer}/{aligner}"
    dfs.append(df)
if not dfs:
    raise FileNotFoundError("No files matching 'profile_log*.tsv.xz' were found")
# ignore_index gives every row a unique label instead of repeating 0..n per file
df_sim = pd.concat(dfs, ignore_index=True)
# The first column holds "<timestamp>,<loop number>"; split it into two columns
df_sim[["Timestamp", "Loop"]] = df_sim["Loop"].str.split(",", expand=True)

In [None]:
# Columns that must be numeric for the analysis below ("Yield" was previously
# listed twice, which only repeated the same conversion)
numerical_columns = [
    "Loop",
    "Yield",
    "Time (s)",
    "Cum. Time (s)",
    "Num. Chunks",
    "Base",
    "Al",
]
for numerical_column in numerical_columns:
    # errors="coerce" turns anything that cannot be parsed into NaN
    df_sim[numerical_column] = pd.to_numeric(df_sim[numerical_column], errors="coerce")
df_sim["Timestamp"] = pd.to_datetime(df_sim["Timestamp"])

Get only the total loop time, which starts when we query for a new batch of read chunks and finishes after we have basecalled, aligned and sent decisions for all chunks in the batch.

In [None]:
# Keep only the "Total loop" rows, ordered by loop number
is_total_loop = df_sim["Stage"] == "Total loop"
df_all = df_sim[is_total_loop].sort_values("Loop")
# Set the alignment time to match the basecalling time for built in dorado
# alignments, as there is no way of separating these; they are essentially the
# same when dorado does the alignments
uses_built_in_aligner = df_all["Sequencer/Aligner"].str.contains("built")
df_all["Al"] = np.where(uses_built_in_aligner, df_all["Base"], df_all["Al"])

The length of time each experiment was run for

In [None]:
# Elapsed time per sequencer/aligner combination: last timestamp minus first
timestamps_by_run = df_all.groupby("Sequencer/Aligner")["Timestamp"]
timestamps_by_run.max() - timestamps_by_run.min()

#### Plot bivariate of alignment time vs. basecalling time, all on one plot, splitting them out by hue, with KDE density estimates in the margins

In [None]:
# One joint scatter of alignment vs. basecalling time, coloured per
# sequencer/aligner combination
g = sns.jointplot(
    data=df_all, x="Al", y="Base", hue="Sequencer/Aligner", kind="scatter", height=8
)

# Restrict both axes to the 0-2 second window
g.ax_joint.set(xlim=(0, 2), ylim=(0, 2))
g.set_axis_labels(xlabel="Alignment (s)", ylabel="Basecalling (s)")
g.savefig("test.pdf")

In [None]:
# Normalise the number of bases in a batch by the number of chunks in that batch
mean_read_len = df_all["Yield"].div(df_all["Num. Chunks"])
# Batches where the division produced NaN (e.g. 0 bases / 0 chunks) are set to 0
df_all["Mean Read Len."] = mean_read_len.fillna(0.0)

#### Plot bivariate of alignment time vs. basecalling time, all on one plot, splitting them out by hue, with KDE density estimates in the margins
But this time do it manually, so we can weight the size of the points on the plot by the number of bases in a batch, and also stack the KDE density

In [None]:
g = sns.JointGrid(data=df_all, x="Al", y="Base", hue="Sequencer/Aligner", height=8)
# g.plot_joint(sns.scatterplot, sizes=(20,200), alpha=0.5)

# Only plot batches that fall inside the 0-2 second window shown below
data = df_all[(df_all["Al"] <= 2.0) & (df_all["Base"] <= 2.0)]

# Joint axes: points sized by the number of bases in the batch
sns.scatterplot(
    data=data,
    x="Al",
    y="Base",
    hue="Sequencer/Aligner",
    size="Yield",
    sizes=(20, 200),
    ax=g.ax_joint,
)

# Marginal axes: stacked density estimates, one layer per sequencer/aligner
marginal_kde_kwargs = dict(
    data=data,
    hue="Sequencer/Aligner",
    multiple="stack",
    legend=False,
    gridsize=200,
)
sns.kdeplot(ax=g.ax_marg_x, x="Al", **marginal_kde_kwargs)
sns.kdeplot(ax=g.ax_marg_y, y="Base", **marginal_kde_kwargs)

g.ax_joint.set_xlim(0, 2)
g.ax_joint.set_ylim(0, 2)
g.set_axis_labels(xlabel="Alignment (s)", ylabel="Basecalling (s)")
g.savefig("test_manual_weights.pdf", dpi=300, bbox_inches="tight")

#### Same as above Jointplot, but weighted by the mean read length of the batch

In [None]:
g = sns.JointGrid(data=df_all, x="Al", y="Base", hue="Sequencer/Aligner", height=8)

# Only plot batches that fall inside the 0-2 second window shown below
data = df_all[(df_all["Al"] <= 2.0) & (df_all["Base"] <= 2.0)]

# Joint axes: points sized by the mean read length of the batch
sns.scatterplot(
    data=data,
    x="Al",
    y="Base",
    hue="Sequencer/Aligner",
    size="Mean Read Len.",
    sizes=(20, 200),
    ax=g.ax_joint,
)

# Marginal axes: stacked density estimates, one layer per sequencer/aligner
marginal_kde_kwargs = dict(
    data=data,
    hue="Sequencer/Aligner",
    multiple="stack",
    legend=False,
    gridsize=200,
)
sns.kdeplot(ax=g.ax_marg_x, x="Al", **marginal_kde_kwargs)
sns.kdeplot(ax=g.ax_marg_y, y="Base", **marginal_kde_kwargs)

g.ax_joint.set_xlim(0, 2)
g.ax_joint.set_ylim(0, 2)
g.set_axis_labels(xlabel="Alignment (s)", ylabel="Basecalling (s)")
# Written to its own file so the yield-weighted figure saved as
# "test_manual_weights.pdf" is not overwritten
g.savefig("test_manual_weights_mean_read_len.pdf", dpi=300, bbox_inches="tight")

In [None]:
# Function to create a size legend with absolute sizes as determined by the plot
def create_size_legend(ax, global_min, global_max, scale=100):
    """Attach a legend of grey reference markers for a fixed set of sizes to ``ax``.

    Each reference size is converted to a marker area with ``normalize_sizes``
    using ``global_min``, ``global_max`` and ``scale``.

    NOTE(review): ``normalize_sizes`` is not defined in the visible part of
    this notebook - confirm it is available before calling this function.
    """
    # Absolute sizes you want to display
    reference_sizes = [
        100_000,
        300_000,
        500_000,
        1_000_000,
        2_500_000,
        5_000_000,
        10_000_000,
    ]
    # Empty scatters: nothing is drawn on the axes, they only act as legend handles
    markers = [
        plt.scatter(
            [],
            [],
            s=normalize_sizes(np.array([value]), global_min, global_max, scale),
            alpha=0.5,
            color="gray",
            edgecolors="w",
        )
        for value in reference_sizes
    ]
    size_legend = ax.legend(
        markers,
        reference_sizes,
        title="Size (absolute)",
        scatterpoints=1,
        frameon=True,
        labelspacing=1,
    )
    ax.add_artist(size_legend)

In [None]:
# Function to determine colors based on size limits


def get_colors(sizes, limits=None):
    limits = [-1, 250_000, 500_000, 750_000, 1_000_000] if limits is None else limits
    conditions = [(sizes > a) & (sizes <= b) for a, b in pairwise(limits)]
    conditions.append((sizes > limits[-1]))
    choices = ["green", "orange", "red", "blue", "purple", "yellow", "teal"]
    return np.select(conditions, choices[: len(conditions)], default="gray")

In [None]:
# Order the rows by sequencer/aligner combination
df_all = df_all.sort_values(by="Sequencer/Aligner")

#### As it was hard to see groupings where sequencers piled up points on the above plots, split them out into a faceted grid
Code for facet grid taken from https://github.com/mwaskom/seaborn/blob/09195d4c314d9477866ba4bd5cb650c3a7c01e41/seaborn/axisgrid.py#L1679

In [None]:
# Display the total-loop table for a visual check
df_all

In [None]:
# Min-max scale the yield across all aligner/sequencer combinations at once,
# otherwise each axis has its own max/min size and they aren't comparable
# (the result is in the range 0-100)
yield_min = df_all["Yield"].min()
yield_span = df_all["Yield"].max() - yield_min
df_all["Norm. bases"] = (df_all["Yield"] - yield_min) / yield_span * 100
# Map the short sequencer/aligner keys taken from the file names to the nicer
# names used in the panel titles
lookup = {
    "gridion": "Gridion",
    "prom": "PromethION",
    "mappy": "Mappy",
    "mappy_rs": "Mappy-rs",
    "built_in": "Dorado Alignments",
}


# Sentinel that distinguishes "labels not supplied" (use A, B, C, ...) from an
# explicit falsy value such as ``labels=None`` (draw no panel labels)
_DEFAULT_PANEL_LABELS = object()


def _hide_marginal_decorations(ax_marg_x, ax_marg_y):
    """Hide tick labels, density-axis tick lines and density-axis grids on the marginals."""
    plt.setp(ax_marg_x.get_xticklabels(), visible=False)
    plt.setp(ax_marg_y.get_yticklabels(), visible=False)
    plt.setp(ax_marg_x.get_xticklabels(minor=True), visible=False)
    plt.setp(ax_marg_y.get_yticklabels(minor=True), visible=False)
    plt.setp(ax_marg_x.yaxis.get_majorticklines(), visible=False)
    plt.setp(ax_marg_x.yaxis.get_minorticklines(), visible=False)
    plt.setp(ax_marg_y.xaxis.get_majorticklines(), visible=False)
    plt.setp(ax_marg_y.xaxis.get_minorticklines(), visible=False)
    plt.setp(ax_marg_x.get_yticklabels(), visible=False)
    plt.setp(ax_marg_y.get_xticklabels(), visible=False)
    plt.setp(ax_marg_x.get_yticklabels(minor=True), visible=False)
    plt.setp(ax_marg_y.get_xticklabels(minor=True), visible=False)
    ax_marg_x.yaxis.grid(False)
    ax_marg_y.xaxis.grid(False)


def draw_facetted_joints(
    df: pd.DataFrame,
    x="Al",
    y="Base",
    s="Yield",
    row_key: str = "Sequencer",
    col_key: str = "Aligner",
    ratio: int = 4,
    # legend_labels: list[str] = None,
    # legend_title="Num. bases",
    legend_limits=None,
    #     Factor to use to adjust the axis below
    dxf=0.03,
    dyf=0.065,
    labels=_DEFAULT_PANEL_LABELS,
    #     horizontal lines to draw on the joint axes
    hlines: dict[str, list[float]] | None = None,
    #     vertical lines to draw on the join axes
    vlines: dict[str, list[float]] | None = None,
):
    """Draw a grid of joint plots, one panel per (row_key, col_key) combination.

    Every panel is a scatter of ``x`` against ``y`` with a KDE of ``x`` above
    it and a KDE of ``y`` to its right. Points are sized by column ``s`` and
    coloured by binning ``s`` with ``get_colors``. Panel titles are built from
    the module-level ``lookup`` mapping.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot; must contain ``x``, ``y``, ``s``, ``row_key`` and
        ``col_key``.
    x, y : str
        Columns plotted on the horizontal and vertical axes. The axis labels
        are always "Alignment (s)" and "Basecalling (s)".
    s : str
        Column that determines point size and colour.
    row_key, col_key : str
        Columns whose unique values define the rows and columns of the grid.
    ratio : int
        Number of grid cells the joint axes span per panel; each marginal axes
        takes one further cell. Also scales the figure size.
    legend_limits : sequence of numbers, optional
        Bin edges forwarded to ``get_colors``.
    dxf, dyf : float
        Factors controlling how far each panel is shifted left / up so the
        panels are not cramped together.
    labels : iterable of str, optional
        Panel labels, consumed one per panel in row-major order. Defaults to
        the uppercase ASCII letters. Pass a falsy value (e.g. ``None``) to
        draw no labels; panels beyond the end of ``labels`` are left unlabelled.
    hlines, vlines : dict, optional
        Coordinates of dashed horizontal / vertical guide lines to draw, keyed
        by the panel's ``col_key`` value.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding all panels.
    """
    # Build a fresh label iterator on every call so calls do not share state
    if labels is _DEFAULT_PANEL_LABELS:
        labels = ascii_uppercase
    label_iter = iter(labels) if labels else None
    hlines = hlines or {}
    vlines = vlines or {}

    # Facet on the frame that was passed in, not on a notebook global
    cols = df[col_key].unique()
    rows = df[row_key].unique()
    ncols = len(cols)
    nrows = len(rows)
    label_fontsize = 14
    # Panels that keep their automatic limits instead of the 0-2 second window
    unclipped_panels = {("prom", "mappy"), ("mappy", "prom")}

    fig = plt.figure(figsize=(ratio * ncols, ratio * nrows))
    gs = plt.GridSpec((ratio * nrows) + nrows, (ratio * ncols) + ncols)
    for row_idx, row in enumerate(rows):
        for col_idx, col in enumerate(cols):
            label = next(label_iter, None) if label_iter is not None else None
            # Each panel owns a (ratio + 1) x (ratio + 1) patch of the grid: the
            # top grid row holds the x marginal, the right-most grid column the
            # y marginal and the remaining ratio x ratio cells the joint axes
            row_start = row_idx * (ratio + 1) + 1
            row_stop = (row_idx + 1) * (ratio + 1)
            col_start = col_idx * (ratio + 1)
            col_stop = col_start + ratio

            ax_joint = fig.add_subplot(gs[row_start:row_stop, col_start:col_stop])
            ax_marg_x = fig.add_subplot(
                gs[row_start - 1, col_start:col_stop], sharex=ax_joint
            )
            ax_marg_y = fig.add_subplot(
                gs[row_start:row_stop, col_stop], sharey=ax_joint
            )
            #             Make the plot look nice
            _hide_marginal_decorations(ax_marg_x, ax_marg_y)

            # Manually adjust the space between specific subplots, otherwise
            # they're too smushed together. The offsets depend on the panel's
            # position in the grid
            dx = -abs(ncols - col_idx + 1) * dxf
            dy = abs(nrows - row_idx + 1) * dyf
            for ax in [ax_joint, ax_marg_x, ax_marg_y]:
                pos = ax.get_position()
                ax.set_position([pos.x0 + dx, pos.y0 + dy, pos.width, pos.height])

            # Subset the main dataframe; copy so that adding the colour column
            # never writes into the caller's frame
            df_view = df[(df[row_key] == row) & (df[col_key] == col)].copy()
            df_view["colour"] = get_colors(df_view[s], limits=legend_limits)

            ax = sns.scatterplot(
                data=df_view,
                x=x,
                c=df_view["colour"],
                y=y,
                size=df_view[s],
                ax=ax_joint,
                legend=False,
                alpha=0.5,
            )
            ax.tick_params(labelsize=13)

            ax_marg_x = sns.kdeplot(
                data=df_view,
                x=x,
                ax=ax_marg_x,
                bw_adjust=0.1,
                fill=True,
            )
            ax_marg_y = sns.kdeplot(
                data=df_view,
                y=y,
                ax=ax_marg_y,
                bw_adjust=0.1,
                fill=True,
            )

            if (row, col) not in unclipped_panels:
                ax_joint.set_xlim(0, 2)
                ax_joint.set_ylim(0, 2)
            ax_marg_x.set_title(f"{lookup[row]} with {lookup[col]}", fontsize=14)

            # Only the bottom row gets an x label and only the first column a
            # y label
            if row_idx == nrows - 1:
                ax_joint.set_xlabel("Alignment (s)", fontsize=label_fontsize)
            else:
                ax_joint.set_xlabel("")
            if col_idx == 0:
                ax_joint.set_ylabel("Basecalling (s)", fontsize=label_fontsize)
            else:
                ax_joint.set_ylabel("")

            # label the panel if there is a label
            if label is not None:
                ax_marg_x.text(
                    -0.075,
                    0.75,
                    label,
                    ha="center",
                    va="center",
                    transform=ax_marg_x.transAxes,
                    fontsize=16,
                    fontweight="semibold",
                )

            # Draw vertical and horizontal guide lines if there are any for
            # this column value, on both the joint and the matching marginal
            for coord in vlines.get(col, []):
                ax_joint.axvline(coord, linewidth=0.75, linestyle="--", c="k")
                ax_marg_x.axvline(coord, linewidth=0.75, linestyle="--", c="k")
            for coord in hlines.get(col, []):
                ax_joint.axhline(coord, linewidth=0.75, linestyle="--", c="k")
                ax_marg_y.axhline(coord, linewidth=0.75, linestyle="--", c="k")
            ax_joint.yaxis.set_major_formatter(ScalarFormatter())

    sns.utils.despine(fig)
    return fig

In [None]:
# Function to create a figure-level legend explaining the point colours
def create_color_legend(fig, labels=None, title="Num. bases", **kwargs):
    """Add a legend of coloured markers, one per size bin, to ``fig``.

    ``labels`` is either ``None`` (use the default "Num. bases" bin labels) or
    a sequence of numeric bin edges, from which labels of the form
    ``"<a+1> - <b>"`` plus a final ``"> <last>"`` are generated. The keyword
    arguments ``ncols``, ``frame_on``, ``labelspacing``, ``bbox_to_anchor``
    and ``loc`` are consumed here; anything else is passed on to
    ``fig.legend``. Returns the created legend.
    """
    palette = ["green", "orange", "red", "blue", "purple", "yellow", "teal"]
    if labels is None:
        bin_labels = [
            "< 250,000",
            "250,001 - 500,000",
            "500,001 - 750,000",
            "750,001 - 1,000,000",
            "> 1,000,000",
        ]
    else:
        bin_labels = [f"{a+1} - {b}" for a, b in pairwise(labels)]
        bin_labels.append(f"> {labels[-1]}")

    # One round marker per bin, in the same colour order used for the points
    markers = [
        plt.Line2D(
            [0], [0], marker="o", color="w", markerfacecolor=shade, markersize=10
        )
        for shade in palette[: len(bin_labels)]
    ]

    # Pull out the options that have notebook-specific defaults; whatever is
    # left in kwargs is handed to fig.legend untouched
    n_columns = kwargs.pop("ncols", 1)
    show_frame = kwargs.pop("frame_on", True)
    label_spacing = kwargs.pop("labelspacing", 1)
    anchor = kwargs.pop("bbox_to_anchor", (0.05, 1.05))
    location = kwargs.pop("loc", "lower left")
    return fig.legend(
        markers,
        bin_labels,
        title=title,
        columnspacing=1,
        scatterpoints=1,
        ncol=n_columns,
        frameon=show_frame,
        labelspacing=label_spacing,
        fontsize=12,
        bbox_to_anchor=anchor,
        loc=location,
        **kwargs,
    )

In [None]:
# One row per sequencer and one column per aligner, points binned by yield
fig = draw_facetted_joints(df_all, row_key="Sequencer", col_key="Aligner", ratio=5)
# Colour legend centred on the given anchor point
create_color_legend(fig, loc="center", bbox_to_anchor=(1, 0.65))
fig.savefig("test_facetted.jpg", bbox_inches="tight")

In [None]:
# List the distinct sequencers present in the data
df_all["Sequencer"].unique()

In [None]:
# Smallest and largest batch mean read length
df_all["Mean Read Len."].agg(["min", "max"])

In [None]:
# Bin edges for colouring the points by batch mean read length
limits = (-1, 1000, 2000, 3000, 4000, 5000)
# Guide lines to draw, keyed by sequencer
lines = {"gridion": [0.8], "prom": [1.0]}

# One row per aligner and one column per sequencer
fig = draw_facetted_joints(
    df_all,
    s="Mean Read Len.",
    row_key="Aligner",
    col_key="Sequencer",
    ratio=5,
    legend_limits=limits,
    dxf=0.065,
    dyf=0.03,
    labels=iter(ascii_uppercase),
    hlines=lines,
    vlines=lines,
)
# Single-row, frameless legend above the panels
create_color_legend(
    fig,
    labels=limits,
    title="Batch Mean Read length",
    title_fontproperties={"weight": "normal", "size": 13},
    ncols=6,
    frame_on=False,
    labelspacing=0.65,
    loc="upper center",
    bbox_to_anchor=(0.31, 1.07),
)
for figure_path in ("figure_5.jpg", "figure_5.pdf"):
    fig.savefig(figure_path, bbox_inches="tight")


In [None]:
# Put every profiling row in chronological order (ties broken by loop number)
df_sim = df_sim.sort_values(by=["Timestamp", "Loop"])

In [None]:
# Replace the "*" placeholder in the memory column with NaN so it can be
# filled and converted to a number later (np.nan rather than the np.NaN alias,
# which no longer exists in NumPy 2.0)
df_sim["Memory Usage (MB)"] = np.where(
    df_sim["Memory Usage (MB)"] == "*", np.nan, df_sim["Memory Usage (MB)"]
)

In [None]:
# Carry the last known memory reading forward over the missing values
# (.ffill() replaces the deprecated fillna(method="ffill") spelling)
df_sim["Memory Usage (MB)"] = df_sim["Memory Usage (MB)"].ffill()

In [None]:
# Rebuild df_all from the time-ordered table, keeping only "Total loop" rows
df_all = df_sim.loc[df_sim["Stage"].eq("Total loop")]

In [None]:
# Keep only the first row for any timestamp that occurs more than once
df_all = df_all.drop_duplicates(subset="Timestamp")

In [None]:
# Renumber the rows 0..n-1, discarding the old index
df_all = df_all.reset_index(drop=True)

In [None]:
# Make sure both resource usage columns are numeric before plotting them
for resource_column in ("Memory Usage (MB)", "CPU Usage (%)"):
    df_all[resource_column] = pd.to_numeric(df_all[resource_column])

In [None]:
from datetime import datetime
# Display the table for a visual check
df_all

In [None]:
# Re-base each run's timestamps so that every run starts at 1970-01-01 00:00,
# which lets the runs share an elapsed-time axis. The epoch is built directly
# because datetime.utcfromtimestamp() is deprecated since Python 3.12
epoch = datetime(1970, 1, 1)
t = df_all.groupby("Sequencer/Aligner", as_index=False)["Timestamp"].apply(
    lambda x: epoch + (x - x.min())
)

In [None]:
# Drop the outer index level added by the groupby so the remaining level can
# be aligned with df_all's row index
t.index = t.index.droplevel(0)

In [None]:
# Overwrite the timestamps with the per-run re-based values (aligned on index)
df_all["Timestamp"] = t

In [None]:
import seaborn.objects as so
import matplotlib.dates as mdates
# Tick labels show the elapsed time as hours:minutes
myFmt = mdates.DateFormatter("%H:%M")
f = mpl.figure.Figure()
# One column per sequencer/aligner combination, with memory and CPU usage as
# paired rows, drawn as filled areas on the figure created above
p = (
    so.Plot(df_all, "Timestamp", color="Sequencer/Aligner")
    .pair(y=["Memory Usage (MB)", "CPU Usage (%)"])
    .layout(size=(8, 4))
    .facet(col="Sequencer/Aligner")
    .add(so.Area(), legend=False)
    .share(x=False)
    .on(f)
    .plot()
)
for i, ax in enumerate(p._figure.get_axes()):
    if i < 6:
        # Lift the title of every other axes so neighbouring titles don't collide
        title_lift = 0.08 if i % 2 == 0 else 0
        ax.set_title(ax.get_title(), fontsize=8, y=1 + title_lift)
    ax.xaxis.set_major_formatter(myFmt)
    ax.tick_params(axis="x", rotation=90, labelsize=6, pad=0.1, length=0.4)
    # Only the axes with index 8 carries the x label; all others are blanked
    if i == 8:
        ax.set_xlabel("Run time (H:M)", x=1, ha="center")
    else:
        ax.set_xlabel("")

f.align_ylabels()
for output_path in ("Supplemental_Fig_S6.pdf", "Supplemental_Fig_S6.jpg"):
    p.save(output_path, bbox_inches="tight", dpi=300)

In [None]:
# Long format: one row per (batch, timing type) with the timing in "value"
df2 = df_all.melt(
    id_vars=["Sequencer", "Aligner"],
    value_vars=["Base", "Al", "Time (s)"],
    var_name="Type",
)
# Single label combining sequencer, aligner and timing type
df2["Combined"] = [
    f"{sequencer}/{aligner}/{timing}"
    for sequencer, aligner, timing in zip(df2["Sequencer"], df2["Aligner"], df2["Type"])
]
# Stacked histogram of all timings, limited to the first 1.5 seconds
g = sns.displot(data=df2, x="value", hue="Combined", multiple="stack")
g.set(xlim=(0, 1.5))

In [None]:
# Same timings, one panel per sequencer (rows) and aligner (columns), with the
# three timing types overlaid as polygons
panel_layout = dict(row="Sequencer", col="Aligner")
g = sns.displot(
    data=df2, x="value", hue="Type", multiple="layer", element="poly", **panel_layout
)
g.set(xlim=(0, 1.5))

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from IPython.display import HTML
from matplotlib.animation import FuncAnimation
from mpl_toolkits.mplot3d import Axes3D

# Apply Seaborn styling
sns.set(style="whitegrid")

# Create a 3D plot
fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")

# One scatter per sequencer/aligner combination:
# alignment vs. basecalling vs. total loop time
for category, group in df_all.groupby("Sequencer/Aligner"):
    ax.scatter(
        group["Al"], group["Base"], group["Time (s)"], label=category, s=0.1, alpha=0.5
    )

# Set labels
ax.set_xlabel("Alignment (s)")
ax.set_ylabel("Base calling (s)")
ax.set_zlabel("Total Loop (s)")

# Limit every axis to the 0-2 second window
ax.set_xlim(0, 2)
ax.set_ylim(0, 2)
ax.set_zlim(0, 2)

# Initial camera position
ax.view_init(elev=20, azim=65)

# Add legend outside the plot, with enlarged markers so they are visible
ax.legend(bbox_to_anchor=(1.15, 0.5), loc="center left", markerscale=5)


def update(num, ax):
    """Rotate the camera of ``ax`` to azimuth ``num`` at a fixed elevation."""
    ax.view_init(elev=20, azim=num)


# Create animation: one full rotation in 2 degree steps
ani = FuncAnimation(fig, update, frames=range(0, 361, 2), fargs=(ax,), interval=100)
# Save animation as GIF
ani.save("rotation_animation.gif", writer="pillow", fps=10)

# Display animation
HTML(ani.to_jshtml())
# Rotate the axes and update
# for angle in range(0, 360*4 + 1):
#     # Normalize the angle to the range [-180, 180] for display
#     angle_norm = (angle + 180) % 360 - 180

#     # Cycle through a full rotation of elevation, then azimuth, roll, and all
#     elev = azim = roll = 0
#     if angle <= 360:
#         elev = angle_norm
#     elif angle <= 360*2:
#         azim = angle_norm
#     elif angle <= 360*3:
#         roll = angle_norm
#     else:
#         elev = azim = roll = angle_norm

#     # Update the axis view and title
#     ax.view_init(elev, azim, roll)
#     plt.title('Elevation: %d°, Azimuth: %d°, Roll: %d°' % (elev, azim, roll))

#     plt.draw()
#     plt.pause(.001)