# Storage Utilization Heatmap

In [None]:
import nsys_display
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo

from IPython.display import HTML, Markdown, display
from plotly.subplots import make_subplots

# Stretch the notebook's .container element to 95% of the page width.
display(HTML("<style>.container { width:95% !important; }</style>"))

# Lift the pandas display limits on column width and on the number of rows.
for display_option in ("display.max_colwidth", "display.max_rows"):
    pd.set_option(display_option, None)

# Initialize plotly's offline notebook mode before any figure is rendered.
pyo.init_notebook_mode()

## Throughput

This line graph displays the summary of all operations on NFS, Lustre, Local and NVMe-oF file systems
for the profiled volumes:
* x axis represents the rank duration, scaling from 0 to the maximum duration across all ranks.
* y axis represents the mean number of bytes read and written across all ranks.

In [None]:
# Read the throughput results stored in the parquet file.
throughput_df = pd.read_parquet("throughput_analysis.parquet")

if not throughput_df.empty:
    # Build a unique "Rank/Hostname/Volume/Name" label for every row and store
    # it back in the "Name" column.
    throughput_labels = throughput_df[["Rank", "Hostname", "Volume", "Name"]].astype(str)
    throughput_df["Name"] = (
        throughput_labels["Rank"]
        + "/"
        + throughput_labels["Hostname"]
        + "/"
        + throughput_labels["Volume"]
        + "/"
        + throughput_labels["Name"]
    )

    # Scale the durations from nanoseconds to seconds.
    throughput_df["Duration"] = throughput_df["Duration"].mul(1e-9)

    # Metric columns plotted here; this list is reused by the heatmap cell.
    throughput_metrics = ["Read", "Write"]

    nsys_display.display_summary_graph(
        throughput_df,
        throughput_metrics,
        xaxis_title="Duration (s)",
        yaxis_title="Value",
        title="Usage Summary (bins=REPLACE_BIN)",
    )
else:
    display(Markdown("No throughput data to display."))

These heatmaps display NFS, Lustre, Local and NVMe-oF file system operations, which are collected using NVTX counters
via the storage-metrics plugin and the --storage-metrics feature:
* x axis represents the rank duration, scaling from 0 to the maximum duration across all ranks.
* y axis represents the set of Rank/Hostname/Volume/Device name for which metrics were collected.

The heatmaps present:
* Bytes read
* Bytes written

In [None]:
# Draw one heatmap per throughput metric, or show a notice when the
# throughput DataFrame loaded earlier in the notebook has no rows.
if throughput_df.empty:
    # Use the rich Markdown display (not print) so the notice is rendered the
    # same way as the one shown by the throughput summary cell.
    display(Markdown("No throughput data to display."))
else:
    # throughput_metrics is defined by the throughput summary cell whenever
    # the DataFrame is non-empty.
    nsys_display.display_heatmaps(
        throughput_df,
        throughput_metrics,
        xaxis_title="Duration (s)",
        yaxis_title="Rank/Hostname/Volume/Device",
        zaxis_title="Bytes",
        title="Throughput (bins=REPLACE_BIN)",
    )

## Latency

This line graph displays the summary of all latency metrics on NFS file systems for the profiled volumes:
* x axis represents the rank duration, scaling from 0 to the maximum duration across all ranks.
* y axis represents the mean latency in milliseconds for read and write operations across all ranks.

In [None]:
# Read the latency results stored in the parquet file.
latency_df = pd.read_parquet("latency_analysis.parquet")

if not latency_df.empty:
    # Build a unique "Rank/Hostname/Volume/Name" label for every row and store
    # it back in the "Name" column.
    latency_labels = latency_df[["Rank", "Hostname", "Volume", "Name"]].astype(str)
    latency_df["Name"] = (
        latency_labels["Rank"]
        + "/"
        + latency_labels["Hostname"]
        + "/"
        + latency_labels["Volume"]
        + "/"
        + latency_labels["Name"]
    )

    # Scale the durations from nanoseconds to seconds.
    latency_df["Duration"] = latency_df["Duration"].mul(1e-9)

    # Metric columns plotted here (queue, RTT and exe for both reads and
    # writes); this list is reused by the heatmap cell.
    latency_metrics = [
        f"{operation} RPC {stage}"
        for operation in ("Read", "Write")
        for stage in ("queue", "RTT", "exe")
    ]

    nsys_display.display_summary_graph(
        latency_df,
        latency_metrics,
        xaxis_title="Duration (s)",
        yaxis_title="Value",
        title="Latency Summary (bins=REPLACE_BIN)",
    )
else:
    display(Markdown("No latency data to display."))

These heatmaps display latency metrics on NFS file systems, which are collected using NVTX counters
via the storage-metrics plugin and the --storage-metrics feature:
* x axis represents the rank duration, scaling from 0 to the maximum duration across all ranks.
* y axis represents the set of Rank/Hostname/Volume/Device name for which metrics were collected.

The heatmaps present:
* Read RPC queue, Read RPC RTT, Read RPC exe latencies in milliseconds
* Write RPC queue, Write RPC RTT, Write RPC exe latencies in milliseconds

In [None]:
# Draw one heatmap per read/write latency metric, or show a notice when the
# latency DataFrame loaded earlier in the notebook has no rows.
if latency_df.empty:
    # Use the rich Markdown display (not print) so the notice is rendered the
    # same way as the one shown by the latency summary cell.
    display(Markdown("No latency data to display."))
else:
    # latency_metrics is defined by the latency summary cell whenever the
    # DataFrame is non-empty.
    nsys_display.display_heatmaps(
        latency_df,
        latency_metrics,
        xaxis_title="Duration (s)",
        yaxis_title="Rank/Hostname/Volume/Device",
        zaxis_title="Latency duration (ms)",
        title="Latency (bins=REPLACE_BIN)",
    )

## Files

The table associates each rank number with the original filename. Ranks are assigned assuming that
the file names include the rank with sufficient zero padding for proper sorting. Otherwise, the
actual rank may differ from the assigned ID.

In [None]:
# Read the rank-to-filename table from its parquet file.
files_df = pd.read_parquet(path="files.parquet")

# Show the table with the rich DataFrame display.
display(files_df)