# Chart tablehistograms

Using `nodetool tablehistograms` output, create graphs to easily see latency across tables.

Two modes are available:
- If the output file contains output from only a single run of tablehistograms (e.g., from a diagnostic tarball), then set `has_timestamps` to `False` to analyze.
- If the output file contains the output of multiple runs collected by a script, then set `has_timestamps` to `True` to analyze. The expected file format is as follows:

```
10.10.10.10
2024-03-11_11:29:35
OpsCenter/backup_reports.backup_reports_by_id histograms
Percentile  SSTables     Write Latency      Read Latency    Partition Size        Cell Count
                              (micros)          (micros)           (bytes)                  
50%             0.00              0.00              0.00               642                 1
...
```

The timestamp is produced with `date +"%Y-%m-%d_%H:%M:%S"`.

In addition, the output file will have an IP address on line 1 of the file if timestamps are present. This will allow the analysis of output files from multiple nodes.

In [None]:
# GLOBAL VARIABLES
# Path to the text file holding the `nodetool tablehistograms` output to chart.
file_path = '/path/to/tablehistograms.out'
# True when the file holds several timestamped runs; False when it holds a
# single run without timestamp lines (e.g. from a diagnostic tarball).
has_timestamps = True

In [None]:
# IMPORTS
import re

import datetime as datetime
import plotly.express as px
import pandas as pd

In [None]:
# Returns timestamp
def parse_timestamp(line):
    """
    Extracts the run timestamp from a line of the collected output.

    The timestamp is expected in the format written by
    `date +"%Y-%m-%d_%H:%M:%S"`, e.g. `2024-03-11_11:29:35`. It may appear
    anywhere in the line; surrounding text is ignored.

    Args:
        line: A single line of text containing the timestamp.

    Returns:
        A naive `datetime.datetime` for the first timestamp found in the line.

    Raises:
        ValueError: If the line contains no timestamp of the expected shape,
            or the matched digits do not form a valid date/time.
    """
    # Match the full date_time shape rather than any run of digits/punctuation,
    # so a stray ':' or '-' earlier in the line cannot be mistaken for it.
    regex = r'\d{4}-\d{1,2}-\d{1,2}_\d{1,2}:\d{1,2}:\d{1,2}'
    match = re.search(regex, line)
    if match is None:
        raise ValueError(
            "No timestamp of the form YYYY-MM-DD_HH:MM:SS found in line: {!r}".format(line)
        )

    return datetime.datetime.strptime(match.group(0), '%Y-%m-%d_%H:%M:%S')

def parse_nodetool_tablehistograms(output, has_timestamps):
    """
    Parses the output of `nodetool tablehistograms` into a DataFrame.

    Lines are classified as follows:
      - lines starting with "Percentile" are column headers and are skipped;
      - lines containing "/" name the table ("keyspace/table histograms");
      - when `has_timestamps` is true, lines starting with a
        `YYYY-MM-DD_HH:MM:SS` timestamp (any year) start a new run;
      - lines with exactly six whitespace-separated fields are data rows;
      - everything else (blank lines, the units line, an IP address line)
        is ignored.

    Args:
        output: Raw text of one or more `nodetool tablehistograms` runs.
        has_timestamps: True if each run is preceded by a timestamp line.

    Returns:
        A DataFrame with one row per percentile line. With timestamps the
        columns are "Timestamp", "Table - Percentile" and the five metrics;
        without, they are "Table", "Percentile" and the five metrics. Metric
        columns are numeric; values that cannot be parsed become NaN. Input
        with no data rows yields an empty DataFrame with the same columns.
        Data rows seen before any timestamp line get a missing Timestamp.
    """
    metric_columns = ["SSTables", "Write Latency (micros)", "Read Latency (micros)",
                      "Partition Size (bytes)", "Cell Count"]
    # Columns depend only on the mode, so define them up front; this keeps
    # empty or header-less input from failing on an unbound name.
    if has_timestamps:
        columns = ["Timestamp", "Table - Percentile"] + metric_columns
    else:
        columns = ["Table", "Percentile"] + metric_columns

    # Recognise timestamp lines by shape, not by a hard-coded year.
    timestamp_line = re.compile(r'\d{4}-\d{1,2}-\d{1,2}_\d{1,2}:\d{1,2}:\d{1,2}')

    data = []
    current_table = ""
    current_timestamp = None
    for line in output.strip().split("\n"):
        if line.startswith("Percentile"):
            # Column header of a table section; nothing to record.
            continue
        elif "/" in line:
            # Table name line: "keyspace/table histograms"
            parts = line.strip().split()
            keyspace_table_parts = parts[0].split("/")
            current_table = "{}.{}".format(keyspace_table_parts[0], keyspace_table_parts[1])
        elif has_timestamps and timestamp_line.match(line.strip()):
            current_timestamp = parse_timestamp(line)
        else:
            # Data line
            parts = line.split()
            if len(parts) == 6:  # Ensure it's a data line
                if has_timestamps:
                    # Fold the table into the percentile label so each
                    # table/percentile pair becomes its own series.
                    parts[0] = "{} - {}".format(current_table, parts[0])
                    data.append([current_timestamp] + parts)
                else:
                    data.append([current_table] + parts)

    df = pd.DataFrame(data, columns=columns)
    # Convert numeric columns, handling "NaN" strings and other non-numeric values
    for col in df.columns[2:]:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    return df

In [None]:
# Sample input in the timestamped format: two runs (2024-03-11 and 2024-03-13),
# each covering three OpsCenter tables. It has no leading IP address line.
tablehistograms = """
2024-03-11_11:29:35
OpsCenter/backup_reports.backup_reports_by_id histograms
Percentile  SSTables     Write Latency      Read Latency    Partition Size        Cell Count
                              (micros)          (micros)           (bytes)                  
50%             0.00              0.00              0.00               642                 1
75%             0.00              0.00              0.00               770                 1
95%             0.00              0.00              0.00              1916                 1
98%             0.00              0.00              0.00              1916                 1
99%             0.00              0.00              0.00              1916                 1
Min             0.00              0.00              0.00               125                 0
Max             0.00              0.00              0.00              1916                 1

OpsCenter/backup_reports histograms
Percentile  SSTables     Write Latency      Read Latency    Partition Size        Cell Count
                              (micros)          (micros)           (bytes)                  
50%             1.00              0.00           1310.72           1131752                72
75%             1.00              0.00           1310.72           1358102               103
95%             1.00              0.00           1310.72           3379391               149
98%             1.00              0.00           1310.72           8409007               258
99%             1.00              0.00           1310.72           8409007               258
Min             1.00              0.00           1310.72             88149                 9
Max             1.00              0.00           1572.86           8409007               258

OpsCenter/bestpractice_results histograms
Percentile  SSTables     Write Latency      Read Latency    Partition Size        Cell Count
                              (micros)          (micros)           (bytes)                  
50%             0.00              0.00              0.00             20501                29
75%             0.00              0.00              0.00            454826               770
95%             0.00              0.00              0.00           1131752               770
98%             0.00              0.00              0.00          52066354               770
99%             0.00              0.00              0.00          52066354               770
Min             0.00              0.00              0.00              6867                13
Max             0.00              0.00              0.00          52066354               770
2024-03-13_15:53:05
OpsCenter/backup_reports.backup_reports_by_id histograms
Percentile  SSTables     Write Latency      Read Latency    Partition Size        Cell Count
                              (micros)          (micros)           (bytes)                  
50%             0.00              0.00              0.00               642                 1
75%             0.00              0.00              0.00               770                 1
95%             0.00              0.00              0.00              1916                 1
98%             0.00              0.00              0.00              1916                 1
99%             0.00              0.00              0.00              1916                 1
Min             0.00              0.00              0.00               125                 0
Max             0.00              0.00              0.00              1916                 1

OpsCenter/backup_reports histograms
Percentile  SSTables     Write Latency      Read Latency    Partition Size        Cell Count
                              (micros)          (micros)           (bytes)                  
50%             1.00              0.00           1310.72           1131752                72
75%             1.00              0.00           1310.72           1358102               103
95%             1.00              0.00           1310.72           3379391               149
98%             1.00              0.00           1310.72           8409007               258
99%             1.00              0.00           1310.72           8409007               258
Min             1.00              0.00           1310.72             88149                 9
Max             1.00              0.00           1572.86           8409007               258

OpsCenter/bestpractice_results histograms
Percentile  SSTables     Write Latency      Read Latency    Partition Size        Cell Count
                              (micros)          (micros)           (bytes)                  
50%             0.00              0.00              0.00             20501                29
75%             0.00              0.00              0.00            454826               770
95%             0.00              0.00              0.00           1131752               770
98%             0.00              0.00              0.00          52066354               770
99%             0.00              0.00              0.00          52066354               770
Min             0.00              0.00              0.00              6867                13
Max             0.00              0.00              0.00          52066354               770
"""

In [None]:
# Sample input in the single-run format (no timestamp lines): one run covering
# three OpsCenter tables.
tablehistogram = """
OpsCenter/backup_reports.backup_reports_by_id histograms
Percentile  SSTables     Write Latency      Read Latency    Partition Size        Cell Count
                              (micros)          (micros)           (bytes)                  
50%             0.00              0.00              0.00               642                 1
75%             0.00              0.00              0.00               770                 1
95%             0.00              0.00              0.00              1916                 1
98%             0.00              0.00              0.00              1916                 1
99%             0.00              0.00              0.00              1916                 1
Min             0.00              0.00              0.00               125                 0
Max             0.00              0.00              0.00              1916                 1

OpsCenter/backup_reports histograms
Percentile  SSTables     Write Latency      Read Latency    Partition Size        Cell Count
                              (micros)          (micros)           (bytes)                  
50%             1.00              0.00           1310.72           1131752                72
75%             1.00              0.00           1310.72           1358102               103
95%             1.00              0.00           1310.72           3379391               149
98%             1.00              0.00           1310.72           8409007               258
99%             1.00              0.00           1310.72           8409007               258
Min             1.00              0.00           1310.72             88149                 9
Max             1.00              0.00           1572.86           8409007               258

OpsCenter/bestpractice_results histograms
Percentile  SSTables     Write Latency      Read Latency    Partition Size        Cell Count
                              (micros)          (micros)           (bytes)                  
50%             0.00              0.00              0.00             20501                29
75%             0.00              0.00              0.00            454826               770
95%             0.00              0.00              0.00           1131752               770
98%             0.00              0.00              0.00          52066354               770
99%             0.00              0.00              0.00          52066354               770
Min             0.00              0.00              0.00              6867                13
Max             0.00              0.00              0.00          52066354               770
"""

In [None]:
# Load the whole tablehistograms output file into memory as one string
with open(file_path, mode='r') as file:
    nodetool_output = file.read()

In [None]:
# Turn the raw text into a DataFrame with one row per table/percentile line
df = parse_nodetool_tablehistograms(
    output=nodetool_output,
    has_timestamps=has_timestamps,
)

In [None]:
# Draw one line chart per metric column.
# With timestamps: time on the x-axis, one line per "table - percentile" pair.
# Without: percentile on the x-axis, one line per table.
if has_timestamps:
    x_column, series_column = "Timestamp", "Table - Percentile"
else:
    x_column, series_column = "Percentile", "Table"

metrics = (
    "SSTables",
    "Write Latency (micros)",
    "Read Latency (micros)",
    "Partition Size (bytes)",
    "Cell Count",
)
for metric in metrics:
    chart_title = "Table Histograms - {}".format(metric)
    fig = px.line(df, x=x_column, y=metric, title=chart_title, color=series_column)
    fig.show()