# ROS3 VFD Log Analysis Dashboard

In [None]:
from dataclasses import dataclass
import io
import re
import numpy as np
from bokeh.models import HoverTool
import holoviews as hv
import panel as pn
# Select Bokeh as the HoloViews plotting backend (the hover tool used by the
# dashboard below is a Bokeh model).
hv.extension('bokeh')
# Initialise Panel so its widgets and panes can be displayed and served.
pn.extension()

## Log Parser

Class representing the byte range of a single HTTP range GET request:

In [None]:
@dataclass(frozen=True)
class ByteRange:
    """Immutable description of one byte-range request against a file.

    ``start`` and ``end`` are byte offsets with ``end`` treated as inclusive
    (see ``size``); ``filesize`` is the size of the file being read.
    Instances are validated on construction and raise ``ValueError`` when
    the values are inconsistent.
    """

    start: int
    end: int
    filesize: int

    def __post_init__(self):
        # Each check raises, so independent guard clauses evaluated in this
        # order are equivalent to a single if/elif chain.
        if self.start < 0 or self.end <= 0 or self.filesize <= 0:
            raise ValueError('Start, end, and file size values must be positive integers')
        if self.end > self.filesize:
            raise ValueError('End value must be smaller or equal to file size')
        if self.start > self.end:
            raise ValueError('Start value must be smaller or equal to end value')

    @property
    def size(self):
        """Number of bytes covered by the range, counting both end points."""
        span = self.end - self.start
        return span + 1

    def __len__(self):
        """Allow ``len(byte_range)`` as a shorthand for ``byte_range.size``."""
        return self.size

In [None]:
def parse_fsspec_log(content: bytes) -> list[ByteRange]:
    head_line = re.compile('read: 0 - ')
    fsize_line = re.compile('FileSize: ([0-9]+)')
    range_line = re.compile('<File-like object S3FileSystem, .*?>\s*(read: \d+ - \d+)')

    ranges = list()
    with io.TextIOWrapper(io.BytesIO(content)) as logtxt:
        for line in logtxt:
            if head_line.match(line):
                break
        else:
            raise RuntimeError('HEAD line not found in the log file')

        for line in logtxt:
            match = fsize_line.match(line)
            if match:
                fsize = int(match.group(1))
                break
        else:
            raise RuntimeError('FILESIZE line not found in the log file')

        for line in logtxt:
            match = range_line.search(line)
            if match:
                range = ByteRange(start=int(match.group('start')), 
                                  end=int(match.group('end')),
                                  filesize=fsize)
                if range.size != int(match.group('size')):
                    raise ValueError(f'Reported size different for {match.group()}')
                ranges.append(range)
                
    return ranges

ros3vfd log file parser:

In [None]:
def parse_ros3vfd_log(content: bytes) -> list[ByteRange]:
    head_line = re.compile('HEAD: Bytes 0 - ')
    fsize_line = re.compile('FILESIZE: ([0-9]+)')
    range_line = re.compile('GET: Bytes (?P<start>[0-9]+) - (?P<end>[0-9]+), Request Size: (?P<size>[0-9]+)')

    ranges = list()
    with io.TextIOWrapper(io.BytesIO(content)) as logtxt:
        for line in logtxt:
            if head_line.match(line):
                break
        else:
            raise RuntimeError('HEAD line not found in the log file')

        for line in logtxt:
            match = fsize_line.match(line)
            if match:
                fsize = int(match.group(1))
                break
        else:
            raise RuntimeError('FILESIZE line not found in the log file')

        for line in logtxt:
            match = range_line.search(line)
            if match:
                range = ByteRange(start=int(match.group('start')), 
                                  end=int(match.group('end')),
                                  filesize=fsize)
                if range.size != int(match.group('size')):
                    raise ValueError(f'Reported size different for {match.group()}')
                ranges.append(range)
                
    return ranges

## Dashboard

Function for generating log stats and plots:

In [None]:
def plot_ros3vfd_log(from_file):
    """Build the dashboard content for an uploaded ros3vfd log.

    Args:
        from_file: Raw bytes of the log file, or ``None`` when nothing has
            been uploaded.

    Returns:
        ``None`` when ``from_file`` is ``None``; otherwise a list of Panel
        objects. That list is a single alert when the log is empty, cannot
        be parsed, or contains no range GET entries, and otherwise a row
        holding the statistics summary and request-size histogram followed
        by the plot of requested byte ranges.
    """
    if from_file is None:
        return
    if len(from_file) == 0:
        return [pn.pane.Alert('ros3vfd log file empty.', alert_type='danger')]

    try:
        ranges = parse_ros3vfd_log(from_file)
    except Exception as e:
        # Any parsing problem is reported to the user instead of propagating.
        return [pn.pane.Alert(f'Error: {str(e)}', alert_type='danger')]

    if len(ranges) == 0:
        return [pn.pane.Alert('No range `GET` info found.', alert_type='info')]

    n_requests = len(ranges)
    starts = np.fromiter((r.start for r in ranges), dtype=np.uint64)
    end = np.fromiter((r.end for r in ranges), dtype=np.uint64)
    sizes = np.fromiter((r.size for r in ranges), dtype=np.uint64)
    # Requests are numbered from 1 in log order.
    request_ids = np.arange(1, n_requests + 1)

    summary = pn.pane.Markdown(f"""
# ros3vfd Log Information

Log size: {len(from_file):,} bytes

HDF5 file size: {ranges[0].filesize:,} bytes

Number of range _GET_ requests: {len(ranges):,}

Overall range _GET_ requests stats:

* Smallest: {np.min(sizes):,} bytes<br/>
* Median: {int(np.median(sizes)):,} bytes<br/>
* Largest: {np.max(sizes):,} bytes

Maximum file byte read: {end.max():,}

Total of file content read: {sizes.sum():,} bytes

Percentage of content read to file size: {100 * (sizes.sum() / ranges[0].filesize) :.2f} %
""")

    # Initial view: file offsets up to 16,000,000 (or the largest end offset
    # if smaller), and request numbers up to one past the last request that
    # ends inside that window.
    offset_limit = min(16_000_000, np.max(end))
    in_window = np.where(end <= offset_limit)[0]
    request_limit = (
        request_ids[-1] if in_window.size == 0 else request_ids[in_window[-1]] + 1
    )

    # Each request is a segment from (start, req. no.) to (end, req. no.).
    segment_data = dict(
        start=starts, end=end, start_event=request_ids, end_event=request_ids
    )
    offset_dim = hv.Dimension('start', label='File offset', range=(0, offset_limit))
    request_dim = hv.Dimension('start_event', label='Req. No.', range=(0, request_limit))
    segments = hv.Segments(segment_data, [offset_dim, request_dim, 'end', 'end_event'])

    hover = HoverTool(
        tooltips=[
            ('req no', '@start_event'),
            ('start byte', '@start'),
            ('end byte', '@end'),
        ]
    )
    segments.opts(width=700, height=600, invert_axes=True, color='blue',
                  line_width=3, tools=[hover])

    histogram = hv.Histogram(np.histogram(sizes, bins=512))
    histogram.opts(color='blue', line_color=None, tools=['hover'],
                   xlabel='Size (bytes)', ylabel='Number of requests')

    return [pn.Row(summary, histogram), segments]

### Dashboard Components

In [None]:
# File upload widget; its ``value`` is watched below to trigger log processing.
log_file = pn.widgets.FileInput()
# Upload form: a prompt placed next to the file input.
upld_form = pn.Row(
    pn.pane.Markdown('Please select a ros3vfd log file (limit 10MB):'),
    log_file
)
# Initially empty container; the callback replaces its objects with the
# generated statistics and plots.
res = pn.Column()
# Top-level layout: upload form followed by the results.
app = pn.WidgetBox(upld_form, res)

Callback function that processes the uploaded log file and updates the dashboard:

In [None]:
def callback(value):
    """Replace the results column with the output generated for ``value``.

    ``value`` is the content passed on from the file input widget; it is
    handed to ``plot_ros3vfd_log`` and the result becomes the new content
    of ``res``.
    """
    panes = plot_ros3vfd_log(value)
    res.objects = panes

Register callback with the appropriate dashboard object:

In [None]:
# Invoke ``callback`` whenever the file input's ``value`` parameter changes.
# NOTE(review): ``watch_values`` presumably passes the new value as a keyword
# argument named after the parameter, so the callback's parameter must stay
# named ``value`` -- confirm against the param documentation.
# The trailing semicolon suppresses the cell's output in the notebook.
log_file.param.watch_values(callback, ['value']);

Run the dashboard:

In [None]:
# Mark the layout as servable so that ``panel serve`` can serve it as an app.
app.servable()