In [None]:
import pandas as pd
import numpy as np
import math
from IPython.display import display
from bokeh.io import show, output_notebook
from bokeh.plotting import figure, ColumnDataSource
from bokeh.models import HoverTool, ranges
# Configure Bokeh so that later show() calls render inline in the notebook.
output_notebook()

In [None]:
def readtrace(fname):
    """Parse a sectioned, tab-separated trace dump into pandas DataFrames.

    The file is a sequence of sections.  Within a section the first
    non-blank line is the section name, the second holds the tab-separated
    column names, the third the tab-separated column types, and every
    further line is one tab-separated data row.  A line consisting solely
    of ``%`` closes the section.  Blank lines are ignored everywhere.

    Columns typed ``"num"`` are converted with ``pd.to_numeric``; columns
    typed ``"time"`` are read as numeric epoch offsets in milliseconds and
    converted to datetimes.  Columns of any other type stay as strings.

    Args:
        fname: Path of the trace file to read.

    Returns:
        A dict mapping each section name to its DataFrame.  A trailing
        section that is not closed by a ``%`` line is not included.
    """
    ret = {}
    with open(fname, "r") as file:
        name = cols = types = None
        data = []
        for raw in file:
            # Tabs are the field separator, so only spaces and line endings
            # are trimmed from the row itself; a full strip() would swallow
            # empty leading/trailing fields and misalign the columns.
            line = raw.strip(" \r\n")
            marker = line.strip()
            if marker == "":
                continue
            if marker == "%":
                # A "%" with no open section has nothing to store.
                if name is not None:
                    df = pd.DataFrame.from_records(data, columns=cols)
                    for i, col in enumerate(df.columns.values):
                        if types[i] == "num":
                            df[col] = pd.to_numeric(df[col])
                        elif types[i] == "time":
                            # Fields arrive as strings; convert to numbers
                            # first because newer pandas deprecates parsing
                            # strings together with ``unit=``.
                            df[col] = pd.to_datetime(pd.to_numeric(df[col]), unit="ms")
                    ret[name] = df
                name = cols = types = None
                data = []
            elif name is None:
                name = marker
            elif cols is None:
                cols = line.split("\t")
            elif types is None:
                types = line.split("\t")
            else:
                data.append(line.split("\t"))
    return ret

class reader:
    """Loads a trace file and exposes its statistics tables and sequence plot."""

    def __init__(self, fname):
        """Read every section of the trace file at ``fname``."""
        # Mapping of section name -> DataFrame, as returned by readtrace().
        self.data = readtrace(fname)

    def stats(self):
        """Return the ``stat:count`` and ``stat:dist`` tables, in that order.

        Each table is indexed by its ``trace`` and ``stat`` columns and the
        index level names are cleared.  ``set_index`` returns a new frame,
        so the frames held in ``self.data`` are left untouched.

        Returns:
            A two-element list of DataFrames.

        Raises:
            KeyError: If either section, or one of the two index columns,
                is missing.
        """
        tables = []
        for name in ("count", "dist"):
            df = self.data["stat:" + name].set_index(["trace", "stat"])
            df.index.names = [None, None]
            tables.append(df)
        return tables

    def seqplot(self):
        """Build the Bokeh sequence diagram for the loaded trace.

        The x axis is categorical, taken from the ``name`` column of the
        ``axes`` section; the y axis is a datetime axis.  Spans are drawn as
        vertical segments with start/end markers, RPCs as horizontal
        segments from ``origin`` to ``destination``, and events as points.

        Returns:
            The configured Bokeh figure (not yet shown).

        Raises:
            KeyError: If one of the ``rpcs``, ``events``, ``spans``,
                ``axes`` or ``timerange`` sections is missing.
        """
        axes = self.data["axes"]["name"].tolist()
        timerange = self.data["timerange"]["time"].tolist()
        # One data source per table, shared by every glyph drawn from it,
        # instead of serialising the same frame once per glyph.
        spans = ColumnDataSource(self.data["spans"])
        rpcs = ColumnDataSource(self.data["rpcs"])
        events = ColumnDataSource(self.data["events"])

        hover = HoverTool()
        # "break-word" is the valid CSS value that lets long unbroken
        # tooltip text wrap inside the 400px box.
        hover.tooltips = "<div style='max-width: 400px; word-wrap: break-word'>@content</div>"
        p = figure(y_axis_type="datetime", x_range=axes, tools=["ypan", "ywheel_zoom", hover, "reset"], active_scroll="ywheel_zoom")

        # Markers go through scatter(marker=...); the dedicated marker
        # methods and circle(size=...) are deprecated in recent Bokeh.
        p.segment(y0="start", y1="end", x0="location", x1="location", source=spans, line_width=4, color="lime", alpha=0.6)
        p.scatter("location", "end", marker="triangle", source=spans, size=12, color="green")
        p.scatter("location", "start", marker="inverted_triangle", source=spans, size=8, color="lime")
        p.scatter("origin", "time", marker="circle", size=8, source=rpcs, color="blue")
        p.segment(y0="time", y1="time", x0="origin", x1="destination", source=rpcs, color="blue")
        p.scatter("location", "time", marker="circle", size=8, source=events, color="red")

        # The range runs from the second timerange entry to the first, i.e.
        # the y axis is flipped relative to the order stored in the file.
        p.y_range = ranges.Range1d(timerange[1], timerange[0])
        p.xaxis.major_label_orientation = math.pi/6
        p.sizing_mode = "scale_width"
        p.height = 400
        return p


In [None]:
# Load the trace dump, display each statistics table in turn, then render
# the sequence plot.
trace = reader("/tmp/spark-trace.out")
for stat in trace.stats():
    display(stat)
show(trace.seqplot())