In [None]:
import csv
import datetime
import math
import numpy as np
import os
import random
import statistics
import sys
import pandas as pd

import bokeh.io
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource
from bokeh.models.ranges import FactorRange
from bokeh.transform import factor_cmap
from bokeh.resources import INLINE
import bokeh.io

# Reset Bokeh's output configuration, then direct plots to the notebook,
# passing the INLINE resources object imported above.
bokeh.io.reset_output()
bokeh.io.output_notebook(INLINE)

#bokeh.io.output_notebook()

def methods(obj):
    """Print the names of every attribute of `obj` that does not start with '_'.

    Debugging aid: output is a 'Methods:' header followed by one name per
    line, in the order returned by `dir(obj)`.
    """
    print('Methods:')
    public_names = []
    for attr in dir(obj):
        if attr.startswith('_'):
            continue
        public_names.append(attr)
    print('\n'.join(public_names))

# Path to the git history file read by load_graphs to order benchmark runs.
# Generate it with: git log --pretty=medium > d:\temp\performance\commits.log
commitlog = "d:/temp/performance/commits.log"

# Dimensions passed as plot_width / plot_height to every figure created below.
width = 800
height = 332

class Report:
    """Attribute-style view over one CSV row.

    The dictionary passed in becomes the instance's attribute dictionary
    (it is not copied), so `Report({"Mean": "1 ms"}).Mean` works.
    """

    def __init__(self, d):
        # Adopt the mapping itself as the attribute store.
        self.__dict__ = d
        
class Result:
    """One parsed benchmark measurement; every field starts at a neutral default."""

    def __init__(self):
        # Identification of the test case.
        self.Name = ""
        # HasHalt: the row carried a DoHalt setting; DoHalt: that setting's value.
        self.HasHalt = self.DoHalt = False
        # Size parameter of the run (number of machines, messages, ...).
        self.Iterations = 0
        # Timing statistics.
        self.Mean = self.Error = self.StdDev = 0
        self.Min = self.Max = 0
        # Resource usage figures.
        self.CPU = 0
        self.Collected = 0
        
class Benchmark():
    """All results loaded from one report file.

    Attributes:
        filename: path of the report the results came from.
        data: list of result objects (each exposing a `Name` attribute).
        name: `Name` of the first result, or "" when `data` is empty.
    """

    def __init__(self, filename, data):
        self.filename = filename
        self.data = data
        # A report with no usable rows used to raise IndexError here;
        # fall back to an empty name instead.
        self.name = data[0].Name if data else ""

class DataPoint:
    """One plotted sample: position, mean, bounds, error and resource figures.

    `min_value` and `max_value` are stored as `ci0` and `ci1`.
    """

    def __init__(self, index, mean, min_value, max_value, error, cpu, collected, name):
        # Keyword order below determines the attribute insertion order.
        vars(self).update(
            index=index,
            mean=mean,
            ci0=min_value,
            ci1=max_value,
            error=error,
            name=name,
            cpu=cpu,
            collected=collected,
        )
        
def print_stats(name, values):
    """Print the minimum, maximum and relative spread of `values`.

    The reported "error" is (max - min) expressed as a percentage of the
    arithmetic mean of `values`.
    """
    lowest = min(values)
    highest = max(values)
    spread = highest - lowest
    relative_spread = spread * 100 / statistics.mean(values)
    message = "{} has min={}, max={:.3f}, error={:.3f}%".format(name, lowest, highest, relative_spread)
    print(message)
              
def to_millis(v):
    """Convert a duration string such as "1,234.5 ms" to milliseconds.

    The text is a number (thousands separators allowed) optionally followed
    by whitespace and a unit. A bare number is taken to already be in
    milliseconds. Recognised units: s, ms, us / μs / µs, ns.

    Args:
        v: the duration text.

    Returns:
        The duration in milliseconds as a float.

    Raises:
        ValueError: if the text is empty, the number cannot be parsed, or
            the unit is not recognised.
    """
    # split() with no argument tolerates repeated or non-breaking whitespace.
    p = v.split()
    if not p:
        raise ValueError("Empty duration value")
    t = float(p[0].replace(",", ""))
    if len(p) == 1:
        return t

    # unit -> (multiplier, divisor); dividing (rather than multiplying by a
    # fraction) keeps results exact for round inputs.
    scale = {
        "s": (1000.0, 1.0),
        "ms": (1.0, 1.0),
        "us": (1.0, 1000.0),
        "\u03bcs": (1.0, 1000.0),  # Greek small letter mu
        "\u00b5s": (1.0, 1000.0),  # micro sign
        "ns": (1.0, 1000000.0),
    }
    units = p[1]
    if units not in scale:
        raise ValueError("Unknown units {}".format(v))
    multiplier, divisor = scale[units]
    return t * multiplier / divisor

def get_pallete(count):
    """Return a sequence of at least `count` colours from Bokeh's d3 palettes.

    Fewer than 10 series use 'Category10', otherwise 'Category20'. At least
    three colours are always returned because the palettes start at size 3.
    Requests above 20 (the largest Category20 palette) previously raised
    KeyError; they now reuse the 20 colours cyclically.

    (The misspelled name is kept because callers use it.)
    """
    # see https://docs.bokeh.org/en/latest/docs/reference/palettes.html
    largest = 20
    size = max(3, count)
    family = 'Category10' if count < 10 else 'Category20'
    if size <= largest:
        return bokeh.palettes.d3[family][size]
    base = bokeh.palettes.d3['Category20'][largest]
    return tuple(base[n % largest] for n in range(size))
    
def load_commits(filename):
    """Parse `git log --pretty=medium` output into (commit, author, date) tuples.

    A tuple is emitted each time a line starting with "Date: " is seen, using
    the most recent "commit " and "Author: " lines read so far. Dates are
    parsed with their UTC offset. Entries are returned in file order.
    """
    date_format = '%a %b %d %H:%M:%S %Y %z'
    entries = []
    sha = None
    who = None
    with open(filename, "r", errors='ignore') as log:
        for line in log:
            if line.startswith("commit "):
                sha = line[len("commit "):].strip()
                continue
            if line.startswith("Author: "):
                who = line[len("Author: "):].strip()
                continue
            if line.startswith("Date: "):
                stamp = line[len("Date: "):].strip()
                when = datetime.datetime.strptime(stamp, date_format)
                entries.append((sha, who, when))
    return entries

def strip_dict(d):
    """Return a copy of `d` with normalised keys and whitespace-trimmed values.

    Every space and byte-order mark (U+FEFF) is removed from each key, and
    each value has leading/trailing whitespace stripped.
    """
    return {
        key.replace(" ", "").replace("\ufeff", ""): d[key].strip()
        for key in d
    }
        
# Columns (or Name key=value keys) that carry the run's size parameter,
# listed in the priority order used when several columns are present.
_ITERATION_COLUMNS = ("NumProducers", "NumMachines", "NumMessages", "NumTransitions", "NumConsumers")


def _parse_name_column(text, x):
    """Fill x.Name / x.HasHalt / x.DoHalt from a 'Name key=value ...' cell.

    Returns the iteration count found among the key=value pairs (0 if none).
    Raises Exception for a key=value pair with an unrecognised key.
    """
    iterations = 0
    parts = text.split(' ')
    x.Name = parts[0]
    for p in parts[1:]:
        nv = p.split('=')
        if len(nv) != 2:
            continue
        key, value = nv
        if key == "DoHalt":
            x.HasHalt = True
            # bool("False") is True, so compare against the literal instead.
            x.DoHalt = value == "True"
        elif key in _ITERATION_COLUMNS:
            iterations = int(value)
        else:
            raise Exception("Error parsing keyvalue {}".format(p))
    return iterations


def _gc_collections(row):
    """Sum the Gen0/Gen1/Gen2 cells of a normalised row.

    Missing or non-numeric cells count as zero so that they do not cause the
    whole row to be rejected.
    """
    total = 0
    for column in ("Gen0", "Gen1", "Gen2"):
        text = row.get(column, "").replace(",", "")
        try:
            total += float(text)
        except ValueError:
            # Deliberately tolerant: a placeholder or blank contributes nothing.
            pass
    return total


def load_report(filename):
    """Load one benchmark CSV report into a Benchmark.

    Each row is normalised with strip_dict and converted to a Result. Two row
    layouts are accepted: rows with a "MinTime" column (Min/StdDev taken from
    MinTime/StdDevTime, Mean set to Min) and rows whose Runtime is "Clr",
    ".NET 4.8" or ".NET Core 3.1" (Mean/Error/StdDev parsed with to_millis).
    Rows matching neither layout are skipped; rows that raise while being
    parsed are reported on stdout and skipped.

    Args:
        filename: path of the CSV file (read as UTF-8).

    Returns:
        Benchmark(filename, results) with the results in file order.
    """
    result = []
    with open(filename, "r", encoding='utf8') as f:
        for row in csv.DictReader(f):
            try:
                row = strip_dict(row)
                r = Report(row)
                x = Result()
                iterations = 0

                if "Method" in row:
                    x.Name = r.Method
                elif "Name" in row:
                    iterations = _parse_name_column(r.Name, x)

                # A dedicated column overrides anything parsed from Name.
                for column in _ITERATION_COLUMNS:
                    if column in row:
                        iterations = int(row[column])
                        break

                if "DoHalt" in row:
                    x.DoHalt = r.DoHalt == "True"
                    x.HasHalt = True
                if "CPU%" in row:
                    x.CPU = float(row["CPU%"])
                elif "Cpu" in row:
                    x.CPU = float(row["Cpu"])
                # strip_dict removes spaces from keys, so the header "Gen 0"
                # arrives here as "Gen0".
                if "Gen0" in row:
                    x.Collected = _gc_collections(row)

                # .NET Core runs are failing for some reason.
                if "MinTime" in row:
                    x.Min = float(r.MinTime)
                    x.StdDev = float(r.StdDevTime)
                    x.Mean = x.Min
                    x.Max = x.Min + x.StdDev
                    x.Iterations = iterations
                    result.append(x)

                elif r.Runtime == "Clr" or r.Runtime == ".NET 4.8" or r.Runtime == ".NET Core 3.1":
                    x.Mean = to_millis(r.Mean)
                    x.Error = to_millis(r.Error)
                    x.StdDev = to_millis(r.StdDev)
                    # Fall back to mean -/+ error when explicit bounds are absent.
                    if "Min" in row:
                        x.Min = to_millis(r.Min)
                    else:
                        x.Min = x.Mean - x.Error
                    if "Max" in row:
                        x.Max = to_millis(r.Max)
                    else:
                        x.Max = x.Mean + x.Error
                    x.Iterations = iterations
                    result.append(x)
            except Exception as e:
                # Keys may include None (rows with surplus cells), so convert
                # each key to text before joining.
                print("Error loading row: " + ",".join(str(k) for k in row))
                print(e)

    return Benchmark(filename, result)

def load_graphs(rootdir, commit_log=None, skip_index=17):
    """Build per-test time series, ordered by commit history, from benchmark folders.

    Every entry of `rootdir` named "benchmark_<commit>" (except .zip files) is
    scanned for CSV reports: "*report.csv" inside a "results" sub-folder when
    one exists, otherwise any "*.csv" in the folder itself; "*summary.csv"
    files are ignored. Reports are grouped by benchmark name, and for each
    (test name, iteration count, halt flag) combination found in the first
    report of a group, matching rows are collected commit by commit.

    Args:
        rootdir: folder containing the "benchmark_<commit>" entries.
        commit_log: path of the git log used for ordering; defaults to the
            module-level `commitlog`.
        skip_index: once this many points have been collected for a chart,
            the next row examined is ignored (once per chart), whether or not
            it would have matched. Pass None to disable.
            NOTE(review): the reason for skipping at 17 is not recorded in
            this file — presumably it masks a known bad sample; confirm.

    Returns:
        A list of (title, series) tuples, where series is a list of DataPoint
        attribute dictionaries. Charts with no points are omitted.
    """
    if commit_log is None:
        commit_log = commitlog

    prefix = "benchmark_"
    loaded = {}
    for name in os.listdir(rootdir):
        if not name.startswith(prefix) or name.endswith(".zip"):
            continue
        folder = os.path.join(rootdir, name)
        pattern = ".csv"
        results = os.path.join(folder, "results")
        if os.path.exists(results):
            folder = results
            pattern = "report.csv"
        for report in os.listdir(folder):
            if report.endswith(pattern) and not report.endswith("summary.csv"):
                r = load_report(os.path.join(folder, report))
                # The folder name after the prefix identifies the commit.
                r.commit = name[len(prefix):]
                loaded.setdefault(r.name, []).append(r)

    commits = load_commits(commit_log)
    # The log lists newest first; reverse so charts read oldest to newest.
    commits.reverse()

    graphs = []

    for benchmark in loaded.values():
        # Index the runs by commit once instead of rescanning the list for
        # every commit of every chart; the first run per commit wins.
        by_commit = {}
        for run in benchmark:
            by_commit.setdefault(run.commit, run)

        first = benchmark[0]
        iterations = sorted({row.Iterations for row in first.data})
        tests = sorted({row.Name for row in first.data})
        for t in tests:
            for i in iterations:
                for b in [True, False]:
                    if first.data[0].HasHalt:
                        title = "{} {} Halt={}".format(t, i, b)
                    else:
                        title = "{} {}".format(t, i)

                    series = []
                    index = 0
                    skipped = False
                    # order the chart by commit order so it shows history.
                    for commit, _author, _date in commits:
                        run = by_commit.get(commit)
                        if run is None:
                            continue
                        for x in run.data:
                            # Rows without a halt setting are only charted on the b == True pass.
                            halt_match = (x.HasHalt and x.DoHalt == b) or (x.HasHalt == False and b == True)
                            if skip_index is not None and index == skip_index and not skipped:
                                skipped = True
                            elif x.Iterations == i and x.Name == t and halt_match:
                                point = DataPoint(index, x.Mean, x.Min, x.Max, x.Error, x.CPU, x.Collected, commit)
                                series.append(point.__dict__)
                                index += 1
                    if series:
                        graphs.append((title, series))
    return graphs

def compare_datasets(datasets):
    """Show one grouped bar chart per benchmark comparing mean times across datasets.

    `datasets` is a list of load_graphs-style results (lists of
    (title, series) tuples). Only the charts present in every dataset are
    drawn. Within a chart, bars are grouped by sample position with one bar
    per dataset; the number of samples shown is the smallest series length
    seen so far (this count carries over from chart to chart).
    """
    palette = get_pallete(len(datasets))
    # Dataset labels "1", "2", ... used as the inner factor of each bar group.
    groups = [str(n + 1) for n in range(len(datasets))]

    min_length = sys.maxsize
    for ds in datasets:
        min_length = min(min_length, len(ds))

    test_count = sys.maxsize
    for i in range(min_length):
        columns = []
        title = ""
        for j, ds in enumerate(datasets):
            run = ds[i]
            if not title:
                title = run[0]
                print(title)
            test_count = min(test_count, len(run[1]))
            measurements = pd.DataFrame(run[1])["mean"][:test_count].values
            columns.append(measurements)
            print_stats("run{}".format(j), measurements)

        factors = [(str(position), group) for position in range(test_count) for group in groups]

        # Interleave the datasets: sample 0 of every dataset, then sample 1, ...
        yvalues = tuple(value for row in zip(*columns) for value in row)
        source = ColumnDataSource(data={"x": factors, "counts": yvalues})

        p = figure(x_range=FactorRange(*factors), plot_height=height, plot_width=width,
                   title=title + ", min times", toolbar_location=None, tools="")

        bar_colors = factor_cmap('x', palette=palette, factors=groups, start=1, end=len(groups))
        p.vbar(x='x', top='counts', width=0.9, source=source, line_color="white",
               fill_color=bar_colors)

        # Start the y axis at the multiple of 10 at or below the smallest value.
        p.y_range.start = math.floor(min(yvalues) / 10) * 10
        p.x_range.range_padding = 0.05
        p.yaxis.axis_label = "milliseconds"
        p.xaxis.major_label_orientation = 1
        p.xgrid.grid_line_color = None
        # Hide x tick labels and tick marks.
        p.xaxis.major_label_text_font_size = "0px"
        p.xaxis.major_tick_line_alpha = 0

        show(p)
        
def plots_with_error_bars(datasets):
    """Show one scatter chart per benchmark with a vertical error bar on every point.

    `datasets` is a list of load_graphs-style results (lists of
    (title, series) tuples); each dataset is drawn in its own colour. The
    number of charts and their titles come from the first dataset. Each
    series must provide "mean" and "error" values; bars span mean -/+ error.
    """
    palette = get_pallete(len(datasets))
    chart_count = len(datasets[0])
    for i in range(chart_count):
        title, _ = datasets[0][i]
        print(title)
        p = figure(plot_height=height, plot_width=width, title=title)

        # Lowest axis floor across all datasets. Tracked locally because a
        # truthiness test on p.y_range.start mistakes a floor of 0 for
        # "not set yet" and lets a later dataset raise the floor.
        y_floor = None
        for index, ds in enumerate(datasets):
            _, data = ds[i]
            frame = pd.DataFrame(data)
            xs = list(frame.index)
            ys = frame["mean"].values
            yerrs = frame["error"].values
            print_stats("run{}".format(index), ys)

            # create the coordinates for the errorbars
            err_xs = [(x, x) for x in xs]
            err_ys = [(y - yerr, y + yerr) for y, yerr in zip(ys, yerrs)]

            # plot them
            color = palette[index]
            p.multi_line(err_xs, err_ys, color=color)
            p.circle(xs, ys, color=color, size=5, line_alpha=0)

            # Multiple of 10 at or below this dataset's smallest mean.
            floor = math.floor(min(ys) / 10) * 10
            y_floor = floor if y_floor is None else min(y_floor, floor)

        if y_floor is not None:
            p.y_range.start = y_floor
        show(p)
        
        

## Show Benchmark Results

In [None]:
# Load two benchmark result folders and chart them together with error bars.
dataset1 = load_graphs("D:\\Temp\\performance\\benchmark_newhistory1")
dataset2 = load_graphs("D:\\Temp\\performance\\benchmark_newhistory2")
plots_with_error_bars([dataset1, dataset2])

In [None]:
# Load five benchmark result folders and chart them together with error bars.
# Note: this rebinds dataset1 and dataset2, which the cell below reuses.
dataset1 = load_graphs("D:\\Temp\\performance\\benchmark_history")
dataset2 = load_graphs("D:\\Temp\\performance\\benchmark_history2")
dataset3 = load_graphs("D:\\Temp\\performance\\benchmark_history3")
dataset4 = load_graphs("D:\\Temp\\performance\\benchmark_history4")
dataset5 = load_graphs("D:\\Temp\\performance\\benchmark_history5")
plots_with_error_bars([dataset1, dataset2, dataset3, dataset4, dataset5])

## Minimums from each run

In [None]:
def get_minimal_dataset(datasets):
    """Combine several datasets into one holding the lowest-mean point per position.

    `datasets` is a list of load_graphs-style results (lists of
    (title, series) tuples, where each series is a list of dicts with a
    "mean" key). For every chart of the first dataset, the series starts as
    a copy of the first non-empty series and each position is replaced by
    another dataset's point whenever that point has a smaller mean.
    Positions beyond the shorter of the two series are left untouched.

    The input datasets are not modified.

    Returns:
        A one-element list containing the combined dataset, so the result can
        be passed straight to plots_with_error_bars. Each chart carries the
        title from the last dataset.
    """
    chart_count = len(datasets[0])
    minimal_dataset = []
    for i in range(chart_count):
        minimal_data = None
        title = ""
        for ds in datasets:
            title, data = ds[i]
            if not minimal_data:
                # Copy: assigning the list itself would make the replacements
                # below overwrite the caller's first dataset in place.
                minimal_data = list(data)
            else:
                for j, candidate in enumerate(data[:len(minimal_data)]):
                    if minimal_data[j]["mean"] > candidate["mean"]:
                        minimal_data[j] = candidate
        minimal_dataset.append((title, minimal_data))
    return [minimal_dataset]

# Chart the per-position lowest-mean points taken across dataset1 and dataset2.
plots_with_error_bars(get_minimal_dataset([dataset1, dataset2]))