In [1]:
import sys
import glob
import os
import re
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import matplotlib.ticker as mtick
%matplotlib inline

In [45]:
def check_uses(parts):
    """Strip the value-less ``--use-cls`` / ``--use-index`` flags from ``parts``.

    These two flags carry no value, so they are removed before the remaining
    tokens are read as alternating ``--key value`` pairs.

    Parameters
    ----------
    parts : list
        Command-line tokens. Modified in place: the first occurrence of each
        flag is removed when present.

    Returns
    -------
    tuple of (bool, bool)
        ``(uses_cls, uses_idx)`` -- whether each flag was found in ``parts``.
    """
    def pop_flag(flag):
        # list.remove raises ValueError when the item is absent. Catch only
        # that, so unrelated failures (e.g. a non-list argument) stay visible
        # instead of being silently reported as "flag not used".
        try:
            parts.remove(flag)
        except ValueError:
            return False
        return True

    uses_cls = pop_flag("--use-cls")
    uses_idx = pop_flag("--use-index")
    return uses_cls, uses_idx

def transform_parts(parts):
    """Rewrite ``key=value`` tokens as the two tokens ``--key``, ``value``.

    Tokens without an ``=`` are passed through unchanged. For tokens with an
    ``=``, the text before the first ``=`` is emitted with a ``--`` prefix,
    followed by the text after it, converted to ``float`` when it parses as
    one and left as a string otherwise.

    Parameters
    ----------
    parts : iterable of str
        Command-line tokens.

    Yields
    ------
    str or float
        The normalised token stream.
    """
    for part in parts:
        if "=" not in part:
            yield part
            continue
        # Split on the first "=" only, so a value that itself contains "="
        # (for example a regular expression) does not break the unpacking.
        key, val = part.split("=", 1)
        yield "--" + key
        # Convert before yielding: keeping the yield outside the try block
        # means the handler can never intercept exceptions delivered to the
        # generator at the yield point (such as GeneratorExit on close()).
        try:
            val = float(val)
        except ValueError:
            pass  # not numeric; keep the raw string
        yield val
    
def process_line(line):
    """Parse one ``run-query`` log line into a flat record.

    Lines that do not start with ``run-query`` are skipped. For matching
    lines the whitespace-separated tokens are processed as follows:

    1. the literal tokens ``run-query``, ``--quiet`` and ``run-1`` .. ``run-6``
       are discarded;
    2. the last remaining token is read as the runtime;
    3. ``key=value`` tokens are expanded via ``transform_parts``;
    4. the ``--use-cls`` / ``--use-index`` flags are extracted via
       ``check_uses``;
    5. what is left is read as alternating ``--key value`` pairs.

    Parameters
    ----------
    line : str
        One line of the log file.

    Returns
    -------
    dict or None
        ``None`` for non-matching lines; otherwise a dict with the keys
        ``'runtime'``, ``'cls'``, ``'idx'`` plus one entry per option (key
        without the leading ``--``, value as ``float`` when numeric).

    Raises
    ------
    AssertionError
        If the remaining tokens do not form ``--key value`` pairs.
    ValueError
        If the last token is not a number.
    """
    if not line.startswith("run-query"):
        return None

    ignore = set(["run-query", "--quiet"] +
                 ["run-%d" % (i,) for i in range(1, 7)])
    # Build a real list: on Python 3 filter() returns a lazy iterator, which
    # supports neither indexing nor item deletion.
    parts = [tok for tok in line.split() if tok not in ignore]

    # The runtime is the trailing token of the line.
    runtime = float(parts.pop())

    # Expand key=value tokens into "--key", value.
    parts = list(transform_parts(parts))

    uses_cls, uses_idx = check_uses(parts)
    assert len(parts) % 2 == 0, "unpaired option token in: %r" % (parts,)

    res = {'runtime': runtime, 'cls': uses_cls, 'idx': uses_idx}
    for key, val in zip(parts[0::2], parts[1::2]):
        assert key[:2] == "--", "expected an option name, got %r" % (key,)
        # Values given as "--key value" are still strings at this point;
        # values from "key=value" may already be floats.
        try:
            val = float(val)
        except ValueError:
            pass  # non-numeric option value; keep the string
        res[key[2:]] = val
    return res
    
def load_traces(fn):
    """Read the log file ``fn`` and return its parsed lines as a DataFrame.

    Each line is handed to ``process_line``; falsy results (such as the
    ``None`` it returns for lines it skips) are dropped, and the remaining
    records become the rows of the returned ``pandas.DataFrame``.
    """
    with open(fn, 'r') as log_file:
        raw_lines = log_file.readlines()
    parsed = (process_line(raw_line) for raw_line in raw_lines)
    records = filter(None, parsed)
    return pd.DataFrame(records)
    
def combine_traces(*fns):
    """Load several trace logs and stack them into a single DataFrame.

    Arguments alternate between a log file name and the value to store in
    that file's ``nosds`` column, e.g.
    ``combine_traces("1osds.log", 1, "2osds.log", 2)``.

    Parameters
    ----------
    *fns
        Flat sequence ``file_name, nosds, file_name, nosds, ...``.

    Returns
    -------
    pandas.DataFrame
        The per-file frames from ``load_traces``, each with an added
        ``nosds`` column, concatenated with ``pd.concat`` (row labels of the
        individual frames are kept as they are).

    Raises
    ------
    ValueError
        If an odd number of arguments is given, or (from ``pd.concat``) if
        no arguments are given at all.
    """
    # Validate up front: previously a missing label only surfaced as a bare
    # StopIteration after the last file had already been loaded.
    if len(fns) % 2 != 0:
        raise ValueError(
            "combine_traces expects (file name, nosds) pairs; got %d arguments"
            % len(fns))

    dfs = []
    for fn, nosds in zip(fns[0::2], fns[1::2]):
        df = load_traces(fn)
        df["nosds"] = nosds
        dfs.append(df)
    return pd.concat(dfs)

# Load every log; each file name is followed by the value stored in its
# "nosds" column (presumably the number of OSDs for that run -- confirm).
trace_sources = [
    "1osds.log", 1,
    "2osds.log", 2,
    "4osds.log", 4,
    "8osds.log", 8,
    "16osds.log", 16,
]
df = combine_traces(*trace_sources)

# Discard the option columns that are not kept in the table.
df = df.drop(
    [
        "comment_regex",
        "start",
        "end",
        "line-number",
        "order-key",
        "pool",
        "qdepth",
        "wthreads",
        "num-objs",
        "extended-price",
    ],
    axis=1,
)

In [53]:
# Display the combined trace table (one row per parsed run-query line).
# NOTE(review): this renders the whole frame; df.head() would keep the
# saved output short.
df

Unnamed: 0,cache,cls,idx,query,runtime,selectivity-pct,nosds
0,cold,False,False,a,437.579811,1,1
1,cold,False,False,a,385.686900,1,1
2,cold,False,False,a,437.985456,1,1
3,hot,False,False,a,423.149219,1,1
4,hot,False,False,a,439.277252,1,1
5,hot,False,False,a,426.768319,1,1
6,cold,True,False,a,285.183889,1,1
7,cold,True,False,a,285.444093,1,1
8,cold,True,False,a,285.605075,1,1
9,hot,True,False,a,35.154342,1,1
