In [None]:
import glob as gb
import os
from pathlib import Path
from typing import Optional

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Default size (width, height) for every figure created in this notebook
mpl.rcParams.update({'figure.figsize': (16, 9)})

In [None]:
# Directory holding the benchmark result CSV files. The location is
# machine-specific, so it can be overridden through the OS_BENCH_RESULTS_DIR
# environment variable; the path below is used when the variable is not set.
PERF_DIR = Path(os.environ.get(
    'OS_BENCH_RESULTS_DIR',
    '/Users/julien/Software/Others/OS-build-release/results/'))

In [None]:
# Every CSV file found directly in PERF_DIR. Path.glob yields its matches in
# arbitrary order, so sort them to get the same file order on every run.
csvs = sorted(PERF_DIR.glob('*.csv'))
if not csvs:
    # Fail here with a clear message rather than later on an empty list
    raise FileNotFoundError(f'No benchmark CSV files found in {PERF_DIR}')

In [None]:
# First results file of the list.
# NOTE(review): the name `csv` is reassigned by the `for csv in csvs` loop
# further down, so this looks like scratch state — confirm before relying on it.
csv = csvs[0]

In [None]:
def find_skip_rows(results_file: Path) -> Optional[int]:
    """
    Open the result benchmark, and locate the actual start of the timings

    The timings start at the first line that contains the
    ``name,iterations,`` column header; every line before it is preamble.

    Args:
    -----
    results_file (Path): path to the benchmark results file

    Returns:
    --------
    skiprows (int or None): the number of lines to skip when read_csv is
    called, or None when the file contains no timings header
    """
    search_str = 'name,iterations,'
    # Stream the file and stop at the header instead of loading the whole
    # content and scanning it twice. Iterating the file only breaks lines on
    # real newlines, whereas str.splitlines() also splits on form feeds and
    # similar separators, which would inflate the count.
    with open(results_file, 'r') as f:
        for skiprows, line in enumerate(f):
            if search_str in line:
                return skiprows

    return None

In [None]:
def read_bench_results(results_file: Path) -> Optional[pd.DataFrame]:
    """
    Load one benchmark results file and convert its timings to milliseconds

    Args:
    -----
    results_file (Path): path to the benchmark results file

    Returns:
    --------
    df (pd.DataFrame or None): the timings indexed by the first CSV column,
    restricted to the rows that have an 'iterations' value, with 'real_time'
    and 'cpu_time' expressed in milliseconds. None when the file contains no
    timings header.
    """
    # Read the file that was passed in, not a global that happens to exist
    skiprows = find_skip_rows(results_file)
    if skiprows is None:
        return None
    df = pd.read_csv(results_file, skiprows=skiprows, index_col=0)
    # Copy after filtering so the column assignments below act on an
    # independent frame
    df = df.loc[df['iterations'].notnull()].copy()
    # Convert everything in ms: each value is interpreted with the unit given
    # on its own row. Pairing the columns by label (rather than by position in
    # the row) also keeps this working on an empty frame.
    for col in ['real_time', 'cpu_time']:
        df[col] = [
            pd.to_timedelta(value, unit=unit).total_seconds() * 1e3
            for value, unit in zip(df[col], df['time_unit'])
        ]

    return df

In [None]:
df_dict = {}
dfs = []
for csv in csvs:
    # The file name is split on '_': the first token is used as the prefix,
    # the tokens between the first and the last one as the test file name
    fname = csv.name
    tokens = fname.split('_')
    prefix = tokens[0]
    test = '_'.join(tokens[1:-1])

    df = read_bench_results(csv)
    # Files without a timings header yield None and are left out
    if df is not None:
        df['test_file'] = test
        df['prefix'] = prefix
        dfs.append(df)

# One long table for all files, indexed by (name, prefix, test_file)
df = pd.concat(dfs, axis=0).set_index(['prefix', 'test_file'], append=True)
# Real time only, with one column per prefix
df_real = df['real_time'].unstack('prefix')

In [None]:
# Column totals of df_real, restricted to the rows that have a value for
# 'moveAllModelResizeAndLoop', smallest total first
has_resize_and_loop = df_real['moveAllModelResizeAndLoop'].notnull()
df_real.loc[has_resize_and_loop].sum(axis=0).sort_values()

In [None]:
# Per test file totals, using only the rows that have a value for
# 'moveAllModelResizeAndLoop2'
has_resize_and_loop2 = df_real['moveAllModelResizeAndLoop2'].notnull()
df_sum = df_real.loc[has_resize_and_loop2].groupby('test_file').sum()
# 'orim' is the mean of the 'ori' and 'ori2' columns
df_sum['orim'] = df_sum[['ori', 'ori2']].mean(axis=1)
summary_cols = [
    'orim',
    'move1', 'move2', 'move3',
    'moveAll', 'moveAll2',
    'moveAllModelResizeAndLoop', 'moveAllModelResizeAndLoop2',
]
df_sum[summary_cols]

In [None]:
byfile_cols = ['orim', 'moveAll', 'moveAll2',
               'moveAllModelResizeAndLoop', 'moveAllModelResizeAndLoop2']
df_byfile = df_sum[byfile_cols].groupby('test_file').sum()

# Relative difference of every column with respect to 'orim', as a percentage
relative_to_orim = df_byfile.div(df_byfile['orim'], axis=0) - 1
relative_to_orim.style.format('{:.2%}')

In [None]:
# Column-to-column relative change, going from 'orim' to
# 'moveAllModelResizeAndLoop' to 'moveAllModelResizeAndLoop2'; the first
# column has no predecessor and is left out of the display
resize_cols = ['orim', 'moveAllModelResizeAndLoop', 'moveAllModelResizeAndLoop2']
(
    df_sum[resize_cols]
    .groupby('test_file')
    .sum()
    .pct_change(axis=1)
    .iloc[:, 1:]
    .style.format('{:.2%}')
)

In [None]:
# Horizontal bars of the 'Model' test file timings for a few columns
model_cols = ['ori', 'ori2', 'moveAll', 'moveAll2', 'moveAllModelResizeAndLoop2']
by_test_file = df_real.swaplevel(0, 1, axis=0)
by_test_file.loc['Model', model_cols].plot(kind='barh')

# Plot the size-dependent benchmarks

In [None]:
# Columns of df_real that are compared in the plots
cols = [
    'ori',
    'ori2',
    'moveAll',
    'moveAll2',
    'moveAllModelResizeAndLoop2',
]

In [None]:
# Test files that get one figure each in the plotting loop
test_files = [
    'ThermalZoneCombineSpaces',
    'Model_ModelObjects',
    'Model',
    'Vector_remove_vs_copy',
    'Workspace',
    'ForwardTranslator',
]

In [None]:
for test_file in test_files:
    # Rows of this test file, restricted to the compared columns
    toplot = df_real.swaplevel(0, 1, axis=0).loc[test_file, cols]
    # The row labels are '/'-separated: split them into index levels and
    # convert the second level to integers
    toplot.index = toplot.index.str.split('/', expand=True)
    toplot.index = toplot.index.set_levels(level=1, levels=toplot.index.levels[1].astype(int))
    names = ['Test', 'N']
    groupby = ['Test']
    if test_file == 'ForwardTranslator':
        # These labels have a third component: cast it to int then bool, and
        # move it in front of 'N' so that it can be used as a grouping level
        toplot.index = toplot.index.set_levels(level=2, levels=toplot.index.levels[2].astype(int).astype(bool))
        names.insert(-1, 'ExcludeSpaceTranslation')
        groupby.append('ExcludeSpaceTranslation')
        toplot.index = toplot.index.swaplevel(-1, 1)
    toplot.index.names = names

    toplot.sort_index(inplace=True)

    grouped = toplot.groupby(groupby)

    ncols = 1
    nrows = int(np.ceil(grouped.ngroups/ncols))

    # squeeze=False always returns a 2D array of Axes. Without it a figure
    # with a single subplot returns a bare Axes object, which has no
    # flatten() method.
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(16, nrows*5),
                             sharey=False, squeeze=False)

    # One subplot per group
    for (key, ax) in zip(grouped.groups.keys(), axes.flatten()):
        # .loc[key] drops the grouping level(s) from the index of the group
        grouped.get_group(key).loc[key].plot(ax=ax)
        title = f'{test_file}'
        if isinstance(key, str):
            key = [key]
        for k,v in zip(groupby, key):
            title += f', {k}={v}'
        ax.set_title(title)
        # Legend on every subplot rather than only on the last one
        ax.legend()

    plt.show()

# Plot the categorical benchmarks

In [None]:
# Display the real-time table (one column per prefix)
df_real

In [None]:
# Rows of the 'VersionTranslation' test file for the compared columns
by_test_file = df_real.swaplevel(0, 1, axis=0)
by_test_file.loc['VersionTranslation', cols]

In [None]:
# Distinct values found in the second index level of df_real
second_level_values = df_real.index.get_level_values(1)
second_level_values.unique()

In [None]:
toplot = df_real.swaplevel(0, 1, axis=0).loc[test_files, cols]
# The figure height grows with the number of rows to plot
n_rows = len(toplot)
toplot.plot.barh(figsize=(16, 0.75*n_rows))

In [None]:
test_files = ['LoadIdfFile', 'VersionTranslation', 'IdfObjectParse', 'LoadIdd']
for test_file in test_files:
    print(test_file)
    toplot = df_real.swaplevel(0, 1, axis=0).loc[test_file, cols]
    # The row labels are '/'-separated: split them into index levels, then
    # drop the levels that hold a single value so one level remains
    toplot.index = toplot.index.str.split('/', expand=True)
    if toplot.index.nlevels > 1 and toplot.index.levels[0].size == 1:
        toplot.index = toplot.index.droplevel(level=0)
    if toplot.index.nlevels > 1 and toplot.index.levels[1].size == 1:
        toplot.index = toplot.index.droplevel(level=1)
    if toplot.index.nlevels > 1:
        # More than one level still varies: this cell cannot plot that as
        # simple categories, so stop with an explicit message
        raise RuntimeError(
            f'{test_file}: the row labels still have '
            f'{toplot.index.nlevels} index levels after dropping the '
            'single-valued ones, expected exactly one')

    # One bar group per row; the figure height grows with the number of rows
    fig, ax = plt.subplots(figsize=(16, 1*toplot.shape[0]))
    toplot.plot(kind='barh', ax=ax)
    ax.set_title(test_file)


In [None]:
# Horizontal bar chart of the current `toplot` table
toplot.plot.barh()

In [None]:
# Horizontal bar chart of the current `toplot` table
# NOTE(review): same expression as the preceding cell — presumably a leftover
toplot.plot(kind='barh')

In [None]:
# Name the index levels of the current `toplot` table and sort it.
# NOTE(review): this cell acts on whatever `toplot` and `test_file` were left
# by the last executed cell, and `toplot.index.names = names` needs the index
# to have exactly as many levels as `names` — confirm the intended inputs.
names = ['Test', 'N']
groupby = ['Test']
if test_file == 'ForwardTranslator':
    # Cast the third index level to int then bool, and move it in front of 'N'
    toplot.index = toplot.index.set_levels(level=2, levels=toplot.index.levels[2].astype(int).astype(bool))
    names.insert(-1, 'ExcludeSpaceTranslation')
    groupby.append('ExcludeSpaceTranslation')
    toplot.index = toplot.index.swaplevel(-1, 1)
toplot.index.names = names

toplot.sort_index(inplace=True)
#toplot.plot(kind='barh')

In [None]:
# Horizontal bars of the 'ThermalZoneCombineSpaces' rows, including 'moveAll2b'
thermal_zone_cols = ['ori', 'ori2', 'moveAll', 'moveAll2', 'moveAll2b']
thermal_zone = df_real.swaplevel(0, 1, axis=0).loc['ThermalZoneCombineSpaces', thermal_zone_cols]
thermal_zone.plot.barh()

In [None]:
# Per row: how much larger the largest value is than the smallest, in percent
row_max = toplot.max(axis=1)
row_min = toplot.min(axis=1)
(row_max / row_min - 1) * 100

In [None]:
# Smallest and largest value of every row, side by side
row_stats = ['min', 'max']
toplot.aggregate(row_stats, axis=1)

In [None]:
# Every column of df_real whose name contains 'moveAll2'
moveall2_cols = [name for name in df_real.columns if 'moveAll2' in name]
toplot = df_real.swaplevel(0, 1, axis=0).loc['ForwardTranslator', moveall2_cols]

# Deviation of each column from the mean of its row, in percent
row_mean = toplot.mean(axis=1)
deviation_pct = (toplot.div(row_mean, axis=0) - 1) * 100

fig, ax = plt.subplots(figsize=(16, 16))
deviation_pct.plot(kind='barh', ax=ax)
ax.xaxis.set_major_formatter(mpl.ticker.PercentFormatter())

In [None]:
# Every row label of df_real as a plain Python list
row_labels = df_real.index
row_labels.to_list()

In [None]:
# Horizontal bars of the 'Model' rows for the 'ori' and 'moveAll' columns
model_cols = ['ori', 'ori2', 'moveAll', 'moveAll2']
model_rows = df_real.swaplevel(0, 1, axis=0).loc['Model', model_cols]
model_rows.plot.barh()

In [None]:
# One horizontal bar chart per test file for three of the columns.
# Selecting several columns takes a list inside the indexing brackets.
grouped = df_real[['ori', 'moveAll', 'moveAll2']].groupby('test_file')

ncols = 1
nrows = int(np.ceil(grouped.ngroups/ncols))

# squeeze=False always returns a 2D array of Axes, so axes.flatten() also
# works when there is a single test file
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(16, nrows*4),
                         sharey=False, squeeze=False)

for (key, ax) in zip(grouped.groups.keys(), axes.flatten()):
    grouped.get_group(key).plot(kind='barh', ax=ax)
    # Legend on every subplot rather than only on the last one
    ax.legend()

plt.show()

In [None]:
# Line plot of every column of df_real
df_real.plot.line(figsize=(16, 9))

In [None]:
# Row-wise means: 'orim' over the two 'ori' columns, 'newm' over the 'new'
# column only ('new2' is left out)
ori_runs = ['ori', 'ori2']
new_runs = ['new']
df_real['orim'] = df_real[ori_runs].mean(axis=1)
df_real['newm'] = df_real[new_runs].mean(axis=1)

In [None]:
# Relative change from 'orim' to 'newm' for every row, smallest first
orim_to_newm = df_real[['orim', 'newm']].pct_change(axis=1)
orim_to_newm['newm'].sort_values().plot.barh(figsize=(16, 36))

In [None]:
# Difference 'newm' - 'orim' for every row, smallest first
newm_minus_orim = df_real['newm'].sub(df_real['orim'])
newm_minus_orim.sort_values().plot(kind='barh', figsize=(16, 9))

In [None]:
# 'orim' and 'newm' side by side for every row
mean_cols = ['orim', 'newm']
df_real[mean_cols].plot.barh(figsize=(16, 36))

In [None]:
# Mean of the 'orim' and 'newm' columns over all rows
df_real.loc[:, ['orim', 'newm']].mean()