In [2]:
%matplotlib notebook
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt


def read_sqlite(file):
    conn = sqlite3.connect(file)
    try:
        df = pd.read_sql_query('SELECT * FROM result', conn)
    except:
        return None
    finally:
        conn.close()
    return df

In [3]:
vals = [0.1, 0.178, 0.31, 0.562, 1, 1.778, 3.162, 5.623, 10, 17.783, 31.622, 56.234, 100]
dfs = dict(map(lambda x: (x, read_sqlite('results/duckdb_tpch1_flip_stack_{0:g}_results.sqlite'.format(x))), vals))
dfs

{0.1:        iteration  hostname  result  exited  return_code  signaled  term_sig  \
 0              2  rocks204       0       1            0         0         0   
 1              0  rocks204       0       1            0         0         0   
 2              1  rocks204       0       1            0         0         0   
 3              4  rocks204       0       1            0         0         0   
 4              3  rocks204       0       1            0         0         0   
 5              6  rocks204       0       1            0         0         0   
 6              5  rocks204       0       1            0         0         0   
 7              7  rocks204       0       1            0         0         0   
 8              8  rocks204       0       1            0         0         0   
 9              9  rocks204       0       1            0         0         0   
 10            10  rocks204       0       1            0         0         0   
 11            11  rocks204       0

In [4]:
for val in vals:
    dfs[val]['rate'] = val

In [5]:
merged_df = pd.concat(list(dfs.values()))
merged_df

Unnamed: 0,iteration,hostname,result,exited,return_code,signaled,term_sig,runtime,fault_count,max_heap_size,max_stack_size,rate
0,2,rocks204,0,1,0,0,0,0.505009,0,242778112,135168,0.1
1,0,rocks204,0,1,0,0,0,0.505444,0,242180096,135168,0.1
2,1,rocks204,0,1,0,0,0,0.505000,0,242180096,135168,0.1
3,4,rocks204,0,1,0,0,0,0.605044,0,287531008,135168,0.1
4,3,rocks204,0,1,0,0,0,0.606576,0,286068736,135168,0.1
5,6,rocks204,0,1,0,0,0,0.504880,0,238329856,135168,0.1
6,5,rocks204,0,1,0,0,0,0.605202,0,290623488,135168,0.1
7,7,rocks204,0,1,0,0,0,0.605406,0,275824640,135168,0.1
8,8,rocks204,0,1,0,0,0,0.605555,0,286203904,135168,0.1
9,9,rocks204,0,1,0,0,0,0.606424,0,238329856,135168,0.1


In [6]:
df = merged_df.groupby(['result', 'rate']).size().reset_index(name='counts')
df

Unnamed: 0,result,rate,counts
0,0,0.1,97478
1,0,0.178,97451
2,0,0.31,97407
3,0,0.562,97363
4,0,1.0,97324
5,0,1.778,97126
6,0,3.162,96903
7,0,5.623,96592
8,0,10.0,96244
9,0,17.783,96240


In [7]:
for result in range(5):
    for rate in vals:
        if len(df[(df['result'] == result) & (df['rate'] == rate)]) == 0:
            df = df.append({
                'result': result,
                'rate': rate,
                'counts': 0
            }, ignore_index=True)

df['result'] = df['result'].astype(int)
df['counts'] = df['counts'].astype(int)
df.sort_values(['result', 'rate'], inplace=True)
df

Unnamed: 0,result,rate,counts
0,0,0.100,97478
1,0,0.178,97451
2,0,0.310,97407
3,0,0.562,97363
4,0,1.000,97324
5,0,1.778,97126
6,0,3.162,96903
7,0,5.623,96592
8,0,10.000,96244
9,0,17.783,96240


In [8]:
pivoted = pd.pivot_table(df, values='counts', index='rate', columns='result')
pivoted.columns = ['ok', 'incorrect', 'crash', 'abnormal', 'timeout']
pivoted

Unnamed: 0_level_0,ok,incorrect,crash,abnormal,timeout
rate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.1,97478,1,21,0,0
0.178,97451,1,48,0,0
0.31,97407,0,92,0,1
0.562,97363,5,131,0,1
1.0,97324,4,171,0,1
1.778,97126,19,353,2,0
3.162,96903,26,569,1,1
5.623,96592,31,872,4,1
10.0,96244,59,1191,1,5
17.783,96240,62,1192,4,2


In [9]:
fig = plt.figure()
fig.suptitle('Results')
plt.ylabel('Count')
plt.xlabel('Fault rate')
plt.xscale('log')
plt.plot(vals, pivoted['ok'], label='ok')
plt.plot(vals, pivoted['incorrect'], label='incorrect')
plt.plot(vals, pivoted['crash'], label='crash')
plt.plot(vals, pivoted['abnormal'], label='abnormal')
plt.plot(vals, pivoted['timeout'], label='timeout')
plt.legend()
plt.show()

<IPython.core.display.Javascript object>

In [15]:
fig = plt.figure()
fig.suptitle('Results')
plt.ylabel('Count')
plt.xlabel('Fault rate')
plt.xscale('log')
plt.plot(vals, pivoted['incorrect'], label='incorrect')
plt.plot(vals, pivoted['abnormal'], label='abnormal')
plt.plot(vals, pivoted['timeout'], label='timeout')
plt.legend()
plt.show()

<IPython.core.display.Javascript object>

In [13]:
fig = plt.figure()

fig.suptitle('Stacked')
plt.ylabel('Count')
plt.xlabel('Fault rate')
plt.xscale('log')
plt.stackplot(vals,
              pivoted['incorrect'],
              pivoted['abnormal'],
              pivoted['timeout'],
              pivoted['crash'],
              baseline='zero',
              labels=['incorrect', 'abnormal', 'timeout', 'crash'])
plt.legend()
plt.show()

<IPython.core.display.Javascript object>

In [16]:
fig = plt.figure()

fig.suptitle('Stacked')
plt.ylabel('Count')
plt.xlabel('Fault rate')
plt.xscale('log')
plt.stackplot(vals,
              pivoted['incorrect'],
              pivoted['abnormal'],
              pivoted['timeout'],
              baseline='zero',
              labels=['incorrect', 'abnormal', 'timeout'])
plt.show()

<IPython.core.display.Javascript object>

In [17]:
fig = plt.figure()

fig.suptitle('Crash runtime')

df_2 = merged_df[merged_df['result'] == 2]
data_2 = list(map(lambda x: list(df_2[df_2['rate'] == x]['runtime']), vals))

plt.boxplot(data_2, showfliers=False)
plt.show()

<IPython.core.display.Javascript object>

In [18]:
fig = plt.figure()

fig.suptitle('Incorrect runtime')

df_2 = merged_df[merged_df['result'] == 1]
data_2 = list(map(lambda x: list(df_2[df_2['rate'] == x]['runtime']), vals))

plt.boxplot(data_2, showfliers=False)
plt.show()

<IPython.core.display.Javascript object>

In [19]:
fig = plt.figure()

fig.suptitle('Abnormal runtime')

df_2 = merged_df[merged_df['result'] == 3]
data_2 = list(map(lambda x: list(df_2[df_2['rate'] == x]['runtime']), vals))

plt.boxplot(data_2, showfliers=False)
plt.show()

<IPython.core.display.Javascript object>

In [20]:
fig = plt.figure()

fig.suptitle('Ok runtime')

df_2 = merged_df[merged_df['result'] == 0]
data_2 = list(map(lambda x: list(df_2[df_2['rate'] == x]['runtime']), vals))

plt.boxplot(data_2, showfliers=False)
plt.show()

<IPython.core.display.Javascript object>

In [21]:
crashes = merged_df[merged_df['result'] == 2]
signals = crashes['term_sig'].unique()
crash_count = crashes.groupby(['term_sig', 'rate']).size().reset_index(name='counts')
crash_pivoted = pd.pivot_table(crash_count, values='counts', index='rate', columns='term_sig')
crash_pivoted

term_sig,4,5,6,7,8,11
rate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.1,,,,,,21.0
0.178,,,2.0,,,46.0
0.31,,,8.0,2.0,,82.0
0.562,1.0,,8.0,2.0,1.0,119.0
1.0,,,13.0,1.0,,157.0
1.778,2.0,,22.0,11.0,,318.0
3.162,1.0,,28.0,11.0,,529.0
5.623,1.0,,52.0,13.0,,806.0
10.0,3.0,,70.0,20.0,2.0,1096.0
17.783,5.0,1.0,49.0,22.0,,1115.0


In [22]:
fig = plt.figure()
fig.suptitle('Crash types')
plt.ylabel('Count')
plt.xlabel('Fault rate')
plt.xscale('log')

for signal in signals:
    plt.plot(vals, crash_pivoted[signal], label=str(signal))
    
plt.legend()
plt.show()

<IPython.core.display.Javascript object>