In [1]:
import warnings

import pandas as pd
import seaborn as sns
import numpy as np
import altair as alt

# Hide every FutureWarning for the rest of the notebook (this includes pandas
# deprecation notices emitted by the aggregations below).
warnings.simplefilter(action='ignore', category=FutureWarning)
# Turn off pandas' chained-assignment check (SettingWithCopyWarning); later
# cells add columns to filtered frames.
pd.options.mode.chained_assignment = None

# BlazeGraph

In [313]:
# Load the BlazeGraph measurements. Later cells read the 'endpoint', 'xp',
# 'query', 'status', 'execution_time' and 'optimization_time' columns.
blazegraph = pd.read_csv('output/blazegraph.csv')

In [314]:
# Give every row of an (experiment, query) pair the same outcome:
#   * if any row of the query is a timeout, all its rows become timeouts and
#     are charged TIMEOUT_SECONDS of execution time;
#   * otherwise, if any row is an error, all its rows become errors with an
#     execution time of 0.
# The 'timeout' and 'error' columns are 1/0 flags for those two outcomes.
TIMEOUT_SECONDS = 900

experiments = blazegraph['xp'].unique()
for xp in experiments:
    in_xp = blazegraph['xp'] == xp
    # Both query sets are derived before any status is rewritten below.
    timeouts = set(blazegraph.loc[in_xp & (blazegraph['status'] == 'timeout'), 'query'])
    errors = set(blazegraph.loc[in_xp & (blazegraph['status'] == 'error'), 'query']) - timeouts

    # Row masks are built once per experiment and reused for every assignment.
    timed_out = in_xp & blazegraph['query'].isin(timeouts)
    errored = in_xp & blazegraph['query'].isin(errors)

    blazegraph.loc[timed_out, 'status'] = 'timeout'
    blazegraph.loc[timed_out, 'execution_time'] = TIMEOUT_SECONDS
    blazegraph.loc[timed_out, 'timeout'] = 1
    blazegraph.loc[in_xp & ~timed_out, 'timeout'] = 0

    blazegraph.loc[errored, 'status'] = 'error'
    blazegraph.loc[errored, 'execution_time'] = 0
    blazegraph.loc[errored, 'error'] = 1
    blazegraph.loc[in_xp & ~errored, 'error'] = 0

# Computed after the overrides above, so timeouts and errors use the
# substituted execution times.
blazegraph['total_execution_time'] = blazegraph['execution_time'] + blazegraph['optimization_time']

In [315]:
# Tag each run with the part of 'xp' before the first '-' (e.g. 'xp2-3' ->
# 'xp2'). Rows whose 'xp' starts with 'xp1' are not assigned an 'xp-root'.
xp_family = blazegraph['xp'].str.split('-').str[0]
blazegraph.loc[~blazegraph['xp'].str.startswith('xp1'), 'xp-root'] = xp_family

## Global results

In [316]:
# Per-experiment totals: average the rows of each (endpoint, xp, query) group,
# then sum those per-query averages over each (endpoint, xp).
# The metric columns are selected *before* aggregating so that text columns
# such as 'status' never reach mean(); pandas >= 2.0 raises a TypeError there
# instead of silently dropping them.
(
    blazegraph
    .groupby(by=["endpoint", "xp", "query"])[
        ['total_execution_time', 'execution_time', 'optimization_time', 'timeout', 'error']
    ]
    .mean()
    .groupby(by=["endpoint", "xp"])
    .sum()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_execution_time,execution_time,optimization_time,timeout,error
endpoint,xp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
blazegraph,xp1,20338.870527,20338.870527,0.0,15.0,9.0
blazegraph,xp1-1,4990.062967,4990.062967,0.0,0.0,0.0
blazegraph,xp1-2,35107.304475,35107.304475,0.0,36.0,64.0
blazegraph,xp2-0,1463.815566,1373.373355,90.442211,0.0,0.0
blazegraph,xp2-1,1273.633394,1187.880191,85.753203,0.0,0.0
blazegraph,xp2-2,1496.313662,1447.892375,48.421287,0.0,0.0
blazegraph,xp2-3,1421.41673,1375.128117,46.288613,0.0,0.0
blazegraph,xp2-4,1581.679393,1535.404368,46.275025,0.0,0.0
blazegraph,xp3-0,1540.185212,1494.085242,46.09997,0.0,0.0
blazegraph,xp3-1,1788.407774,1738.750193,49.657581,0.0,0.0


In [317]:
# Mean of the per-experiment totals within each 'xp-root' family: average the
# rows of each query, sum the queries of each experiment, then average the
# experiments that share an 'xp-root'. Rows whose 'xp' starts with 'xp1' are
# excluded.
# The metric columns are selected *before* aggregating so that text columns
# such as 'status' never reach mean(); pandas >= 2.0 raises a TypeError there
# instead of silently dropping them.
(
    blazegraph[~blazegraph['xp'].str.startswith('xp1')]
    .groupby(by=["endpoint", "xp-root", "xp", "query"])[
        ['total_execution_time', 'execution_time', 'optimization_time', 'timeout', 'error']
    ]
    .mean()
    .groupby(by=["endpoint", "xp-root", "xp"])
    .sum()
    .groupby(by=["endpoint", "xp-root"])
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_execution_time,execution_time,optimization_time,timeout,error
endpoint,xp-root,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
blazegraph,xp2,1447.371749,1383.935681,63.436068,0.0,0.0
blazegraph,xp3,1629.670261,1583.474861,46.1954,0.0,0.0
blazegraph,xp4,2046.474414,2006.826681,39.647733,0.2,0.0
blazegraph,xp5,1545.316359,1226.178084,319.138275,0.0,0.0
blazegraph,xp6,1583.42046,1290.853177,292.567283,0.0,0.0
blazegraph,xp7,2087.976928,1847.158241,240.818687,0.0,0.0
blazegraph,xp8,1635.576358,1577.966723,57.609635,0.0,0.0
blazegraph,xp9,1890.255432,1529.552503,360.702929,0.0,0.0


In [318]:
# Standard deviation of the per-experiment totals within each 'xp-root'
# family: average the rows of each query, sum the queries of each experiment,
# then take the spread across the experiments that share an 'xp-root'. Rows
# whose 'xp' starts with 'xp1' are excluded.
# The metric columns are selected *before* aggregating so that text columns
# such as 'status' never reach mean(); pandas >= 2.0 raises a TypeError there
# instead of silently dropping them.
(
    blazegraph[~blazegraph['xp'].str.startswith('xp1')]
    .groupby(by=["endpoint", "xp-root", "xp", "query"])[
        ['total_execution_time', 'execution_time', 'optimization_time', 'timeout', 'error']
    ]
    .mean()
    .groupby(by=["endpoint", "xp-root", "xp"])
    .sum()
    .groupby(by=["endpoint", "xp-root"])
    .std()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_execution_time,execution_time,optimization_time,timeout,error
endpoint,xp-root,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
blazegraph,xp2,113.537364,128.121147,22.590742,0.0,0.0
blazegraph,xp3,207.066791,206.921163,3.514281,0.0,0.0
blazegraph,xp4,219.005443,221.629554,12.174307,0.447214,0.0
blazegraph,xp5,133.766715,127.170585,13.728749,0.0,0.0
blazegraph,xp6,206.219726,206.32439,7.628988,0.0,0.0
blazegraph,xp7,14.505384,11.309447,5.946144,0.0,0.0
blazegraph,xp8,194.852991,195.008347,1.712529,0.0,0.0
blazegraph,xp9,285.81092,287.792274,3.971569,0.0,0.0


## Main plot for BlazeGraph

In [319]:
# Baseline ('xp1'): one column per outcome so the chart can colour timeouts,
# errors and successful queries differently. .copy() makes the filtered frame
# independent of `blazegraph` before columns are added to it.
baseline = blazegraph[blazegraph['xp'] == 'xp1'].copy()
baseline.loc[baseline['status'] == 'timeout', 'BlazeGraph (timeout; >900s)'] = 900
# Errors are given the same value as timeouts.
baseline.loc[baseline['status'] == 'error', 'BlazeGraph (error)'] = 900
baseline.loc[baseline['status'] == 'ok', 'BlazeGraph'] = baseline['total_execution_time']
# numeric_only=True keeps the text columns ('endpoint', 'xp', 'status', ...)
# out of the mean; without it pandas >= 2.0 raises a TypeError.
baseline = baseline.groupby(by=['query']).mean(numeric_only=True).reset_index()

# Experiment side: every 'xp2*' run, averaged per query.
experiment = blazegraph[blazegraph['xp'].str.startswith('xp2')].copy()
experiment['Total Execution Time'] = experiment['total_execution_time']
experiment['Execution Time'] = experiment['execution_time']
experiment = experiment.groupby(by=['query']).mean(numeric_only=True).reset_index()

# One row per query; columns present on both sides get the '_baseline' / '_xp'
# suffix, the display columns created above keep their names.
df = baseline.merge(experiment, on=['query'], suffixes=('_baseline', '_xp'))
# Constant column drawn as a horizontal reference line.
df['Wikidata time limit (60s)'] = 60

In [320]:
# X axis order: queries sorted by baseline total time, largest first.
order = list(df.sort_values(by='total_execution_time_baseline')['query'])[::-1]

# Measure -> colour. The dict order is the legend order and the index order
# used when picking layers below.
palette = {
    'BlazeGraph (timeout; >900s)': 'darkgrey',
    'BlazeGraph (error)': 'indianred',
    'BlazeGraph': 'lightgray',
    'Total Execution Time': 'royalblue',
    'Execution Time': 'forestgreen',
    'Wikidata time limit (60s)': 'black',
}
metrics = list(palette)
colors = list(palette.values())

x_queries = alt.X(
    'query:N',
    sort=order,
    axis=alt.Axis(labels=False, ticks=False),
    title='Queries'
)
measure_color = alt.Color(
    'Measure:N',
    scale=alt.Scale(domain=metrics, range=colors),
    legend=alt.Legend(
        title="Legend",
        labelFontSize=16,
        titleFontSize=18,
        labelLimit=300,
        orient="bottom",
        columns=3
    )
)

# Fold the metric columns into (Measure, Value) pairs so each measure can be
# filtered into its own layer.
base = (
    alt.Chart(df)
    .mark_line()
    .transform_fold(metrics, as_=['Measure', 'Value'])
    .encode(x_queries, measure_color)
)

# One line layer per measure, all on a symlog y scale. Only the first measure
# sets the y-axis title.
y_extras = {metrics[0]: {'title': 'Time [symlog][seconds]'}}
layer_of = {
    measure: base.transform_filter(
        alt.datum.Measure == measure
    ).encode(
        alt.Y('Value:Q', scale=alt.Scale(type='symlog'), **y_extras.get(measure, {}))
    )
    for measure in metrics
}

# The three baseline measures are drawn as bars, the rest stay lines.
timeout_bars, error_bars, ok_bars = (layer_of[m].mark_bar() for m in metrics[:3])
total_line = layer_of[metrics[3]]
execution_line = layer_of[metrics[4]]
limit_line = layer_of[metrics[5]].encode(strokeWidth=alt.value(1))

alt.layer(
    timeout_bars, ok_bars, error_bars, execution_line, total_line + limit_line
).properties(width=800).configure_axis(
    labelFontSize=16,
    titleFontSize=16
)

# Virtuoso

In [305]:
# Load the Virtuoso measurements. Later cells read the 'endpoint', 'xp',
# 'query', 'status', 'execution_time' and 'optimization_time' columns.
virtuoso = pd.read_csv('output/virtuoso.csv')

In [306]:
# Give every row of an (experiment, query) pair the same outcome:
#   * if any row of the query is a timeout, all its rows become timeouts and
#     are charged TIMEOUT_SECONDS of execution time;
#   * otherwise, if any row is an error, all its rows become errors.
# The 'timeout' and 'error' columns are 1/0 flags for those two outcomes.
TIMEOUT_SECONDS = 900

experiments = virtuoso['xp'].unique()
for xp in experiments:
    in_xp = virtuoso['xp'] == xp
    # Both query sets are derived before any status is rewritten below.
    timeouts = set(virtuoso.loc[in_xp & (virtuoso['status'] == 'timeout'), 'query'])
    errors = set(virtuoso.loc[in_xp & (virtuoso['status'] == 'error'), 'query']) - timeouts

    # Row masks are built once per experiment and reused for every assignment.
    timed_out = in_xp & virtuoso['query'].isin(timeouts)
    errored = in_xp & virtuoso['query'].isin(errors)

    virtuoso.loc[timed_out, 'status'] = 'timeout'
    virtuoso.loc[timed_out, 'execution_time'] = TIMEOUT_SECONDS
    virtuoso.loc[timed_out, 'timeout'] = 1
    virtuoso.loc[in_xp & ~timed_out, 'timeout'] = 0

    # NOTE(review): unlike the BlazeGraph cell, 'execution_time' of errored
    # queries is left as measured here -- confirm this difference is intended.
    virtuoso.loc[errored, 'status'] = 'error'
    virtuoso.loc[errored, 'error'] = 1
    virtuoso.loc[in_xp & ~errored, 'error'] = 0

# Computed after the overrides above, so timeouts use the substituted
# execution time.
virtuoso['total_execution_time'] = virtuoso['execution_time'] + virtuoso['optimization_time']

In [307]:
# Tag each run with the part of 'xp' before the first '-' (e.g. 'xp2-3' ->
# 'xp2'). Rows whose 'xp' starts with 'xp1' are not assigned an 'xp-root'.
xp_family = virtuoso['xp'].str.split('-').str[0]
virtuoso.loc[~virtuoso['xp'].str.startswith('xp1'), 'xp-root'] = xp_family

## Global results

In [308]:
# Per-experiment totals: average the rows of each (endpoint, xp, query) group,
# then sum those per-query averages over each (endpoint, xp).
# The metric columns are selected *before* aggregating so that text columns
# such as 'status' never reach mean(); pandas >= 2.0 raises a TypeError there
# instead of silently dropping them.
(
    virtuoso
    .groupby(by=["endpoint", "xp", "query"])[
        ['total_execution_time', 'execution_time', 'optimization_time', 'timeout', 'error']
    ]
    .mean()
    .groupby(by=["endpoint", "xp"])
    .sum()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_execution_time,execution_time,optimization_time,timeout,error
endpoint,xp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
virtuoso,xp1,418.776458,414.982931,3.793527,0.0,0.0
virtuoso,xp2-0,354.345836,311.076512,43.269324,0.0,0.0
virtuoso,xp2-1,368.337658,324.821291,43.516367,0.0,0.0
virtuoso,xp2-2,351.887363,308.415145,43.472219,0.0,1.0
virtuoso,xp2-3,379.21078,336.157566,43.053214,0.0,1.0
virtuoso,xp2-4,357.884516,314.843448,43.041068,0.0,0.0
virtuoso,xp3-0,342.020091,303.031453,38.988637,0.0,0.0
virtuoso,xp3-1,356.158176,317.226555,38.931621,0.0,0.0
virtuoso,xp3-2,332.743576,293.801482,38.942094,0.0,1.0
virtuoso,xp3-3,406.176539,367.210214,38.966325,0.0,1.0


In [309]:
# Mean of the per-experiment totals within each 'xp-root' family: average the
# rows of each query, sum the queries of each experiment, then average the
# experiments that share an 'xp-root'. Rows whose 'xp' starts with 'xp1' are
# excluded.
# The metric columns are selected *before* aggregating so that text columns
# such as 'status' never reach mean(); pandas >= 2.0 raises a TypeError there
# instead of silently dropping them.
(
    virtuoso[~virtuoso['xp'].str.startswith('xp1')]
    .groupby(by=["endpoint", "xp-root", "xp", "query"])[
        ['total_execution_time', 'execution_time', 'optimization_time', 'timeout', 'error']
    ]
    .mean()
    .groupby(by=["endpoint", "xp-root", "xp"])
    .sum()
    .groupby(by=["endpoint", "xp-root"])
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_execution_time,execution_time,optimization_time,timeout,error
endpoint,xp-root,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
virtuoso,xp2,362.333231,319.062792,43.270438,0.0,0.4
virtuoso,xp3,356.113492,317.17782,38.935671,0.0,0.6
virtuoso,xp4,345.68503,314.811155,30.873875,0.0,0.6
virtuoso,xp5,594.40102,302.987109,291.413911,0.0,0.4
virtuoso,xp6,570.209859,301.419894,268.789965,0.0,0.0
virtuoso,xp7,528.478903,308.216113,220.262791,0.0,0.0
virtuoso,xp8,364.91306,316.908771,48.004289,0.0,0.8
virtuoso,xp9,613.69026,299.95776,313.7325,0.0,0.6


In [310]:
# Standard deviation of the per-experiment totals within each 'xp-root'
# family: average the rows of each query, sum the queries of each experiment,
# then take the spread across the experiments that share an 'xp-root'. Rows
# whose 'xp' starts with 'xp1' are excluded.
# The metric columns are selected *before* aggregating so that text columns
# such as 'status' never reach mean(); pandas >= 2.0 raises a TypeError there
# instead of silently dropping them.
(
    virtuoso[~virtuoso['xp'].str.startswith('xp1')]
    .groupby(by=["endpoint", "xp-root", "xp", "query"])[
        ['total_execution_time', 'execution_time', 'optimization_time', 'timeout', 'error']
    ]
    .mean()
    .groupby(by=["endpoint", "xp-root", "xp"])
    .sum()
    .groupby(by=["endpoint", "xp-root"])
    .std()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_execution_time,execution_time,optimization_time,timeout,error
endpoint,xp-root,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
virtuoso,xp2,11.331488,11.405563,0.224162,0.0,0.547723
virtuoso,xp3,29.20175,29.187025,0.052909,0.0,0.547723
virtuoso,xp4,4.990857,4.986534,0.126935,0.0,0.894427
virtuoso,xp5,14.560015,14.922538,0.7533,0.0,0.547723
virtuoso,xp6,15.926193,16.254364,0.911263,0.0,0.0
virtuoso,xp7,2.44319,2.494269,0.226657,0.0,0.0
virtuoso,xp8,18.359244,18.327372,0.326339,0.0,0.447214
virtuoso,xp9,17.073238,11.544982,8.601082,0.0,0.894427


In [311]:
# Baseline ('xp1'), successful rows only. .copy() makes the filtered frame
# independent of `virtuoso` before a column is added to it.
baseline = virtuoso[(virtuoso['xp'] == 'xp1') & (virtuoso['status'] == 'ok')].copy()
baseline['Virtuoso'] = baseline['total_execution_time']
# numeric_only=True keeps the text columns ('endpoint', 'xp', 'status', ...)
# out of the mean; without it pandas >= 2.0 raises a TypeError.
baseline = baseline.groupby(by=['query']).mean(numeric_only=True).reset_index()

# Experiment side: successful rows of every 'xp2*' run, averaged per query.
experiment = virtuoso[(virtuoso['xp'].str.startswith('xp2')) & (virtuoso['status'] == 'ok')].copy()
experiment['Total Execution Time'] = experiment['total_execution_time']
experiment['Execution Time'] = experiment['execution_time']
experiment = experiment.groupby(by=['query']).mean(numeric_only=True).reset_index()

# One row per query; columns present on both sides get the '_baseline' / '_xp'
# suffix, the display columns created above keep their names.
df = baseline.merge(experiment, on=['query'], suffixes=('_baseline', '_xp'))
# Constant column drawn as a horizontal reference line.
df['Wikidata time limit (60s)'] = 60

In [312]:
# X axis order: queries sorted by baseline total time, largest first.
order = list(df.sort_values(by='total_execution_time_baseline')['query'])[::-1]

# Measure -> colour. The dict order is the legend order and the index order
# used when picking layers below.
palette = {
    'Virtuoso': 'lightgray',
    'Total Execution Time': 'royalblue',
    'Execution Time': 'forestgreen',
    'Wikidata time limit (60s)': 'black',
}
metrics = list(palette)
colors = list(palette.values())

x_queries = alt.X(
    'query:N',
    sort=order,
    axis=alt.Axis(labels=False, ticks=False),
    title='Queries'
)
measure_color = alt.Color(
    'Measure:N',
    scale=alt.Scale(domain=metrics, range=colors),
    legend=alt.Legend(
        title="Legend",
        labelFontSize=16,
        titleFontSize=18,
        labelLimit=300,
        orient="bottom"
    )
)

# Fold the metric columns into (Measure, Value) pairs so each measure can be
# filtered into its own layer.
base = (
    alt.Chart(df)
    .mark_line()
    .transform_fold(metrics, as_=['Measure', 'Value'])
    .encode(x_queries, measure_color)
)

# One line layer per measure, all on a symlog y scale. Only the first measure
# sets the y-axis title.
y_extras = {metrics[0]: {'title': 'Time [symlog][seconds]'}}
layer_of = {
    measure: base.transform_filter(
        alt.datum.Measure == measure
    ).encode(
        alt.Y('Value:Q', scale=alt.Scale(type='symlog'), **y_extras.get(measure, {}))
    )
    for measure in metrics
}

# The baseline measure is drawn as bars, the rest stay lines.
baseline_bars = layer_of[metrics[0]].mark_bar()
total_line = layer_of[metrics[1]]
execution_line = layer_of[metrics[2]]
limit_line = layer_of[metrics[3]].encode(strokeWidth=alt.value(1))

alt.layer(
    baseline_bars, execution_line, total_line + limit_line
).properties(width=800).configure_axis(
    labelFontSize=16,
    titleFontSize=16
)