In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import json

In [2]:
def _read_json(path):
    """Open the JSON file at `path`, parse it and return the decoded object."""
    with open(path) as handle:
        return json.load(handle)


# One report per device / threading combination.
cpu_st = _read_json("cpu_single_threaded.json")
cpu_mt = _read_json("cpu_multi_threaded.json")
gpu_st = _read_json("gpu_single_threaded.json")
gpu_mt = _read_json("gpu_multi_threaded.json")

In [None]:
# One frame per report, keeping only the entries whose run_type is "iteration".
iterations = [
    pd.DataFrame(data=[run for run in report["benchmarks"] if run["run_type"] == "iteration"])
    for report in (cpu_st, cpu_mt, gpu_st, gpu_mt)
]

# Stack the four frames and renumber the rows from 0.
dataset = pd.concat(iterations).reset_index(drop=True)
dataset

In [None]:
# One frame per report, keeping only the entries whose run_type is "aggregate".
aggregates = [
    pd.DataFrame(data=[run for run in report["benchmarks"] if run["run_type"] == "aggregate"])
    for report in (cpu_st, cpu_mt, gpu_st, gpu_mt)
]

# Stack the four frames and renumber the rows from 0.
dataset_aggr = pd.concat(aggregates).reset_index(drop=True)
dataset_aggr


In [None]:
# Build the per-iteration table on a copy so the raw `dataset` frame is not
# modified as a side effect of this cell.
benchmarks = dataset.copy()

# Names are split on '/': segment 1 is "<device>_<threading>" and segment 2 is k
# (k is kept as a string here; plots that need numbers convert it themselves).
name_parts = benchmarks['name'].str.split('/')
device_and_threading = name_parts.str[1].str.split('_')
benchmarks['k'] = name_parts.str[2]
benchmarks['device'] = device_and_threading.str[0]
benchmarks['threading'] = device_and_threading.str[1]

# Convert millisecond timings to seconds and relabel the unit.
# NOTE(review): rows reported in any other unit are left untouched — confirm
# that every input file reports 'ms' (or already 's').
time_filter = benchmarks['time_unit'] == 'ms'
benchmarks.loc[time_filter, ['real_time']] /= 1000
benchmarks.loc[time_filter, ['cpu_time']] /= 1000
benchmarks.loc[time_filter, ['time_unit']] = 's'

# .copy() makes the column selection an independent frame, so adding columns to
# `benchmarks` in later cells does not raise SettingWithCopyWarning.
benchmarks = benchmarks[['k', 'device', 'threading', 'repetitions', 'repetition_index', 'iterations', 'real_time', 'cpu_time', 'time_unit']].copy()
benchmarks

In [None]:
# Build the aggregate table on a copy so the raw `dataset_aggr` frame is not
# modified as a side effect of this cell.
metrics = dataset_aggr.copy()

# NOTE(review): k is derived from the instance index plus 10, which presumably
# assumes the first instance of each family is k=10 and k grows by 1 per
# instance — confirm against the benchmark registration.
metrics['k'] = metrics['per_family_instance_index'] + 10

# Segment 1 of the '/'-separated name is "<device>_<threading>".
device_and_threading = metrics['name'].str.split('/').str[1].str.split('_')
metrics['device'] = device_and_threading.str[0]
metrics['threading'] = device_and_threading.str[1]

# Only aggregates whose unit is "time" are rescaled from ms to s; other
# aggregates keep their value. The unit label is changed on every 'ms' row.
time_filter_m = (metrics['time_unit'] == "ms") & (metrics['aggregate_unit'] == "time")
metrics.loc[time_filter_m, 'real_time'] /= 1000
metrics.loc[time_filter_m, 'cpu_time'] /= 1000
metrics.loc[metrics['time_unit'] == "ms", 'time_unit'] = 's'

# .copy() makes the column selection an independent frame, so adding columns to
# `metrics` in later cells does not raise SettingWithCopyWarning.
metrics = metrics[['k', 'device', 'threading', 'repetitions', 'aggregate_name', 'aggregate_unit', 'iterations', 'real_time', 'cpu_time', 'time_unit']].copy()
metrics

In [None]:
# Label every row with "<device>-<threading>" and drop the gpu-multi configuration.
# assign() returns a new frame instead of writing a column into a sliced one,
# which avoids SettingWithCopyWarning and keeps the cell safe to re-run.
benchmarks = (
    benchmarks
    .assign(type=lambda frame: frame['device'] + '-' + frame['threading'])
    .query('type!="gpu-multi"')
)
metrics = (
    metrics
    .assign(type=lambda frame: frame['device'] + '-' + frame['threading'])
    .query('type!="gpu-multi"')
)

In [None]:
# Every individual measurement as one marker, coloured and shaped by configuration.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=benchmarks,
    x='k',
    y='real_time',
    hue='type',
    style='type',
    alpha=0.8,
    ax=ax,
)
ax.set_xlabel('k')
ax.set_ylabel('real time (s)')
ax.set_yscale('log')
ax.set_title('Comparison time by device and threading')
# Anchor the legend just outside the right edge of the axes.
ax.legend(title='Device-Threading', bbox_to_anchor=(1.01, 1), loc='upper left')
fig.tight_layout()
plt.show()

In [None]:
# Mean real time per k for each configuration; the error bars are seaborn's
# 99% confidence interval of the mean (errorbar=('ci', 99)), not a standard deviation.
plt.figure(figsize=(10, 6))
sns.lineplot(data=benchmarks, x='k', y='real_time', hue='type', style='type', errorbar=('ci', 99), err_style='bars')
plt.xlabel('k')
plt.ylabel('real time (s)')
plt.yscale('log')
# The title names the statistic that is actually drawn (previously it said "standard deviation").
plt.title('Comparison time by device and threading (average, CI 99%)')
plt.legend(title='Device-Threading', bbox_to_anchor=(1.01, 1), loc='upper left')
plt.tight_layout()
plt.show()

In [None]:
# Per-configuration subsets of the measurements.
on_cpu = benchmarks['device'] == 'cpu'
on_gpu = benchmarks['device'] == 'gpu'
single_threaded = benchmarks['threading'] == 'single'
multi_threaded = benchmarks['threading'] == 'multi'

df_single = benchmarks[on_cpu & single_threaded]
df_single_gpu = benchmarks[on_gpu & single_threaded]
df_multi = benchmarks[on_cpu & multi_threaded]
df_multi_gpu = benchmarks[on_gpu & multi_threaded]  # defined but not plotted below

# (frame, legend label, marker, regression-line style) for each plotted series.
regression_series = [
    (df_single, 'cpu-single', 'x', None),
    (df_single_gpu, 'gpu-single', 'o', {"ls": "--"}),
    (df_multi, 'cpu-multi', 'd', {"ls": "-."}),
]

fig, ax = plt.subplots(figsize=(10, 6))
for frame, label, marker, line_style in regression_series:
    sns.regplot(
        x=pd.to_numeric(frame['k']),
        y=pd.to_numeric(frame['real_time']),
        label=label,
        marker=marker,
        line_kws=line_style,
        ci=99,
        ax=ax,
    )
ax.set_xlabel('k')
ax.set_ylabel('real time (s)')
ax.set_yscale('log')
ax.set_title('Comparison time by device and threading (linear regression)')
ax.legend(title='Device-Threading', bbox_to_anchor=(1.01, 1), loc='upper left')
fig.tight_layout()
plt.show()

In [None]:
# Same regression as the previous figure, drawn on a linear y axis.
# (frame, legend label, marker, regression-line style) for each plotted series.
linear_series = [
    (df_single, 'cpu-single', 'x', None),
    (df_single_gpu, 'gpu-single', 'o', {"ls": "--"}),
    (df_multi, 'cpu-multi', 'd', {"ls": "-."}),
]

fig, ax = plt.subplots(figsize=(10, 6))
for frame, label, marker, line_style in linear_series:
    sns.regplot(
        x=pd.to_numeric(frame['k']),
        y=pd.to_numeric(frame['real_time']),
        label=label,
        marker=marker,
        line_kws=line_style,
        ci=99,
        ax=ax,
    )
ax.set_xlabel('k')
ax.set_ylabel('real time (s)')
ax.set_title('Comparison time by device and threading (linear regression)')
ax.legend(title='Device-Threading', bbox_to_anchor=(1.01, 1), loc='upper left')
fig.tight_layout()
plt.show()

In [None]:
# One frame per aggregate statistic.
df_mean = metrics[metrics['aggregate_name'] == 'mean']
df_median = metrics[metrics['aggregate_name'] == 'median']
df_stddev = metrics[metrics['aggregate_name'] == 'stddev']
df_cv = metrics[metrics['aggregate_name'] == 'cv']

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# The three time-valued panels share the same layout: markers plus a faint
# connecting line on a log-scaled y axis, with no legend.
# (axes, frame, panel title, line alpha)
time_panels = [
    (axes[0, 0], df_mean, 'Average execution time', 0.3),
    (axes[0, 1], df_median, 'Median execution time', 0.3),
    (axes[1, 0], df_stddev, 'Standard deviation', 0.5),
]
for panel, frame, panel_title, line_alpha in time_panels:
    sns.scatterplot(data=frame, x='k', y='real_time', hue='type', style='type', ax=panel, legend=None)
    sns.lineplot(data=frame, x='k', y='real_time', hue='type', style='type', ax=panel, legend=None, alpha=line_alpha)
    panel.set_xlabel('k')
    panel.set_ylabel('time (s)')
    panel.set_yscale('log')
    panel.set_title(panel_title)

# Coefficient of variation: values are scaled by 100 and labelled as a percentage.
# This is the only panel drawn with a legend, on a linear y axis.
cv_panel = axes[1, 1]
sns.scatterplot(data=df_cv, x='k', y=df_cv['real_time'] * 100, hue='type', style='type', ax=cv_panel)
sns.lineplot(data=df_cv, x='k', y=df_cv['real_time'] * 100, hue='type', style='type', ax=cv_panel, alpha=0.3)
cv_panel.set_xlabel('k')
cv_panel.set_ylabel('%')
cv_panel.set_title('Coefficient of variation')

fig.tight_layout()

In [None]:
with open('gpu_range.json') as f:
    gpu_range = json.load(f)

# NOTE: `iterations` and `dataset` are rebound here and no longer refer to the
# four-report data loaded at the top of the notebook.
iterations = list()
iterations.append(pd.DataFrame(data=[b for b in gpu_range["benchmarks"] if b["run_type"] == "iteration"]))
dataset = pd.concat(iterations)
dataset.reset_index(drop=True, inplace=True)

# Work on a copy so the raw `dataset` frame is not modified by the steps below.
df = dataset.copy()

# Names are split on '/': segment 1 is "<device>_<threading>" and segment 2 is k.
name_parts = df['name'].str.split('/')
device_and_threading = name_parts.str[1].str.split('_')
df['k'] = name_parts.str[2]
df['device'] = device_and_threading.str[0]
df['threading'] = device_and_threading.str[1]

# Convert millisecond timings to seconds and relabel the unit.
# NOTE(review): rows reported in any other unit are left untouched — confirm
# that the file reports 'ms' (or already 's').
time_filter = df['time_unit'] == 'ms'
df.loc[time_filter, ['real_time']] /= 1000
df.loc[time_filter, ['cpu_time']] /= 1000
df.loc[time_filter, ['time_unit']] = 's'

# .copy() makes the selection an independent frame, so the column assignment
# below does not raise SettingWithCopyWarning.
df = df[['k', 'device', 'threading', 'repetitions', 'repetition_index', 'iterations', 'real_time', 'cpu_time', 'time_unit']].copy()
df['k'] = pd.to_numeric(df['k'])

# GPU single-threaded rows from the main table; k is converted to numbers so the
# combined column is not a mix of strings and numbers.
df2 = benchmarks.drop(['type'], axis=1)
gpu_single_base = df2.query('device=="gpu" & threading=="single"').assign(
    k=lambda frame: pd.to_numeric(frame['k'])
)

# NOTE(review): only k > 20 is taken from the range file, presumably because the
# main table already covers k up to 20 — confirm.
# reset_index() (without drop=True) keeps the previous row labels in an 'index' column.
df_gpu = pd.concat([gpu_single_base, df.query('k > 20')]).reset_index()
df_gpu

In [None]:
# Scatter of every GPU measurement with a fitted regression line and its 99% band.
gpu_k = pd.to_numeric(df_gpu['k'])
gpu_real_time = pd.to_numeric(df_gpu['real_time'])

fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(x=gpu_k, y=gpu_real_time, label='gpu-single', ci=99, ax=ax)
ax.set_xlabel('k')
ax.set_ylabel('real time (s)')
ax.set_title('GPU comparison time (linear regression)')
fig.tight_layout()
plt.show()

In [None]:
# Same regression, but the points at each k are collapsed to their mean
# (x_estimator=np.mean) before being drawn.
gpu_k = pd.to_numeric(df_gpu['k'])
gpu_real_time = pd.to_numeric(df_gpu['real_time'])

fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(
    x=gpu_k,
    y=gpu_real_time,
    label='gpu-single',
    x_estimator=np.mean,
    order=1,
    ci=99,
    marker='.',
    ax=ax,
)
ax.set_xlabel('k')
ax.set_ylabel('real time (s)')
ax.set_title('GPU comparison time (linear regression, CI 99%)')
fig.tight_layout()
plt.show()

In [None]:
# Mean GPU real time per k, with 99% confidence-interval error bars.
gpu_k = pd.to_numeric(df_gpu['k'])
gpu_real_time = pd.to_numeric(df_gpu['real_time'])

fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(x=gpu_k, y=gpu_real_time, errorbar=('ci', 99), err_style='bars', ax=ax)
ax.set_xlabel('k')
ax.set_ylabel('real time (s)')
ax.set_title('GPU comparison time (average, CI 99%)')
fig.tight_layout()
plt.show()