V0.5.13 (#91)
* Prepare next release

* Include Docker images for TPC-H in subfolder

* More TPC examples

* Experiments cleaned

* TPC-DS: Some example DDL and query files

* TPC-H: Some docs
perdelt committed Aug 25, 2021
1 parent 0b79480 commit 5d2f83b
Showing 123 changed files with 36,850 additions and 25 deletions.
5 changes: 2 additions & 3 deletions README.md
@@ -32,10 +32,9 @@ A more advanced workflow is: Plan a sequence of such experiments, run plan as a

## Quickstart

-The repository contains a tool for running TPC-H (reading) queries at MonetDB and PostgreSQL.
+The repository contains a [tool](experiments/tpch/) for running TPC-H (reading) queries at MonetDB and PostgreSQL.

-1. Run `tpch run`.
-   This is equivalent to `python tpch.py run`.
+1. Run `tpch run -sf 1 -t 30`.
1. You can watch status using `bexperiments status` while running.
This is equivalent to `python cluster.py status`.
1. After benchmarking has finished, run `bexperiments dashboard` to connect to a dashboard. You can open dashboard in browser at `http://localhost:8050`.
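Taken together, the updated quickstart corresponds to roughly the following session; a sketch, assuming the `tpch` and `bexperiments` console scripts of this package are installed and on the PATH:

    # run the TPC-H experiment at scale factor 1 with a 30 s per-query timeout
    tpch run -sf 1 -t 30

    # in a second terminal: watch the status while the experiment is running
    bexperiments status

    # after benchmarking has finished: start the dashboard, then open http://localhost:8050
    bexperiments dashboard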
2 changes: 1 addition & 1 deletion bexhoma/scripts/__init__.py
@@ -1,4 +1,4 @@
"""
The clustermanager module
"""
__all__ = ["experimentsmanager","tpch"]
__all__ = ["experimentsmanager","tpch","tpcds"]
192 changes: 192 additions & 0 deletions bexhoma/scripts/tpcds.py
@@ -0,0 +1,192 @@
"""
:Date: 2021-02-12
:Version: 0.1
:Authors: Patrick Erdelt
Perform TPC-DS inspired benchmarks in a Kubernetes cluster.
This either profiles the imported data in several DBMS and compares some statistics, or runs the TPC-DS queries.
Optionally, monitoring is activated.
The user can choose to detach the components of the benchmarking system, so that as much as possible is run inside a Kubernetes (K8s) cluster.
The user can also choose some parameters like the number of runs per query and configuration, and request some resources.
"""
from bexhoma import *
from dbmsbenchmarker import *
#import experiments
import logging
import urllib3
import argparse
import time
from timeit import default_timer
import datetime


urllib3.disable_warnings()
logging.basicConfig(level=logging.ERROR)

def do_benchmark():
    description = """Perform TPC-DS inspired benchmarks in a Kubernetes cluster.
    This either profiles the imported data in several DBMS and compares some statistics, or runs the TPC-DS queries.
    Optionally, monitoring is activated.
    The user can choose to detach the components of the benchmarking system, so that as much as possible is run inside a Kubernetes (K8s) cluster.
    The user can also choose some parameters like the number of runs per query and configuration, and request some resources.
    """
    # argparse
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('mode', help='profile the import of TPC-DS data, or run the TPC-DS queries, or start DBMS and load data, or just start the DBMS', choices=['profiling', 'run', 'start', 'load'])
    parser.add_argument('-cx', '--context', help='context of Kubernetes (for a multi cluster environment), default is current context', default=None)
    parser.add_argument('-e', '--experiment', help='sets experiment code for continuing a started experiment', default=None)
    parser.add_argument('-d', '--detached', help='puts most of the experiment workflow inside the cluster', action='store_true')
    parser.add_argument('-m', '--monitoring', help='activates monitoring', action='store_true')
    parser.add_argument('-ms', '--max-sut', help='maximum number of parallel DBMS configurations, default is no limit', default=None)
    parser.add_argument('-dt', '--datatransfer', help='activates datatransfer', action='store_true', default=False)
    parser.add_argument('-md', '--monitoring-delay', help='time to wait [s] before execution of the runs of a query', default=10)
    parser.add_argument('-nr', '--num-run', help='number of runs per query', default=1)
    parser.add_argument('-nc', '--num-config', help='number of runs per configuration', default=1)
    parser.add_argument('-ne', '--num-query-executors', help='comma separated list of number of parallel clients', default="1")
    parser.add_argument('-sf', '--scaling-factor', help='scaling factor (SF)', default=1)
    parser.add_argument('-t', '--timeout', help='timeout for a run of a query', default=180)
    parser.add_argument('-rr', '--request-ram', help='request ram', default='16Gi')
    parser.add_argument('-rc', '--request-cpu', help='request cpus', default='4')
    parser.add_argument('-rct', '--request-cpu-type', help='request node having node label cpu=', default='')
    parser.add_argument('-rg', '--request-gpu', help='request number of gpus', default=1)
    parser.add_argument('-rgt', '--request-gpu-type', help='request node having node label gpu=', default='a100')
    parser.add_argument('-rst', '--request-storage-type', help='request persistent storage of certain type', default=None, choices=[None, '', 'local-hdd', 'shared'])
    parser.add_argument('-rss', '--request-storage-size', help='request persistent storage of certain size', default='10Gi')
    parser.add_argument('-rnn', '--request-node-name', help='request a specific node', default=None)
    args = parser.parse_args()
    # set parameter
    monitoring = args.monitoring
    mode = str(args.mode)
    SF = str(args.scaling_factor)
    timeout = int(args.timeout)
    numRun = int(args.num_run)
    numExperiments = int(args.num_config)
    cpu = str(args.request_cpu)
    memory = str(args.request_ram)
    cpu_type = str(args.request_cpu_type)
    gpu_type = str(args.request_gpu_type)
    gpus = str(args.request_gpu)
    request_storage_type = args.request_storage_type
    request_storage_size = args.request_storage_size
    request_node_name = args.request_node_name
    datatransfer = args.datatransfer
    code = args.experiment
    # set cluster
    cluster = clusters.kubernetes(context=args.context)
    cluster_name = cluster.contextdata['clustername']
    if args.max_sut is not None:
        cluster.max_sut = int(args.max_sut)
    # set experiment
    if code is None:
        code = cluster.code
    experiment = experiments.tpcds(cluster=cluster, SF=SF, timeout=timeout, detached=True, code=code, numExperiments=numExperiments)
    if mode == 'run':
        # we want all TPC-DS queries
        experiment.set_queries_full()
        experiment.set_workload(
            name = 'TPC-DS Queries SF='+str(SF),
            info = 'This experiment compares run time and resource consumption of TPC-DS queries in different DBMS.'
            )
    else:
        # we want to profile the import
        experiment.set_queries_profiling()
        experiment.set_workload(
            name = 'TPC-DS Data Profiling SF='+str(SF),
            info = 'This experiment compares imported TPC-DS data sets in different DBMS.'
            )
    if monitoring:
        # we want to monitor resource consumption
        experiment.set_querymanagement_monitoring(numRun=numRun, delay=10, datatransfer=datatransfer)
    else:
        # we want to just run the queries
        experiment.set_querymanagement_quicktest(numRun=numRun, datatransfer=datatransfer)
    # set resources for dbms
    experiment.set_resources(
        requests = {
            'cpu': cpu,
            'memory': memory,
            'gpu': 0
        },
        limits = {
            'cpu': 0,
            'memory': 0
        },
        nodeSelector = {
            'cpu': cpu_type,
            'gpu': '',
        })
    if request_node_name is not None:
        experiment.set_resources(
            nodeSelector = {
                'cpu': cpu_type,
                'gpu': '',
                'kubernetes.io/hostname': request_node_name
            })
    # persistent storage
    print(request_storage_type)
    experiment.set_storage(
        storageClassName = request_storage_type,
        storageSize = request_storage_size, #'100Gi',
        keep = False
    )
    cluster.start_dashboard()
    # add configs
    config = configurations.default(experiment=experiment, docker='MonetDB', configuration='MonetDB-{}'.format(cluster_name), alias='DBMS A', dialect='MonetDB')
    #config = configurations.default(experiment=experiment, docker='MemSQL', configuration='MemSQL-{}'.format(cluster_name), alias='DBMS B', dialect='MonetDB')
    #config = configurations.default(experiment=experiment, docker='MariaDB', configuration='MariaDB-{}'.format(cluster_name), alias='DBMS C')
    config = configurations.default(experiment=experiment, docker='PostgreSQL', configuration='PostgreSQL-{}'.format(cluster_name), alias='DBMS D', dialect='MonetDB')
    #config = configurations.default(experiment=experiment, docker='Citus', configuration='Citus-{}'.format(cluster_name), alias='DBMS E', dialect='OmniSci')
    #config = configurations.default(experiment=experiment, docker='MySQL', configuration='MySQL-{}'.format(cluster_name), alias='DBMS F')
    #config = configurations.default(experiment=experiment, docker='MariaDBCS', configuration='MariaDBCS-{}'.format(cluster_name), alias='DBMS G')
    #config = configurations.default(experiment=experiment, docker='Exasol', configuration='Exasol-{}'.format(cluster_name), alias='DBMS H')
    #config = configurations.default(experiment=experiment, docker='DB2', configuration='DB2-{}'.format(cluster_name), alias='DBMS I')
    #config = configurations.default(experiment=experiment, docker='SAPHANA', configuration='SAPHANA-{}'.format(cluster_name), alias='DBMS J', dialect='MonetDB')
    #config = configurations.default(experiment=experiment, docker='Clickhouse', configuration='Clickhouse-{}'.format(cluster_name), alias='DBMS K')
    #config = configurations.default(experiment=experiment, docker='SQLServer', configuration='SQLServer-{}'.format(cluster_name), alias='DBMS L')
    #config = configurations.default(experiment=experiment, docker='OmniSci', configuration='OmniSci-{}'.format(cluster_name), alias='DBMS M')
    if args.mode == 'start':
        experiment.start_sut()
    elif args.mode == 'load':
        # start all DBMS
        experiment.start_sut()
        # configure number of clients per config = 0
        list_clients = []
        # total time of experiment
        experiment.add_benchmark_list(list_clients)
        start = default_timer()
        start_datetime = str(datetime.datetime.now())
        print("Experiment starts at {} ({})".format(start_datetime, start))
        # run workflow
        experiment.work_benchmark_list()
        # total time of experiment
        end = default_timer()
        end_datetime = str(datetime.datetime.now())
        duration_experiment = end - start
        print("Experiment ends at {} ({}): {}s total".format(end_datetime, end, duration_experiment))
    else:
        # configure number of clients per config
        list_clients = args.num_query_executors.split(",")
        if len(list_clients) > 0:
            list_clients = [int(x) for x in list_clients]
        experiment.add_benchmark_list(list_clients)
        # total time of experiment
        start = default_timer()
        start_datetime = str(datetime.datetime.now())
        print("Experiment starts at {} ({})".format(start_datetime, start))
        # run workflow
        experiment.work_benchmark_list()
        # total time of experiment
        end = default_timer()
        end_datetime = str(datetime.datetime.now())
        duration_experiment = end - start
        print("Experiment ends at {} ({}): {}s total".format(end_datetime, end, duration_experiment))
        ##################
        experiment.evaluate_results()
        experiment.stop_benchmarker()
        experiment.stop_sut()
        cluster.stop_dashboard()
        cluster.start_dashboard()
        # OOM? exit code 137
        #experiment.zip()
    exit()
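For orientation, a hypothetical invocation of this new TPC-DS script; the flags correspond to the argparse options defined above, while the `tpcds` entry point name is an assumption by analogy to the existing `tpch` script (it is not defined in this diff):

    # run the TPC-DS queries at scale factor 1 with monitoring,
    # 3 runs per query and 1 or 2 parallel query executors (example values)
    tpcds run -sf 1 -t 120 -m -nr 3 -ne 1,2

    # only profile the imported TPC-DS data instead of running the query workload
    tpcds profiling -sf 1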
26 changes: 13 additions & 13 deletions bexhoma/scripts/tpch.py
@@ -132,19 +132,19 @@ def do_benchmark():
    )
    cluster.start_dashboard()
    # add configs
-    config = configurations.default(experiment=experiment, docker='MonetDB', alias='DBMS A', numExperiments=1, clients=[1])
-    #config = configurations.default(experiment=experiment, docker='MemSQL', alias='DBMS B', numExperiments=1, clients=[1])
-    #config = configurations.default(experiment=experiment, docker='MariaDB', alias='DBMS C', numExperiments=1, clients=[1])
-    config = configurations.default(experiment=experiment, docker='PostgreSQL', alias='DBMS D', numExperiments=1, clients=[1])
-    #config = configurations.default(experiment=experiment, docker='Citus', alias='DBMS E', numExperiments=1, dialect='OmniSci', clients=[1])
-    #config = configurations.default(experiment=experiment, docker='MySQL', alias='DBMS F', numExperiments=1, clients=[1])
-    #config = configurations.default(experiment=experiment, docker='MariaDBCS', alias='DBMS G', numExperiments=1, clients=[1])
-    #config = configurations.default(experiment=experiment, docker='Exasol', alias='DBMS H', numExperiments=1, clients=[1])
-    #config = configurations.default(experiment=experiment, docker='DB2', alias='DBMS I', numExperiments=1, clients=[1])
-    #config = configurations.default(experiment=experiment, docker='SAPHANA', alias='DBMS J', numExperiments=1, clients=[1])
-    #config = configurations.default(experiment=experiment, docker='Clickhouse', alias='DBMS K', numExperiments=1, clients=[1])
-    #config = configurations.default(experiment=experiment, docker='SQLServer', alias='DBMS L', numExperiments=1, clients=[1])
-    #config = configurations.default(experiment=experiment, docker='OmniSci', alias='DBMS M', numExperiments=1, clients=[1])
+    config = configurations.default(experiment=experiment, docker='MonetDB', configuration='MonetDB-{}'.format(cluster_name), alias='DBMS A')
+    #config = configurations.default(experiment=experiment, docker='MemSQL', configuration='MemSQL-{}'.format(cluster_name), alias='DBMS B')
+    #config = configurations.default(experiment=experiment, docker='MariaDB', configuration='MariaDB-{}'.format(cluster_name), alias='DBMS C')
+    config = configurations.default(experiment=experiment, docker='PostgreSQL', configuration='PostgreSQL-{}'.format(cluster_name), alias='DBMS D')
+    #config = configurations.default(experiment=experiment, docker='Citus', configuration='Citus-{}'.format(cluster_name), alias='DBMS E', dialect='OmniSci')
+    #config = configurations.default(experiment=experiment, docker='MySQL', configuration='MySQL-{}'.format(cluster_name), alias='DBMS F')
+    #config = configurations.default(experiment=experiment, docker='MariaDBCS', configuration='MariaDBCS-{}'.format(cluster_name), alias='DBMS G')
+    #config = configurations.default(experiment=experiment, docker='Exasol', configuration='Exasol-{}'.format(cluster_name), alias='DBMS H')
+    #config = configurations.default(experiment=experiment, docker='DB2', configuration='DB2-{}'.format(cluster_name), alias='DBMS I')
+    #config = configurations.default(experiment=experiment, docker='SAPHANA', configuration='SAPHANA-{}'.format(cluster_name), alias='DBMS J')
+    #config = configurations.default(experiment=experiment, docker='Clickhouse', configuration='Clickhouse-{}'.format(cluster_name), alias='DBMS K')
+    #config = configurations.default(experiment=experiment, docker='SQLServer', configuration='SQLServer-{}'.format(cluster_name), alias='DBMS L')
+    #config = configurations.default(experiment=experiment, docker='OmniSci', configuration='OmniSci-{}'.format(cluster_name), alias='DBMS M')
    if args.mode == 'start':
        experiment.start_sut()
    elif args.mode == 'load':
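The effect of this change is that the per-config arguments `numExperiments=1, clients=[1]` are replaced by an explicit `configuration` name derived from the cluster name, so run and client counts are presumably taken from the command line instead. A hypothetical call, assuming tpch.py accepts the same flags as the TPC-DS parser above:

    # one configuration per DBMS, one run per query, a single client
    tpch run -sf 1 -t 30 -nc 1 -nr 1 -ne 1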
3 changes: 0 additions & 3 deletions experiments/example/OmniSci/initdata.sql

This file was deleted.

4 changes: 0 additions & 4 deletions experiments/example/OmniSci/initschema.sql

This file was deleted.

1 change: 1 addition & 0 deletions experiments/tpcds/Citus/.gitignore
@@ -0,0 +1 @@
filled*.sql
