Skip to content

Commit

Permalink
V0.4.2 (#36)
Browse files Browse the repository at this point in the history
* Prepare next release

* K8s: Parametrize DDL scripts

* K8s: No pickled reports

* Demo: Typo

* Docs: API of set_ methods

* Docs: Deployments

* Docs: DBMS

* Docs: References
  • Loading branch information
perdelt committed Sep 10, 2020
1 parent 9de59c4 commit 2c4e56e
Show file tree
Hide file tree
Showing 11 changed files with 386 additions and 48 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ This tool supports AWS and kubernetes (k8s) based clusters.
This documentation
* illustrates the [concepts](docs/Concept.md)
* provides a basic [TPC-H like example](docs/Example-TPC-H.md)
* shows how to use several [DBMS](docs/DBMS.md)
* MariaDB
* MonetDB
* OmniSci
* PostgreSQL
* illustrates the deployment in [Kubernetes](docs/Deployments.md)
* provides [more detailed examples](docs/Examples.md)
* [Example: TPC-H Benchmark for 3 DBMS on 1 Virtual Machine](docs/Examples.md#example-tpc-h-benchmark-for-3-dbms-on-1-virtual-machine)
* [Example: TPC-H Benchmark for 1 DBMS on 3 Virtual Machines](docs/Examples.md#example-tpc-h-benchmark-for-1-dbms-on-3-virtual-machines)
Expand Down
27 changes: 21 additions & 6 deletions bexhoma/masterK8s.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def __init__(self, clusterconfig='cluster.config', configfolder='experiments/',
self.clusterconfig = clusterconfig
self.timeLoading = 0
self.resources = {}
self.ddl_parameters = {}
self.connectionmanagement = {}
self.connectionmanagement['numProcesses'] = None
self.connectionmanagement['runsPerConnection'] = None
Expand Down Expand Up @@ -77,6 +78,8 @@ def set_querymanagement(self, **kwargs):
self.querymanagement = kwargs
def set_resources(self, **kwargs):
    """Set resource demands for the DBMS deployment in Kubernetes.

    Accepted keys (per docs/API.md): `requests`, `limits` and `nodeSelector`
    dicts, e.g. requests={'cpu': '4000m', 'memory': '16Gi'}.
    These values overwrite the corresponding settings in the instance
    description (deployment YAML) when the deployment is created.
    """
    self.resources = kwargs
def set_ddl_parameters(self, **kwargs):
    """Set parameters that fill placeholders in the DDL (init) scripts.

    Each keyword becomes a template variable: every occurrence of
    `{key}` in an init script is replaced by the given value via
    str.format() before the script is copied to the DBMS pod
    (see prepareInit()). Example: set_ddl_parameters(shard_count='2')
    replaces all `{shard_count}` occurrences with '2'.
    """
    self.ddl_parameters = kwargs
def set_code(self, code):
    """Set the unique code of the experiment.

    Snake_case alias for the legacy camelCase setCode(); simply
    delegates and returns its result.
    """
    return self.setCode(code)
def setCode(self, code):
Expand Down Expand Up @@ -490,10 +493,22 @@ def prepareInit(self):
# cmd['copy_init_scripts'] = 'cp {scriptname}'.format(scriptname=scriptfolder+script)+' /data/'+str(self.code)+'/'+self.connection+'_init_'+str(i)+'.log'
# stdin, stdout, stderr = self.executeCTL(cmd['copy_init_scripts'])
# i = i + 1
for script in self.initscript:
filename = self.d+'/'+script
if os.path.isfile(self.configfolder+'/'+filename):
self.kubectl('kubectl cp --container dbms {from_name} {to_name}'.format(from_name=self.configfolder+'/'+filename, to_name=self.activepod+':'+scriptfolder+script))
if len(self.ddl_parameters):
for script in self.initscript:
filename_template = self.d+'/'+script
if os.path.isfile(self.configfolder+'/'+filename_template):
with open(self.configfolder+'/'+filename_template, "r") as initscript_template:
data = initscript_template.read()
data = data.format(**self.ddl_parameters)
filename_filled = self.d+'/filled_'+script
with open(self.configfolder+'/'+filename_filled, "w") as initscript_filled:
initscript_filled.write(data)
self.kubectl('kubectl cp --container dbms {from_name} {to_name}'.format(from_name=self.configfolder+'/'+filename_filled, to_name=self.activepod+':'+scriptfolder+script))
else:
for script in self.initscript:
filename = self.d+'/'+script
if os.path.isfile(self.configfolder+'/'+filename):
self.kubectl('kubectl cp --container dbms {from_name} {to_name}'.format(from_name=self.configfolder+'/'+filename, to_name=self.activepod+':'+scriptfolder+script))
def loadData(self):
self.prepareInit()
print("loadData")
Expand Down Expand Up @@ -739,8 +754,8 @@ def runBenchmarks(self, connection=None, code=None, info=[], resultfolder='', co
if os.path.isfile(self.yamlfolder+self.deployment):
shutil.copy(self.yamlfolder+self.deployment, self.benchmark.path+'/'+connection+'.yml')
# append necessary reporters
self.benchmark.reporter.append(benchmarker.reporter.dataframer(self.benchmark))
self.benchmark.reporter.append(benchmarker.reporter.pickler(self.benchmark))
#self.benchmark.reporter.append(benchmarker.reporter.dataframer(self.benchmark))
#self.benchmark.reporter.append(benchmarker.reporter.pickler(self.benchmark))
# run or continue benchmarking
if code is not None:
self.benchmark.continueBenchmarks(overwrite = True)
Expand Down
2 changes: 1 addition & 1 deletion demo-tpch-k8s.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
Demo for bexhoma
This compares MonetDB and PostgreSQL performing some some TPC-H queries.
This compares MonetDB and PostgreSQL performing some TPC-H queries.
The cluster is managed using Kubernetes.
Copyright (C) 2020 Patrick Erdelt
Expand Down
118 changes: 84 additions & 34 deletions docs/API.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
# API Details

This document contains API details about
* [`setCode()`](#set-code)
* [`setExperiment()`](#set-experiment)
* [`set_code()`](#set-code)
* [`set_experiment()`](#set-experiment)
* [`set_workload()`](#set-workload)
* [`set_connectionmanagement()`](#set-connection-management)
* [`set_querymanagement()`](#set-query-management)
* [`set_resources()`](#set-resources)
* [`set_ddl_parameters()`](#set-ddl-parameters)
* [`runExperiment()`](#run-experiment)
* [`prepareExperiment()`](#prepare-experiment)
* [`startExperiment()`](#start-experiment)
Expand Down Expand Up @@ -32,6 +37,82 @@ The four parameters are given as keys to improve usability, for example `script="
Most of these keys are translated into technical details using a configuration file, c.f. an [example](../k8s-cluster.config).
Instances in a Kubernetes cluster are translated using [YAML files](#deployments).

## Set Workload

Specify details about the following experiment. This overwrites infos given in the query file.

```
cluster.set_workload(
name = 'TPC-H Queries',
info = 'This experiment compares instances of different DBMS on different machines.'
)
```

* `name`: Name of experiment
* `info`: Arbitrary string

These infos are used in reporting.

## Set Connection Management

Specify details about the following experiment. This overwrites infos given in the query file.

```
cluster.set_connectionmanagement(
numProcesses = 1,
runsPerConnection = 0,
timeout = 600
)
```

* `timeout`: Maximum lifespan of a connection. Default is None, i.e. no limit.
* `numProcesses`: Number of parallel client processes. Default is 1.
* `runsPerConnection`: Number of runs performed before connection is closed. Default is None, i.e. no limit.

These values are handed over to the [benchmarker](https://github.com/Beuth-Erdelt/DBMS-Benchmarker/blob/master/docs/Options.md#extended-query-file).

## Set Query Management

Specify details about the following experiment. This overwrites infos given in the query file.

```
cluster.set_querymanagement(numRun = 1)
```

* `numRun`: Number of runs each query is run for benchmarking

These values are handed over to the [benchmarker](https://github.com/Beuth-Erdelt/DBMS-Benchmarker/blob/master/docs/Options.md#extended-query-file), c.f. for more options.

## Set Resources

Specify details about the following experiment. This overwrites infos given in the instance description (YAML) in [deployments](Deployments.md) for Kubernetes.

```
cluster.set_resources(
requests = {
'cpu': '4000m',
'memory': '16Gi'
},
limits = {
'cpu': 0,
'memory': 0
},
nodeSelector = {
'gpu': 'v100',
})
```

## Set DDL Parameters

Specify details about the DDL scripts. This replaces placeholders in the scripts.

```
cluster.set_ddl_parameters(
shard_count = '2'
)
```
All occurrences of `{shard_count}` in the DDL scripts of the following experiment will be replaced by `2`.

## Run Experiment

<p align="center">
Expand Down Expand Up @@ -67,38 +148,7 @@ cluster.startPortforwarding()
* `cluster.createDeployment()`: Creates a deployment (pod and services) of Docker images to k8s
* Setup Network `cluster.startPortforwarding()`: Forwards the port of the DBMS in the pod to localhost:fixedport (same for all containers)

#### Deployments

The deployment is expected to be given as a file named `'deployment-'+docker+'-'+instance+'.yml'`
If no such file exists, a file named `'deploymenttemplate-"+docker+".yml'` is loaded and
* the instance name is understood as `cpu-mem-gpu-gputype`
* the yaml file is changed as
```
dep['spec']['template']['spec']['containers'][0]['resources']['requests']['cpu'] = cpu
dep['spec']['template']['spec']['containers'][0]['resources']['limits']['cpu'] = cpu
dep['spec']['template']['spec']['containers'][0]['resources']['requests']['memory'] = mem
dep['spec']['template']['spec']['containers'][0]['resources']['limits']['memory'] = mem
dep['spec']['template']['spec']['nodeSelector']['gpu'] = gputype
dep['spec']['template']['spec']['containers'][0]['resources']['limits']['nvidia.com/gpu'] = int(gpu)
```
* saved as `'deployment-'+docker+'-'+instance+'.yml'`

The resources (requests, limits and nodeSelector) can also be set explicitly using
```
cluster.set_resources(
requests = {
'cpu': cpu,
'memory': mem
},
limits = {
'cpu': 0, # unlimited
'memory': 0 # unlimited
},
nodeSelector = {
'cpu': cpu_type,
'gpu': gpu_type,
})
```
See the documentation for more information about [deployments](Deployments.md).

### On AWS

Expand Down
183 changes: 183 additions & 0 deletions docs/DBMS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
# DBMS

To include a DBMS in a Kubernetes-based experiment you will need
* a Docker Image
* a JDBC Driver
* a Kubernetes Deployment Template
* some configuration
* How to load data (DDL command)
* DDL scripts
* How to connect via JDBC

This document contains examples for
* [MariaDB](#mariadb)
* [MonetDB](#monetdb)
* [OmniSci](#omnisci)
* [PostgreSQL](#postgresql)


## Example Explained

### Deployment

See documentation of [deployments](Deployments.md).

### Configuration

```
'dockers': {
'OmniSci': {
'loadData': 'bin/omnisql -u admin -pHyperInteractive < {scriptname}', # DBMS: Command to Login and Run Scripts
'template': { # Template for Benchmark Tool
'version': 'CE v5.4',
'alias': 'GPU',
'docker_alias': 'GPU',
'JDBC': {
'driver': 'com.omnisci.jdbc.OmniSciDriver',
'url': 'jdbc:omnisci:{serverip}:9091:omnisci',
'auth': {'user': 'admin', 'password': 'HyperInteractive'},
'jar': './omnisci-jdbc-4.7.1.jar' # DBMS: Local Path to JDBC Jar
}
},
'logfile': '/omnisci-storage/data/mapd_log/omnisci_server.INFO', # DBMS: Path to Log File on Server
'datadir': '/omnisci-storage/data/mapd_data/', # DBMS: Path to directory containing data storage
'priceperhourdollar': 0.0, # DBMS: Price per hour in USD if DBMS is rented
}
}
```
This has
* a base name for the DBMS
* a placeholder `template` for the [benchmark tool](https://github.com/Beuth-Erdelt/DBMS-Benchmarker/blob/master/docs/Options.md#connection-file)
* the JDBC driver jar locally available
* a command `loadData` for running the init scripts with `{scriptname}` as a placeholder for the script name inside the container
* `{serverip}` as a placeholder for the host address (localhost for k8s, an Elastic IP for AWS)
* `{dbname}` as a placeholder for the db name
* an optional `priceperhourdollar`
* an optional name of a `logfile` that is downloaded after the benchmark
* name of the `datadir` of the DBMS. Its size is measured using `du` after data loading has been finished.

## MariaDB

**Deployment**

https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/blob/master/k8s/deploymenttemplate-MariaDB.yml

**Configuration**
```
'MariaDB': {
'loadData': 'mysql < {scriptname}',
'template': {
'version': 'v10.4.6',
'alias': 'GP A',
'docker_alias': 'GP A',
'dialect': 'MySQL',
'JDBC': {
'driver': "org.mariadb.jdbc.Driver",
'auth': ["root", ""],
'url': 'jdbc:mysql://{serverip}:9091/{dbname}',
'jar': './mariadb-java-client-2.3.0.jar'
}
},
'logfile': '',
'datadir': '/var/lib/mysql/',
'priceperhourdollar': 0.0,
},
```

***DDL Scripts***

Example for [TPC-H](https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/tree/master/experiments/tpch/MariaDB)

## MonetDB

**Deployment**

https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/blob/master/k8s/deploymenttemplate-MonetDB.yml

**Configuration**
```
'MonetDB': {
'loadData': 'cd /home/monetdb;mclient db < {scriptname}',
'template': {
'version': 'v11.31.7',
'alias': 'In-Memory C',
'docker_alias': 'In-Memory C',
'JDBC': {
'auth': ['monetdb', 'monetdb'],
'driver': 'nl.cwi.monetdb.jdbc.MonetDriver',
'jar': './monetdb-jdbc-2.29.jar',
'url': 'jdbc:monetdb://{serverip}:9091/db'
}
},
'logfile': '',
'datadir': '/var/monetdb5/',
'priceperhourdollar': 0.0,
},
```

***DDL Scripts***

Example for [TPC-H](https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/tree/master/experiments/tpch/MonetDB)

## OmniSci

**Deployment**

https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/blob/master/k8s/deploymenttemplate-OmniSci.yml

**Configuration**
```
'OmniSci': {
'loadData': 'bin/omnisql -u admin -pHyperInteractive < {scriptname}',
'template': {
'version': 'CE v4.7',
'alias': 'GPU A',
'docker_alias': 'GPU A',
'JDBC': {
'driver': 'com.omnisci.jdbc.OmniSciDriver',
'url': 'jdbc:omnisci:{serverip}:9091:omnisci',
'auth': {'user': 'admin', 'password': 'HyperInteractive'},
'jar': './omnisci-jdbc-4.7.1.jar'
}
},
'logfile': '/omnisci-storage/data/mapd_log/omnisci_server.INFO',
'datadir': '/omnisci-storage/',
'priceperhourdollar': 0.0,
},
```

***DDL Scripts***

Example for [TPC-H](https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/tree/master/experiments/tpch/OmniSci)

## PostgreSQL

**Deployment**

https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/blob/master/k8s/deploymenttemplate-PostgreSQL.yml

**Configuration**

```
'PostgreSQL': {
'loadData': 'psql -U postgres < {scriptname}',
'template': {
'version': 'v11.4',
'alias': 'GP D',
'docker_alias': 'GP D',
'JDBC': {
'driver': "org.postgresql.Driver",
'auth': ["postgres", ""],
'url': 'jdbc:postgresql://{serverip}:9091/postgres',
'jar': './postgresql-42.2.5.jar'
}
},
'logfile': '',
'datadir': '/var/lib/postgresql/data/',
'priceperhourdollar': 0.0,
},
```

***DDL Scripts***

Example for [TPC-H](https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/tree/master/experiments/tpch/PostgreSQL)

0 comments on commit 2c4e56e

Please sign in to comment.