Skip to content

Commit

Permalink
V0.4.2 (#36)
Browse files Browse the repository at this point in the history
* Prepare next release

* K8s: Parametrize DDL scripts

* K8s: No pickled reports

* Demo: Typo

* Docs: API of set_ methods

* Docs: Deployments

* Docs: DBMS

* Docs: References
  • Loading branch information
perdelt committed Sep 10, 2020
1 parent 9de59c4 commit 2c4e56e
Show file tree
Hide file tree
Showing 11 changed files with 386 additions and 48 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ This tool supports AWS and kubernetes (k8s) based clusters.
This documentation
* illustrates the [concepts](docs/Concept.md)
* provides a basic [TPC-H like example](docs/Example-TPC-H.md)
* shows how to use several [DBMS](docs/DBMS.md)
* MariaDB
* MonetDB
* OmniSci
* PostgreSQL
* illustrates the deployment in [Kubernetes](docs/Deployments.md)
* provides [more detailed examples](docs/Examples.md)
* [Example: TPC-H Benchmark for 3 DBMS on 1 Virtual Machine](docs/Examples.md#example-tpc-h-benchmark-for-3-dbms-on-1-virtual-machine)
* [Example: TPC-H Benchmark for 1 DBMS on 3 Virtual Machines](docs/Examples.md#example-tpc-h-benchmark-for-1-dbms-on-3-virtual-machines)
Expand Down
27 changes: 21 additions & 6 deletions bexhoma/masterK8s.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def __init__(self, clusterconfig='cluster.config', configfolder='experiments/',
self.clusterconfig = clusterconfig
self.timeLoading = 0
self.resources = {}
self.ddl_parameters = {}
self.connectionmanagement = {}
self.connectionmanagement['numProcesses'] = None
self.connectionmanagement['runsPerConnection'] = None
Expand Down Expand Up @@ -77,6 +78,8 @@ def set_querymanagement(self, **kwargs):
self.querymanagement = kwargs
def set_resources(self, **kwargs):
    """Set resource demands for the DBMS deployment in Kubernetes.

    Accepted keys (per docs/API.md): `requests`, `limits` and `nodeSelector`
    dicts, e.g. requests={'cpu': '4000m', 'memory': '16Gi'}.
    These values overwrite the corresponding settings in the instance
    description (deployment YAML) when the deployment is created.
    """
    self.resources = kwargs
def set_ddl_parameters(self, **kwargs):
    """Set parameters that fill placeholders in the DDL (init) scripts.

    Each keyword becomes a template variable: every occurrence of
    `{key}` in an init script is replaced by the given value via
    str.format() before the script is copied to the DBMS pod
    (see prepareInit()). Example: set_ddl_parameters(shard_count='2')
    replaces all `{shard_count}` occurrences with '2'.
    """
    self.ddl_parameters = kwargs
def set_code(self, code):
    """Set the unique code of the experiment.

    Snake_case alias for the legacy camelCase setCode(); simply
    delegates and returns its result.
    """
    return self.setCode(code)
def setCode(self, code):
Expand Down Expand Up @@ -490,10 +493,22 @@ def prepareInit(self):
# cmd['copy_init_scripts'] = 'cp {scriptname}'.format(scriptname=scriptfolder+script)+' /data/'+str(self.code)+'/'+self.connection+'_init_'+str(i)+'.log'
# stdin, stdout, stderr = self.executeCTL(cmd['copy_init_scripts'])
# i = i + 1
for script in self.initscript:
filename = self.d+'/'+script
if os.path.isfile(self.configfolder+'/'+filename):
self.kubectl('kubectl cp --container dbms {from_name} {to_name}'.format(from_name=self.configfolder+'/'+filename, to_name=self.activepod+':'+scriptfolder+script))
if len(self.ddl_parameters):
for script in self.initscript:
filename_template = self.d+'/'+script
if os.path.isfile(self.configfolder+'/'+filename_template):
with open(self.configfolder+'/'+filename_template, "r") as initscript_template:
data = initscript_template.read()
data = data.format(**self.ddl_parameters)
filename_filled = self.d+'/filled_'+script
with open(self.configfolder+'/'+filename_filled, "w") as initscript_filled:
initscript_filled.write(data)
self.kubectl('kubectl cp --container dbms {from_name} {to_name}'.format(from_name=self.configfolder+'/'+filename_filled, to_name=self.activepod+':'+scriptfolder+script))
else:
for script in self.initscript:
filename = self.d+'/'+script
if os.path.isfile(self.configfolder+'/'+filename):
self.kubectl('kubectl cp --container dbms {from_name} {to_name}'.format(from_name=self.configfolder+'/'+filename, to_name=self.activepod+':'+scriptfolder+script))
def loadData(self):
self.prepareInit()
print("loadData")
Expand Down Expand Up @@ -739,8 +754,8 @@ def runBenchmarks(self, connection=None, code=None, info=[], resultfolder='', co
if os.path.isfile(self.yamlfolder+self.deployment):
shutil.copy(self.yamlfolder+self.deployment, self.benchmark.path+'/'+connection+'.yml')
# append necessary reporters
self.benchmark.reporter.append(benchmarker.reporter.dataframer(self.benchmark))
self.benchmark.reporter.append(benchmarker.reporter.pickler(self.benchmark))
#self.benchmark.reporter.append(benchmarker.reporter.dataframer(self.benchmark))
#self.benchmark.reporter.append(benchmarker.reporter.pickler(self.benchmark))
# run or continue benchmarking
if code is not None:
self.benchmark.continueBenchmarks(overwrite = True)
Expand Down
2 changes: 1 addition & 1 deletion demo-tpch-k8s.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
Demo for bexhoma
This compares MonetDB and PostgreSQL performing some some TPC-H queries.
This compares MonetDB and PostgreSQL performing some TPC-H queries.
The cluster is managed using Kubernetes.
Copyright (C) 2020 Patrick Erdelt
Expand Down
118 changes: 84 additions & 34 deletions docs/API.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
# API Details

This document contains API details about
* [`setCode()`](#set-code)
* [`setExperiment()`](#set-experiment)
* [`set_code()`](#set-code)
* [`set_experiment()`](#set-experiment)
* [`set_workload()`](#set-workload)
* [`set_connectionmanagement()`](#set-connection-management)
* [`set_querymanagement()`](#set-query-management)
* [`set_resources()`](#set-resources)
* [`set_ddl_parameters()`](#set-ddl-parameters)
* [`runExperiment()`](#run-experiment)
* [`prepareExperiment()`](#prepare-experiment)
* [`startExperiment()`](#start-experiment)
Expand Down Expand Up @@ -32,6 +37,82 @@ The four parameters are given as keys to improve usability, for example `script="
Most of these keys are translated into technical details using a configuration file, c.f. an [example](../k8s-cluster.config).
Instances in a Kubernetes cluster are translated using [YAML files](#deployments).

## Set Workload

Specify details about the following experiment. This overwrites infos given in the query file.

```
cluster.set_workload(
name = 'TPC-H Queries',
info = 'This experiment compares instances of different DBMS on different machines.'
)
```

* `name`: Name of experiment
* `info`: Arbitrary string

These infos are used in reporting.

## Set Connection Management

Specify details about the following experiment. This overwrites infos given in the query file.

```
cluster.set_connectionmanagement(
numProcesses = 1,
runsPerConnection = 0,
timeout = 600
)
```

* `timeout`: Maximum lifespan of a connection. Default is None, i.e. no limit.
* `numProcesses`: Number of parallel client processes. Default is 1.
* `runsPerConnection`: Number of runs performed before connection is closed. Default is None, i.e. no limit.

These values are handed over to the [benchmarker](https://github.com/Beuth-Erdelt/DBMS-Benchmarker/blob/master/docs/Options.md#extended-query-file).

## Set Query Management

Specify details about the following experiment. This overwrites infos given in the query file.

```
cluster.set_querymanagement(numRun = 1)
```

* `numRun`: Number of runs each query is run for benchmarking

These values are handed over to the [benchmarker](https://github.com/Beuth-Erdelt/DBMS-Benchmarker/blob/master/docs/Options.md#extended-query-file), c.f. for more options.

## Set Resources

Specify details about the following experiment. This overwrites infos given in the instance description (YAML) in [deployments](Deployments.md) for Kubernetes.

```
cluster.set_resources(
requests = {
'cpu': '4000m',
'memory': '16Gi'
},
limits = {
'cpu': 0,
'memory': 0
},
nodeSelector = {
'gpu': 'v100',
})
```

## Set DDL Parameters

Specify details about the DDL scripts. This replaces placeholders in the scripts.

```
cluster.set_ddl_parameters(
shard_count = '2'
)
```
All occurrences of `{shard_count}` in the DDL scripts of the following experiment will be replaced by `2`.

## Run Experiment

<p align="center">
Expand Down Expand Up @@ -67,38 +148,7 @@ cluster.startPortforwarding()
* `cluster.createDeployment()`: Creates a deployment (pod and services) of Docker images to k8s
* Setup Network `cluster.startPortforwarding()`: Forwards the port of the DBMS in the pod to localhost:fixedport (same for all containers)

#### Deployments

The deployment is expected to be given as a file named `'deployment-'+docker+'-'+instance+'.yml'`
If no such file exists, a file named `'deploymenttemplate-"+docker+".yml'` is loaded and
* the instance name is understood as `cpu-mem-gpu-gputype`
* the yaml file is changed as
```
dep['spec']['template']['spec']['containers'][0]['resources']['requests']['cpu'] = cpu
dep['spec']['template']['spec']['containers'][0]['resources']['limits']['cpu'] = cpu
dep['spec']['template']['spec']['containers'][0]['resources']['requests']['memory'] = mem
dep['spec']['template']['spec']['containers'][0]['resources']['limits']['memory'] = mem
dep['spec']['template']['spec']['nodeSelector']['gpu'] = gputype
dep['spec']['template']['spec']['containers'][0]['resources']['limits']['nvidia.com/gpu'] = int(gpu)
```
* saved as `'deployment-'+docker+'-'+instance+'.yml'`

The resources (requests, limits and nodeSelector) can also be set explicitly using
```
cluster.set_resources(
requests = {
'cpu': cpu,
'memory': mem
},
limits = {
'cpu': 0, # unlimited
'memory': 0 # unlimited
},
nodeSelector = {
'cpu': cpu_type,
'gpu': gpu_type,
})
```
See the documentation for more information about [deployments](Deployments.md).

### On AWS

Expand Down
183 changes: 183 additions & 0 deletions docs/DBMS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
# DBMS

To include a DBMS in a Kubernetes-based experiment you will need
* a Docker Image
* a JDBC Driver
* a Kubernetes Deployment Template
* some configuration
* How to load data (DDL command)
* DDL scripts
* How to connect via JDBC

This document contains examples for
* [MariaDB](#mariadb)
* [MonetDB](#monetdb)
* [OmniSci](#omnisci)
* [PostgreSQL](#postgresql)


## Example Explained

### Deployment

See documentation of [deployments](Deployments.md).

### Configuration

```
'dockers': {
'OmniSci': {
'loadData': 'bin/omnisql -u admin -pHyperInteractive < {scriptname}', # DBMS: Command to Login and Run Scripts
'template': { # Template for Benchmark Tool
'version': 'CE v5.4',
'alias': 'GPU',
'docker_alias': 'GPU',
'JDBC': {
'driver': 'com.omnisci.jdbc.OmniSciDriver',
'url': 'jdbc:omnisci:{serverip}:9091:omnisci',
'auth': {'user': 'admin', 'password': 'HyperInteractive'},
'jar': './omnisci-jdbc-4.7.1.jar' # DBMS: Local Path to JDBC Jar
}
},
'logfile': '/omnisci-storage/data/mapd_log/omnisci_server.INFO', # DBMS: Path to Log File on Server
'datadir': '/omnisci-storage/data/mapd_data/', # DBMS: Path to directory containing data storage
'priceperhourdollar': 0.0, # DBMS: Price per hour in USD if DBMS is rented
}
}
```
This has
* a base name for the DBMS
* a placeholder `template` for the [benchmark tool](https://github.com/Beuth-Erdelt/DBMS-Benchmarker/blob/master/docs/Options.md#connection-file)
* the JDBC driver jar locally available
* a command `loadData` for running the init scripts with `{scriptname}` as a placeholder for the script name inside the container
* `{serverip}` as a placeholder for the host address (localhost for k8s, an Elastic IP for AWS)
* `{dbname}` as a placeholder for the db name
* an optional `priceperhourdollar`
* an optional name of a `logfile` that is downloaded after the benchmark
* name of the `datadir` of the DBMS. Its size is measured using `du` after data loading has been finished.

## MariaDB

**Deployment**

https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/blob/master/k8s/deploymenttemplate-MariaDB.yml

**Configuration**
```
'MariaDB': {
'loadData': 'mysql < {scriptname}',
'template': {
'version': 'v10.4.6',
'alias': 'GP A',
'docker_alias': 'GP A',
'dialect': 'MySQL',
'JDBC': {
'driver': "org.mariadb.jdbc.Driver",
'auth': ["root", ""],
'url': 'jdbc:mysql://{serverip}:9091/{dbname}',
'jar': './mariadb-java-client-2.3.0.jar'
}
},
'logfile': '',
'datadir': '/var/lib/mysql/',
'priceperhourdollar': 0.0,
},
```

***DDL Scripts***

Example for [TPC-H](https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/tree/master/experiments/tpch/MariaDB)

## MonetDB

**Deployment**

https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/blob/master/k8s/deploymenttemplate-MonetDB.yml

**Configuration**
```
'MonetDB': {
'loadData': 'cd /home/monetdb;mclient db < {scriptname}',
'template': {
'version': 'v11.31.7',
'alias': 'In-Memory C',
'docker_alias': 'In-Memory C',
'JDBC': {
'auth': ['monetdb', 'monetdb'],
'driver': 'nl.cwi.monetdb.jdbc.MonetDriver',
'jar': './monetdb-jdbc-2.29.jar',
'url': 'jdbc:monetdb://{serverip}:9091/db'
}
},
'logfile': '',
'datadir': '/var/monetdb5/',
'priceperhourdollar': 0.0,
},
```

***DDL Scripts***

Example for [TPC-H](https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/tree/master/experiments/tpch/MonetDB)

## OmniSci

**Deployment**

https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/blob/master/k8s/deploymenttemplate-OmniSci.yml

**Configuration**
```
'OmniSci': {
'loadData': 'bin/omnisql -u admin -pHyperInteractive < {scriptname}',
'template': {
'version': 'CE v4.7',
'alias': 'GPU A',
'docker_alias': 'GPU A',
'JDBC': {
'driver': 'com.omnisci.jdbc.OmniSciDriver',
'url': 'jdbc:omnisci:{serverip}:9091:omnisci',
'auth': {'user': 'admin', 'password': 'HyperInteractive'},
'jar': './omnisci-jdbc-4.7.1.jar'
}
},
'logfile': '/omnisci-storage/data/mapd_log/omnisci_server.INFO',
'datadir': '/omnisci-storage/',
'priceperhourdollar': 0.0,
},
```

***DDL Scripts***

Example for [TPC-H](https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/tree/master/experiments/tpch/OmniSci)

## PostgreSQL

**Deployment**

https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/blob/master/k8s/deploymenttemplate-PostgreSQL.yml

**Configuration**

```
'PostgreSQL': {
'loadData': 'psql -U postgres < {scriptname}',
'template': {
'version': 'v11.4',
'alias': 'GP D',
'docker_alias': 'GP D',
'JDBC': {
'driver': "org.postgresql.Driver",
'auth': ["postgres", ""],
'url': 'jdbc:postgresql://{serverip}:9091/postgres',
'jar': './postgresql-42.2.5.jar'
}
},
'logfile': '',
'datadir': '/var/lib/postgresql/data/',
'priceperhourdollar': 0.0,
},
```

***DDL Scripts***

Example for [TPC-H](https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/tree/master/experiments/tpch/PostgreSQL)

0 comments on commit 2c4e56e

Please sign in to comment.