Skip to content

Commit

Permalink
V0.4.1 (#35)
Browse files Browse the repository at this point in the history
* Prepare next release

* Docs: Improve

* self.connectionmanagement['singleConnection'] optional

* Docs: Improve API

* K8s: Stop experiment does not delete pod

* Docs: Improve API

* Docs: Improve API

* K8s: Script as metadata

* K8s: Escape DBMS password

* TPC-H: Dialect TSQL and Oracle

* TPC-H: Demo config

* TPC-H: Demo Exasol, OracleDB, SQLServer
  • Loading branch information
perdelt committed Sep 4, 2020
1 parent a2ab975 commit 9de59c4
Show file tree
Hide file tree
Showing 20 changed files with 1,775 additions and 136 deletions.
9 changes: 5 additions & 4 deletions bexhoma/masterK8s.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ def startExperiment(self, instance=None, volume=None, docker=None, script=None,
def stopExperiment(self, delay=0):
self.getInfo()
self.stopPortforwarding()
for p in self.pods:
self.deletePod(p)
#for p in self.pods:
# self.deletePod(p)
experiment = {}
experiment['delay'] = delay
experiment['step'] = "stopExperiment"
Expand Down Expand Up @@ -453,7 +453,7 @@ def kubectl(self, command):
print(command)
os.system(command)
def executeCTL(self, command):
fullcommand = 'kubectl exec '+self.activepod+' --container=dbms -- bash -c "'+command+'"'
fullcommand = 'kubectl exec '+self.activepod+' --container=dbms -- bash -c "'+command.replace('"','\\"')+'"'
print(fullcommand)
proc = subprocess.Popen(fullcommand, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
stdout, stderr = proc.communicate()
Expand Down Expand Up @@ -635,6 +635,7 @@ def runBenchmarks(self, connection=None, code=None, info=[], resultfolder='', co
c['active'] = True
c['name'] = connection
c['docker'] = self.d
c['script'] = self.s
c['info'] = info
c['timeLoad'] = self.timeLoading
c['priceperhourdollar'] = 0.0 + self.docker['priceperhourdollar']
Expand Down Expand Up @@ -670,7 +671,7 @@ def runBenchmarks(self, connection=None, code=None, info=[], resultfolder='', co
c['connectionmanagement']['numProcesses'] = self.connectionmanagement['numProcesses']
c['connectionmanagement']['runsPerConnection'] = self.connectionmanagement['runsPerConnection']
c['connectionmanagement']['timeout'] = self.connectionmanagement['timeout']
c['connectionmanagement']['singleConnection'] = self.connectionmanagement['singleConnection']
c['connectionmanagement']['singleConnection'] = self.connectionmanagement['singleConnection'] if 'singleConnection' in self.connectionmanagement else False
c['monitoring'] = {}
if 'monitor' in self.config['credentials']['k8s']:
if 'grafanatoken' in self.config['credentials']['k8s']['monitor']:
Expand Down
9 changes: 4 additions & 5 deletions demo-tpch-k8s.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@
cluster.set_connectionmanagement(
numProcesses = 1,
runsPerConnection = 0,
timeout = 600,
singleConnection = False)
timeout = 600)

# set query parameters - this overwrites infos given in the query file
cluster.set_querymanagement(numRun = 1)
Expand All @@ -84,7 +83,7 @@
'memory': 0
},
nodeSelector = {
'cpu': cpu_type,
#'cpu': cpu_type,
})


Expand All @@ -97,8 +96,8 @@ def run_experiments(docker, alias):
for i in range(1,numExperiments+1):
connection = cluster.getConnectionName()
cluster.runBenchmarks(connection=connection+"-"+str(i), alias=alias+'-'+str(i))
cluster.stopExperiment()
cluster.cleanExperiment()
cluster.stopExperiment(delay=60)
cluster.cleanExperiment(delay=60)
del gc.garbage[:]


Expand Down
78 changes: 68 additions & 10 deletions docs/API.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,26 @@ This document contains API details about

## Set Code

Sets a code for the collection of experiments.
This is used by the benchmarking package https://github.com/Beuth-Erdelt/DBMS-Benchmarker

For a new experiment the code is `None`.

## Set Experiment

```
cluster.setExperiment(instance, volume, docker, script)
```
This sets (up to) four central parameters of an experiment
* `instance`: Name of virtual machine
* `volume`: Name of Storage Device
* `docker`: Name of DBMS docker image
* `script`: Name of collection of init scripts

The four parameters are given as keys to improve usability, for example `script="SF1-indexes"` and `instance="4000m-16Gi"`.
Most of these keys are translated into technical details using a configuration file, c.f. an [example](../k8s-cluster.config).
Instances in a Kubernetes cluster are translated using [YAML files](#deployments).

## Run Experiment

<p align="center">
Expand Down Expand Up @@ -46,21 +64,41 @@ cluster.setExperiment(instance, volume, docker, script)
cluster.createDeployment()
cluster.startPortforwarding()
```
* `cluster.createDeployment()`: Creates a deployment of a docker image (pod and services) to k8s
* `cluster.createDeployment()`: Creates a deployment (pod and services) of Docker images to k8s
* Setup Network `cluster.startPortforwarding()`: Forwards the port of the DBMS in the pod to localhost:fixedport (same for all containers)

#### Deployments

The deployment is expected to be given as a file named `'deployment-'+docker+'-'+instance+'.yml'`
If no such file exists, a file named `'deploymenttemplate-"+docker+".yml'` is loaded and
* the instance name is understood as `cpu-mem-gpu-node`
* the instance name is understood as `cpu-mem-gpu-gputype`
* the yaml file is changed as
```
dep['spec']['template']['spec']['containers'][0]['resources']['requests']['cpu'] = cpu
dep['spec']['template']['spec']['containers'][0]['resources']['limits']['cpu'] = cpu
dep['spec']['template']['spec']['containers'][0]['resources']['requests']['memory'] = mem
dep['spec']['template']['spec']['containers'][0]['resources']['limits']['memory'] = mem
dep['spec']['template']['spec']['nodeSelector']['gpu'] = node
dep['spec']['template']['spec']['nodeSelector']['gpu'] = gputype
dep['spec']['template']['spec']['containers'][0]['resources']['limits']['nvidia.com/gpu'] = int(gpu)
```
* saved as `'deployment-'+docker+'-'+instance+'.yml'`
* Setup Network `cluster.startPortforwarding()`: Forwards the port of the DBMS in the pod to localhost:fixedport (same for all containers)

The resources (requests, limits and nodeSelector) can also be set explicitly using
```
cluster.set_resources(
requests = {
'cpu': cpu,
'memory': mem
},
limits = {
'cpu': 0, # unlimited
'memory': 0 # unlimited
},
nodeSelector = {
'cpu': cpu_type,
'gpu': gpu_type,
})
```

### On AWS

Expand Down Expand Up @@ -156,9 +194,26 @@ cluster.runBenchmarks(connection=connectionname+"-2clients")

Simulated clients can optionally be configured via a connection manager:
```
cluster.connectionmanagement['numProcesses'] = 8
cluster.connectionmanagement['runsPerConnection'] = 5
cluster.connectionmanagement['timeout'] = 1200
cluster.set_connectionmanagement(
numProcesses = 2,
runsPerConnection = 0,
timeout = 600)
```

### Workload Configurations

The workload is set in the configuration of the experiment
```
cluster = testdesign(queryfile = queryfile)
```

The workload can be further manipulated:
```
cluster.set_workload(
name = 'TPC-H Queries',
info = 'This experiment compares instances of different DBMS on different machines.')
cluster.set_querymanagement(numRun = 64)
```

### Collect Results
Expand All @@ -181,6 +236,8 @@ The result folder also contains

**Note this means it stores confidential information**

Results are best inspected using the [dashboard](https://github.com/Beuth-Erdelt/DBMS-Benchmarker/blob/master/docs/Dashboard.md)

### Collect Host Information

Some information is given by configuration (e.g. JDBC data), some is collected from the experiment host:
Expand Down Expand Up @@ -232,13 +289,14 @@ The command `cluster.stopExperiment()` (basically) is short for:
```
cluster.getInfo()
cluster.stopPortforwarding()
for p in cluster.pods:
cluster.deletePod(p)
#for p in cluster.pods:
# cluster.deletePod(p)
```

* `cluster.stopPortforwarding()`: Disconnects network from current pod
* `cluster.deletePod()`: Deletes all pods belonging to namespace / matching label app. Note that the deployment will automatically start a new (clean) pod. Also note that the pod nevertheless will keep data if the storage device has been mounted.
* ~~`cluster.deletePod()`: Deletes all pods belonging to namespace / matching label app. Note that the deployment will automatically start a new (clean) pod. Also note that the pod nevertheless will keep data if the storage device has been mounted.~~

**Note: The pod is not deleted anymore**

### On AWS

Expand Down
2 changes: 2 additions & 0 deletions docs/Example-TPC-H.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ Official TPC-H benchmark - http://www.tpc.org/tpch

We need a configuration file containing the following information in a predefined format, c.f. [demo file](../k8s-cluster.config).
We may adjust the configuration to match the actual environment.
This in particular holds for `imagePullSecrets`, `tolerations` and `nodeSelector` in the YAML files.

The demo also includes the necessary settings for some DBMS: MariaDB, MonetDB, MySQL, OmniSci and PostgreSQL.

For basic execution of benchmarking we need
Expand Down
10 changes: 5 additions & 5 deletions docs/deploymenttemplate-PostgreSQL.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ apiVersion: v1
kind: Service
metadata:
labels: {app: bexhoma}
name: service-bexhoma
name: bexhoma-service
spec:
ports:
- {port: 9091, protocol: TCP, name: port-dbms, targetPort: 5432}
Expand All @@ -13,16 +13,17 @@ apiVersion: apps/v1
kind: Deployment
metadata:
labels: {app: bexhoma}
name: deployment-bexhoma
name: bexhoma-deployment-postgres
spec:
replicas: 1
selector:
matchLabels: {app: bexhoma}
template:
metadata:
labels: {app: bexhoma, env: test}
labels: {app: bexhoma}
spec:
automountServiceAccountToken: false
nodeSelector: {cpu: epyc-7542}
containers:
- name: dbms
image: postgres:11.4
Expand Down Expand Up @@ -62,10 +63,9 @@ spec:
- mountPath: /dev/disk
name: disk
readOnly: true
nodeSelector: {cpu: epyc-7542}
volumes:
- name: benchmark-data-volume
persistentVolumeClaim: {claimName: data-benchmarking}
persistentVolumeClaim: {claimName: vol-benchmarking}
- hostPath:
path: /
name: rootfs
Expand Down
36 changes: 36 additions & 0 deletions experiments/tpch/Exasol/initconstraints-tpch.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
-- sccsid: @(#)dss.ri 2.1.8.1
-- tpcd benchmark version 8.0

-- for table nation
alter table public.nation
add foreign key (n_regionkey) references public.region(r_regionkey);

-- for table supplier
alter table public.supplier
add foreign key (s_nationkey) references public.nation(n_nationkey);

-- for table customer
alter table public.customer
add foreign key (c_nationkey) references public.nation(n_nationkey);

-- for table partsupp
alter table public.partsupp
add foreign key (ps_suppkey) references public.supplier(s_suppkey);

alter table public.partsupp
add foreign key (ps_partkey) references public.part(p_partkey);

-- for table orders
alter table public.orders
add foreign key (o_custkey) references public.customer(c_custkey);

-- for table lineitem
alter table public.lineitem
add foreign key (l_orderkey) references public.orders(o_orderkey);

alter table public.lineitem
add foreign key (l_partkey,l_suppkey) references
public.partsupp(ps_partkey,ps_suppkey);



8 changes: 8 additions & 0 deletions experiments/tpch/Exasol/initdata-tpch-SF1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
IMPORT INTO public.customer FROM LOCAL CSV FILE '/data/tpch/SF1/customer.tbl' COLUMN SEPARATOR = '|' SKIP = 0;
IMPORT INTO public.lineitem FROM LOCAL CSV FILE '/data/tpch/SF1/lineitem.tbl' COLUMN SEPARATOR = '|' SKIP = 0;
IMPORT INTO public.nation FROM LOCAL CSV FILE '/data/tpch/SF1/nation.tbl' COLUMN SEPARATOR = '|' SKIP = 0;
IMPORT INTO public.orders FROM LOCAL CSV FILE '/data/tpch/SF1/orders.tbl' COLUMN SEPARATOR = '|' SKIP = 0;
IMPORT INTO public.part FROM LOCAL CSV FILE '/data/tpch/SF1/part.tbl' COLUMN SEPARATOR = '|' SKIP = 0;
IMPORT INTO public.partsupp FROM LOCAL CSV FILE '/data/tpch/SF1/partsupp.tbl' COLUMN SEPARATOR = '|' SKIP = 0;
IMPORT INTO public.region FROM LOCAL CSV FILE '/data/tpch/SF1/region.tbl' COLUMN SEPARATOR = '|' SKIP = 0;
IMPORT INTO public.supplier FROM LOCAL CSV FILE '/data/tpch/SF1/supplier.tbl' COLUMN SEPARATOR = '|' SKIP = 0;
52 changes: 52 additions & 0 deletions experiments/tpch/Exasol/initindexes-tpch.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
-- indexes for foreign keys

-- for table region
alter table public.region
add primary key (r_regionkey);

-- for table nation
alter table public.nation
add primary key (n_nationkey);

-- create index on public.nation (n_regionkey);

-- for table part
alter table public.part
add primary key (p_partkey);

-- for table supplier
alter table public.supplier
add primary key (s_suppkey);

-- create index on public.supplier (s_nationkey);

-- for table partsupp
alter table public.partsupp
add primary key (ps_partkey,ps_suppkey);

-- for table customer
alter table public.customer
add primary key (c_custkey);

-- create index on public.customer (c_nationkey);

-- for table partsupp
-- create index on public.partsupp (ps_suppkey);

-- create index on public.partsupp (ps_partkey);

-- for table lineitem
alter table public.lineitem
add primary key (l_orderkey,l_linenumber);

-- for table orders
alter table public.orders
add primary key (o_orderkey);

-- create index on public.orders (o_custkey);

-- for table lineitem
-- create index on public.lineitem (l_orderkey);

-- create index on public.lineitem (l_partkey,l_suppkey);

0 comments on commit 9de59c4

Please sign in to comment.