Skip to content

Commit

Permalink
V0.4.1 (#35)
Browse files Browse the repository at this point in the history
* Prepare next release

* Docs: Improve

* self.connectionmanagement['singleConnection'] optional

* Docs: Improve API

* K8s: Stop experiment does not delete pod

* Docs: Improve API

* Docs: Improve API

* K8s: Script as metadata

* K8s: Escape DBMS password

* TPC-H: Dialect TSQL and Oracle

* TPC-H: Demo config

* TPC-H: Demo Exasol, OracleDB, SQLServer
  • Loading branch information
perdelt committed Sep 4, 2020
1 parent a2ab975 commit 9de59c4
Show file tree
Hide file tree
Showing 20 changed files with 1,775 additions and 136 deletions.
9 changes: 5 additions & 4 deletions bexhoma/masterK8s.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ def startExperiment(self, instance=None, volume=None, docker=None, script=None,
def stopExperiment(self, delay=0):
self.getInfo()
self.stopPortforwarding()
for p in self.pods:
self.deletePod(p)
#for p in self.pods:
# self.deletePod(p)
experiment = {}
experiment['delay'] = delay
experiment['step'] = "stopExperiment"
Expand Down Expand Up @@ -453,7 +453,7 @@ def kubectl(self, command):
print(command)
os.system(command)
def executeCTL(self, command):
fullcommand = 'kubectl exec '+self.activepod+' --container=dbms -- bash -c "'+command+'"'
fullcommand = 'kubectl exec '+self.activepod+' --container=dbms -- bash -c "'+command.replace('"','\\"')+'"'
print(fullcommand)
proc = subprocess.Popen(fullcommand, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
stdout, stderr = proc.communicate()
Expand Down Expand Up @@ -635,6 +635,7 @@ def runBenchmarks(self, connection=None, code=None, info=[], resultfolder='', co
c['active'] = True
c['name'] = connection
c['docker'] = self.d
c['script'] = self.s
c['info'] = info
c['timeLoad'] = self.timeLoading
c['priceperhourdollar'] = 0.0 + self.docker['priceperhourdollar']
Expand Down Expand Up @@ -670,7 +671,7 @@ def runBenchmarks(self, connection=None, code=None, info=[], resultfolder='', co
c['connectionmanagement']['numProcesses'] = self.connectionmanagement['numProcesses']
c['connectionmanagement']['runsPerConnection'] = self.connectionmanagement['runsPerConnection']
c['connectionmanagement']['timeout'] = self.connectionmanagement['timeout']
c['connectionmanagement']['singleConnection'] = self.connectionmanagement['singleConnection']
c['connectionmanagement']['singleConnection'] = self.connectionmanagement['singleConnection'] if 'singleConnection' in self.connectionmanagement else False
c['monitoring'] = {}
if 'monitor' in self.config['credentials']['k8s']:
if 'grafanatoken' in self.config['credentials']['k8s']['monitor']:
Expand Down
9 changes: 4 additions & 5 deletions demo-tpch-k8s.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@
cluster.set_connectionmanagement(
numProcesses = 1,
runsPerConnection = 0,
timeout = 600,
singleConnection = False)
timeout = 600)

# set query parameters - this overwrites infos given in the query file
cluster.set_querymanagement(numRun = 1)
Expand All @@ -84,7 +83,7 @@
'memory': 0
},
nodeSelector = {
'cpu': cpu_type,
#'cpu': cpu_type,
})


Expand All @@ -97,8 +96,8 @@ def run_experiments(docker, alias):
for i in range(1,numExperiments+1):
connection = cluster.getConnectionName()
cluster.runBenchmarks(connection=connection+"-"+str(i), alias=alias+'-'+str(i))
cluster.stopExperiment()
cluster.cleanExperiment()
cluster.stopExperiment(delay=60)
cluster.cleanExperiment(delay=60)
del gc.garbage[:]


Expand Down
78 changes: 68 additions & 10 deletions docs/API.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,26 @@ This document contains API details about

## Set Code

Sets a code for the collection of experiments.
This is used by the benchmarking package https://github.com/Beuth-Erdelt/DBMS-Benchmarker

For a new experiment the code is `None`.

## Set Experiment

```
cluster.setExperiment(instance, volume, docker, script)
```
This sets (up to) four central parameters of an experiment
* `instance`: Name of virtual machine
* `volume`: Name of Storage Device
* `docker`: Name of DBMS docker image
* `script`: Name of collection of init scripts

The four parameters are given as keys to improve usability, for example `script="SF1-indexes"` and `instance="4000m-16Gi"`.
Most of these keys are translated into technical details using a configuration file, c.f. an [example](../k8s-cluster.config).
Instances in a Kubernetes cluster are translated using [YAML files](#deployments).

## Run Experiment

<p align="center">
Expand Down Expand Up @@ -46,21 +64,41 @@ cluster.setExperiment(instance, volume, docker, script)
cluster.createDeployment()
cluster.startPortforwarding()
```
* `cluster.createDeployment()`: Creates a deployment of a docker image (pod and services) to k8s
* `cluster.createDeployment()`: Creates a deployment (pod and services) of Docker images to k8s
* Setup Network `cluster.startPortforwarding()`: Forwards the port of the DBMS in the pod to localhost:fixedport (same for all containers)

#### Deployments

The deployment is expected to be given as a file named `'deployment-'+docker+'-'+instance+'.yml'`
If no such file exists, a file named `'deploymenttemplate-"+docker+".yml'` is loaded and
* the instance name is understood as `cpu-mem-gpu-node`
* the instance name is understood as `cpu-mem-gpu-gputype`
* the yaml file is changed as
```
dep['spec']['template']['spec']['containers'][0]['resources']['requests']['cpu'] = cpu
dep['spec']['template']['spec']['containers'][0]['resources']['limits']['cpu'] = cpu
dep['spec']['template']['spec']['containers'][0]['resources']['requests']['memory'] = mem
dep['spec']['template']['spec']['containers'][0]['resources']['limits']['memory'] = mem
dep['spec']['template']['spec']['nodeSelector']['gpu'] = node
dep['spec']['template']['spec']['nodeSelector']['gpu'] = gputype
dep['spec']['template']['spec']['containers'][0]['resources']['limits']['nvidia.com/gpu'] = int(gpu)
```
* saved as `'deployment-'+docker+'-'+instance+'.yml'`
* Setup Network `cluster.startPortforwarding()`: Forwards the port of the DBMS in the pod to localhost:fixedport (same for all containers)

The resources (requests, limits and nodeSelector) can also be set explicitly using
```
cluster.set_resources(
requests = {
'cpu': cpu,
'memory': mem
},
limits = {
'cpu': 0, # unlimited
'memory': 0 # unlimited
},
nodeSelector = {
'cpu': cpu_type,
'gpu': gpu_type,
})
```

### On AWS

Expand Down Expand Up @@ -156,9 +194,26 @@ cluster.runBenchmarks(connection=connectionname+"-2clients")

Simulated clients can optionally be configured via a connection manager:
```
cluster.connectionmanagement['numProcesses'] = 8
cluster.connectionmanagement['runsPerConnection'] = 5
cluster.connectionmanagement['timeout'] = 1200
cluster.set_connectionmanagement(
numProcesses = 2,
runsPerConnection = 0,
timeout = 600)
```

### Workload Configurations

The workload is set in the configuration of the experiment
```
cluster = testdesign(queryfile = queryfile)
```

The workload can be further manipulated:
```
cluster.set_workload(
name = 'TPC-H Queries',
info = 'This experiment compares instances of different DBMS on different machines.')
cluster.set_querymanagement(numRun = 64)
```

### Collect Results
Expand All @@ -181,6 +236,8 @@ The result folder also contains

**Note this means it stores confidential information**

Results are best inspected using the [dashboard](https://github.com/Beuth-Erdelt/DBMS-Benchmarker/blob/master/docs/Dashboard.md)

### Collect Host Information

Some information is given by configuration (e.g. JDBC data), some is collected from the experiment host:
Expand Down Expand Up @@ -232,13 +289,14 @@ The command `cluster.stopExperiment()` (basically) is short for:
```
cluster.getInfo()
cluster.stopPortforwarding()
for p in cluster.pods:
cluster.deletePod(p)
#for p in cluster.pods:
# cluster.deletePod(p)
```

* `cluster.stopPortforwarding()`: Disconnects network from current pod
* `cluster.deletePod()`: Deletes all pods belonging to namespace / matching label app. Note that the deployment will automatically start a new (clean) pod. Also note that the pod nevertheless will keep data if the storage device has been mounted.
* ~~`cluster.deletePod()`: Deletes all pods belonging to namespace / matching label app. Note that the deployment will automatically start a new (clean) pod. Also note that the pod nevertheless will keep data if the storage device has been mounted.~~

**Note: The pod is not deleted anymore**

### On AWS

Expand Down
2 changes: 2 additions & 0 deletions docs/Example-TPC-H.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ Official TPC-H benchmark - http://www.tpc.org/tpch

We need a configuration file containing the following information in a predefined format, c.f. [demo file](../k8s-cluster.config).
We may adjust the configuration to match the actual environment.
This in particular holds for `imagePullSecrets`, `tolerations` and `nodeSelector` in the YAML files.

The demo also includes the necessary settings for some DBMS: MariaDB, MonetDB, MySQL, OmniSci and PostgreSQL.

For basic execution of benchmarking we need
Expand Down
10 changes: 5 additions & 5 deletions docs/deploymenttemplate-PostgreSQL.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ apiVersion: v1
kind: Service
metadata:
labels: {app: bexhoma}
name: service-bexhoma
name: bexhoma-service
spec:
ports:
- {port: 9091, protocol: TCP, name: port-dbms, targetPort: 5432}
Expand All @@ -13,16 +13,17 @@ apiVersion: apps/v1
kind: Deployment
metadata:
labels: {app: bexhoma}
name: deployment-bexhoma
name: bexhoma-deployment-postgres
spec:
replicas: 1
selector:
matchLabels: {app: bexhoma}
template:
metadata:
labels: {app: bexhoma, env: test}
labels: {app: bexhoma}
spec:
automountServiceAccountToken: false
nodeSelector: {cpu: epyc-7542}
containers:
- name: dbms
image: postgres:11.4
Expand Down Expand Up @@ -62,10 +63,9 @@ spec:
- mountPath: /dev/disk
name: disk
readOnly: true
nodeSelector: {cpu: epyc-7542}
volumes:
- name: benchmark-data-volume
persistentVolumeClaim: {claimName: data-benchmarking}
persistentVolumeClaim: {claimName: vol-benchmarking}
- hostPath:
path: /
name: rootfs
Expand Down
36 changes: 36 additions & 0 deletions experiments/tpch/Exasol/initconstraints-tpch.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
-- sccsid: @(#)dss.ri 2.1.8.1
-- tpcd benchmark version 8.0

-- for table nation
alter table public.nation
add foreign key (n_regionkey) references public.region(r_regionkey);

-- for table supplier
alter table public.supplier
add foreign key (s_nationkey) references public.nation(n_nationkey);

-- for table customer
alter table public.customer
add foreign key (c_nationkey) references public.nation(n_nationkey);

-- for table partsupp
alter table public.partsupp
add foreign key (ps_suppkey) references public.supplier(s_suppkey);

alter table public.partsupp
add foreign key (ps_partkey) references public.part(p_partkey);

-- for table orders
alter table public.orders
add foreign key (o_custkey) references public.customer(c_custkey);

-- for table lineitem
alter table public.lineitem
add foreign key (l_orderkey) references public.orders(o_orderkey);

alter table public.lineitem
add foreign key (l_partkey,l_suppkey) references
public.partsupp(ps_partkey,ps_suppkey);



8 changes: 8 additions & 0 deletions experiments/tpch/Exasol/initdata-tpch-SF1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
IMPORT INTO public.customer FROM LOCAL CSV FILE '/data/tpch/SF1/customer.tbl' COLUMN SEPARATOR = '|' SKIP = 0;
IMPORT INTO public.lineitem FROM LOCAL CSV FILE '/data/tpch/SF1/lineitem.tbl' COLUMN SEPARATOR = '|' SKIP = 0;
IMPORT INTO public.nation FROM LOCAL CSV FILE '/data/tpch/SF1/nation.tbl' COLUMN SEPARATOR = '|' SKIP = 0;
IMPORT INTO public.orders FROM LOCAL CSV FILE '/data/tpch/SF1/orders.tbl' COLUMN SEPARATOR = '|' SKIP = 0;
IMPORT INTO public.part FROM LOCAL CSV FILE '/data/tpch/SF1/part.tbl' COLUMN SEPARATOR = '|' SKIP = 0;
IMPORT INTO public.partsupp FROM LOCAL CSV FILE '/data/tpch/SF1/partsupp.tbl' COLUMN SEPARATOR = '|' SKIP = 0;
IMPORT INTO public.region FROM LOCAL CSV FILE '/data/tpch/SF1/region.tbl' COLUMN SEPARATOR = '|' SKIP = 0;
IMPORT INTO public.supplier FROM LOCAL CSV FILE '/data/tpch/SF1/supplier.tbl' COLUMN SEPARATOR = '|' SKIP = 0;
52 changes: 52 additions & 0 deletions experiments/tpch/Exasol/initindexes-tpch.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
-- indexes for foreign keys

-- for table region
alter table public.region
add primary key (r_regionkey);

-- for table nation
alter table public.nation
add primary key (n_nationkey);

-- create index on public.nation (n_regionkey);

-- for table part
alter table public.part
add primary key (p_partkey);

-- for table supplier
alter table public.supplier
add primary key (s_suppkey);

-- create index on public.supplier (s_nationkey);

-- for table partsupp
alter table public.partsupp
add primary key (ps_partkey,ps_suppkey);

-- for table customer
alter table public.customer
add primary key (c_custkey);

-- create index on public.customer (c_nationkey);

-- for table partsupp
-- create index on public.partsupp (ps_suppkey);

-- create index on public.partsupp (ps_partkey);

-- for table lineitem
alter table public.lineitem
add primary key (l_orderkey,l_linenumber);

-- for table orders
alter table public.orders
add primary key (o_orderkey);

-- create index on public.orders (o_custkey);

-- for table lineitem
-- create index on public.lineitem (l_orderkey);

-- create index on public.lineitem (l_partkey,l_suppkey);

0 comments on commit 9de59c4

Please sign in to comment.