From 5b12ac37a90a98c671b94d528495e5948f007fb6 Mon Sep 17 00:00:00 2001 From: Patrick Erdelt Date: Mon, 11 Sep 2023 13:09:06 +0200 Subject: [PATCH] V0.6.6 Fix vulnerabilities and docs (#193) * DBMSBenchmarker: use latest v0.13.4 --- README.md | 20 +- build.sh | 4 +- .../create_Dockerfiles.py | 2 +- .../create_Dockerfiles.py | 2 +- k8s/deploymenttemplate-CockroachDB.yml | 328 ++++++++++++++++++ k8s/deploymenttemplate-MySQL.yml | 110 ++++++ k8s/deploymenttemplate-bexhoma-dashboard.yml | 4 +- ...btemplate-benchmarking-dbmsbenchmarker.yml | 2 +- 8 files changed, 456 insertions(+), 16 deletions(-) create mode 100644 k8s/deploymenttemplate-CockroachDB.yml create mode 100644 k8s/deploymenttemplate-MySQL.yml diff --git a/README.md b/README.md index 669d415d..9d6470c2 100644 --- a/README.md +++ b/README.md @@ -2,31 +2,29 @@ [![GitHub release](https://img.shields.io/github/release/Beuth-Erdelt/Benchmark-Experiment-Host-Manager.svg)](https://GitHub.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/releases/) [![PyPI version](https://badge.fury.io/py/bexhoma.svg)](https://badge.fury.io/py/bexhoma) [![.github/workflows/draft-pdf.yml](https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/actions/workflows/draft-pdf.yml/badge.svg)](https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/actions/workflows/draft-pdf.yml) +[![bexhoma](https://snyk.io/advisor/python/bexhoma/badge.svg)](https://snyk.io/advisor/python/bexhoma) +[![Documentation Status](https://readthedocs.org/projects/bexhoma/badge/?version=latest)](https://bexhoma.readthedocs.io/en/latest/?badge=latest) # Benchmark Experiment Host Manager (Bexhoma) This Python tools helps **managing benchmark experiments of Database Management Systems (DBMS) in a Kubernetes-based High-Performance-Computing (HPC) cluster environment**. It enables users to configure hardware / software setups for easily repeating tests over varying configurations. +

+ +

+ It serves as the **orchestrator** [2] for distributed parallel benchmarking experiments in a Kubernetes Cloud. This has been tested at Amazon Web Services, Google Cloud, Microsoft Azure, IBM Cloud, Oracle Cloud, and at Minikube installations, running with Citus Data (Hyperscale), Clickhouse, CockroachDB, Exasol, IBM DB2, MariaDB, MariaDB Columnstore, MemSQL (SingleStore), MonetDB, MySQL, OmniSci (HEAVY.AI), Oracle DB, PostgreSQL, SQL Server, SAP HANA, TimescaleDB, and Vertica. Benchmarks included are YCSB, TPC-H and TPC-C (HammerDB and Benchbase version). -

- -

- The basic workflow is [1,2]: start a containerized version of the DBMS, install monitoring software, import existing data, run benchmarks and shut down everything with a single command. A more advanced workflow is: Plan a sequence of such experiments, run plan as a batch and join results for comparison. It is also possible to scale-out drivers for generating and loading data and for benchmarking to simulate cloud-native environments as in [4]. See [example](TPCTC23/README.md) results as presented in [A Cloud-Native Adoption of Classical DBMS Performance Benchmarks and Tools](http://dx.doi.org/10.13140/RG.2.2.29866.18880) and how they are generated. -

- -

- See the [homepage](https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager) and the [documentation](https://bexhoma.readthedocs.io/en/latest/). If you encounter any issues, please report them to our [Github issue tracker](https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/issues). @@ -61,9 +59,13 @@ If you encounter any issues, please report them to our [Github issue tracker](ht ## More Informations For full power, use this tool as an orchestrator as in [2]. It also starts a monitoring container using [Prometheus](https://prometheus.io/) and a metrics collector container using [cAdvisor](https://github.com/google/cadvisor). For analytical use cases, the Python package [dbmsbenchmarker](https://github.com/Beuth-Erdelt/DBMS-Benchmarker), [3], is used as query executor and evaluator as in [1,2]. -For transactional use cases, HammerDB's TPC-C, Benchbase's TPC-C and YCSB are used as drivers for generating and loading data and for running the workload. +For transactional use cases, HammerDB's TPC-C, Benchbase's TPC-C and YCSB are used as drivers for generating and loading data and for running the workload as in [4]. See the [images](https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/tree/master/images/) folder for more details. +

+ +

+ ## Contributing, Bug Reports If you have any question or found a bug, please report them to our [Github issue tracker](https://github.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/issues). diff --git a/build.sh b/build.sh index 950ba322..e635e223 100644 --- a/build.sh +++ b/build.sh @@ -9,7 +9,7 @@ cd evaluator_dbmsbenchmarker python create_Dockerfiles.py #docker build -f Dockerfile_v0.13.2 -t bexhoma/evaluator_dbmsbenchmarker:v0.13.2 . #docker push bexhoma/evaluator_dbmsbenchmarker:v0.13.2 & -docker push bexhoma/evaluator_dbmsbenchmarker:v0.13.3 & +docker push bexhoma/evaluator_dbmsbenchmarker:v0.13.4 & cd .. cd benchmarker_dbmsbenchmarker @@ -17,7 +17,7 @@ cd benchmarker_dbmsbenchmarker python create_Dockerfiles.py #docker build -f Dockerfile_v0.13.2 -t bexhoma/benchmarker_dbmsbenchmarker:v0.13.2 . #docker push bexhoma/benchmarker_dbmsbenchmarker:v0.13.2 & -docker push bexhoma/benchmarker_dbmsbenchmarker:v0.13.3 & +docker push bexhoma/benchmarker_dbmsbenchmarker:v0.13.4 & cd .. 
########### diff --git a/images/benchmarker_dbmsbenchmarker/create_Dockerfiles.py b/images/benchmarker_dbmsbenchmarker/create_Dockerfiles.py index eefc3cce..2f313ad5 100644 --- a/images/benchmarker_dbmsbenchmarker/create_Dockerfiles.py +++ b/images/benchmarker_dbmsbenchmarker/create_Dockerfiles.py @@ -1,7 +1,7 @@ import subprocess #versions = ['v0.12.1','v0.12.2','v0.12.3','v0.12.4','v0.12.5'] -versions = ['v0.13.3'] +versions = ['v0.13.3','v0.13.4'] with open('Dockerfile_template', 'r') as file: dockerfile = file.read() diff --git a/images/evaluator_dbmsbenchmarker/create_Dockerfiles.py b/images/evaluator_dbmsbenchmarker/create_Dockerfiles.py index a11a769b..37339679 100644 --- a/images/evaluator_dbmsbenchmarker/create_Dockerfiles.py +++ b/images/evaluator_dbmsbenchmarker/create_Dockerfiles.py @@ -1,7 +1,7 @@ import subprocess #versions = ['v0.12.1','v0.12.2','v0.12.3','v0.12.4','v0.12.5'] -versions = ['v0.13.3'] +versions = ['v0.13.3','v0.13.4'] with open('Dockerfile_template', 'r') as file: dockerfile = file.read() diff --git a/k8s/deploymenttemplate-CockroachDB.yml b/k8s/deploymenttemplate-CockroachDB.yml new file mode 100644 index 00000000..24848c2c --- /dev/null +++ b/k8s/deploymenttemplate-CockroachDB.yml @@ -0,0 +1,328 @@ +# Source: https://github.com/cockroachdb/cockroach/blob/master/cloud/kubernetes/cockroachdb-statefulset.yaml +# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/cockroachdb-statefulset.yaml +apiVersion: v1 +kind: Service +metadata: + labels: {app: bexhoma, component: sut, configuration: default, experiment: default} + name: bexhoma-service +spec: + ports: + - {port: 9091, protocol: TCP, name: port-dbms, targetPort: 26257} + - {port: 8080, protocol: TCP, name: port-web, targetPort: 8080} + - {port: 9300, protocol: TCP, name: port-monitoring, targetPort: 9300} + selector: {app: bexhoma, component: worker, configuration: default, experiment: default} +#metadata: +# # This service is meant to be used by clients of the database. 
It exposes a ClusterIP that will +# # automatically load balance connections to the different database pods. +# name: cockroachdb-public +# labels: +# app: cockroachdb +#spec: +# ports: +# # The main port, served by gRPC, serves Postgres-flavor SQL, internode +# # traffic and the cli. +# - port: 26257 +# targetPort: 26257 +# name: grpc +# # The secondary port serves the UI as well as health and debug endpoints. +# - port: 8080 +# targetPort: 8080 +# name: http +# selector: +# app: cockroachdb +--- +apiVersion: v1 +kind: Service +metadata: + # This service only exists to create DNS entries for each pod in the stateful + # set such that they can resolve each other's IP addresses. It does not + # create a load-balanced ClusterIP and should not be used directly by clients + # in most circumstances. + #name: cockroachdb + name: bexhoma-worker + labels: {app: bexhoma, component: worker, configuration: default, experiment: default} + #labels: + # app: cockroachdb + annotations: + # Use this annotation in addition to the actual publishNotReadyAddresses + # field below because the annotation will stop being respected soon but the + # field is broken in some versions of Kubernetes: + # https://github.com/kubernetes/kubernetes/issues/58662 + #service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" + # Enable automatic monitoring of all instances when Prometheus is running in the cluster. 
+ #prometheus.io/scrape: "true" + #prometheus.io/path: "_status/vars" + #prometheus.io/port: "8080" +spec: + ports: + - {port: 26257, protocol: TCP, name: port-dbms, targetPort: 26257} + - {port: 8080, protocol: TCP, name: port-web, targetPort: 8080} + - {port: 9300, protocol: TCP, name: port-monitoring, targetPort: 9300} + #- {port: 9300, protocol: TCP, name: port-monitoring, targetPort: 9300} + # We want all pods in the StatefulSet to have their addresses published for + # the sake of the other CockroachDB pods even before they're ready, since they + # have to be able to talk to each other in order to become ready. + publishNotReadyAddresses: true + clusterIP: None + selector: {app: bexhoma, component: worker, configuration: default, experiment: default} + #selector: + # app: cockroachdb +--- +#apiVersion: policy/v1beta1 +#kind: PodDisruptionBudget +#metadata: +# name: cockroachdb-budget +# labels: +# app: cockroachdb +#spec: +# selector: +# matchLabels: +# app: cockroachdb +# maxUnavailable: 1 +#--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: bexhoma-worker + labels: {app: bexhoma, component: worker, configuration: default, experiment: default} +#metadata: +# name: cockroachdb +spec: + serviceName: bexhoma-workers + #serviceName: "cockroachdb" + replicas: 3 + selector: + matchLabels: + {app: bexhoma, component: worker, configuration: default, experiment: default} + #selector: + # matchLabels: + # app: cockroachdb + template: + metadata: + #labels: + # app: cockroachdb + labels: {app: bexhoma, component: worker, configuration: default, experiment: default} + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - cockroachdb + topologyKey: kubernetes.io/hostname + automountServiceAccountToken: false + imagePullSecrets: + - {name: dockerhub} + tolerations: + - key: "nvidia.com/gpu" + effect: "NoSchedule" + 
containers: + - name: cadvisor + image: gcr.io/cadvisor/cadvisor:v0.47.0 + args: ["--port", "9300", "--storage_duration", "20m0s", "--docker_only", "true", "--disable_metrics", "disk,network,tcp,advtcp,udp,sched,process,hugetlb", "--application_metrics_count_limit", "30", "--housekeeping_interval", "5s"] + ports: + - containerPort: 9300 + #hostPort: 9300 + name: http + protocol: TCP + resources: + requests: + cpu: 150m + memory: 200Mi + volumeMounts: + - name: rootfs + mountPath: /rootfs + readOnly: true + - name: var-run + mountPath: /var/run + readOnly: true + - name: sys + mountPath: /sys + readOnly: true + - name: docker + mountPath: /var/lib/docker + readOnly: true + - name: disk + mountPath: /dev/disk + readOnly: true + - name: dbms + image: cockroachdb/cockroach:v22.1.11 + imagePullPolicy: IfNotPresent + # TODO: Change these to appropriate values for the hardware that you're running. You can see + # the resources that can be allocated on each of your Kubernetes nodes by running: + # kubectl describe nodes + # Note that requests and limits should have identical values. + resources: + limits: {cpu: 16000m, memory: 128Gi} + requests: {cpu: 16000m, memory: 128Gi} + #, ephemeral-storage: "1536Gi"} + ports: + - containerPort: 26257 + name: grpc + - containerPort: 8080 + name: http +# We recommend that you do not configure a liveness probe on a production environment, as this can impact the availability of production databases. 
+# livenessProbe: +# httpGet: +# path: "/health" +# port: http +# initialDelaySeconds: 30 +# periodSeconds: 5 + readinessProbe: + httpGet: + path: "/health?ready=1" + port: http + initialDelaySeconds: 10 + periodSeconds: 5 + failureThreshold: 2 + volumeMounts: + - {mountPath: /cockroach/cockroach-data/extern/data, name: benchmark-data-volume} + - name: bexhoma-workers + mountPath: /cockroach/cockroach-data + env: + - name: BEXHOMA_WORKER_LIST + value: cockroachdb-0.cockroachdb,cockroachdb-1.cockroachdb,cockroachdb-2.cockroachdb + - name: COCKROACH_CHANNEL + value: kubernetes-insecure + - name: GOMAXPROCS + valueFrom: + resourceFieldRef: + resource: limits.cpu + divisor: "1" + - name: MEMORY_LIMIT_MIB + valueFrom: + resourceFieldRef: + resource: limits.memory + divisor: "1Mi" + command: + - "/bin/bash" + - "-ecx" + # The use of qualified `hostname -f` is crucial: + # Other nodes aren't able to look up the unqualified hostname. + - exec + /cockroach/cockroach + start + --logtostderr + --insecure + --advertise-host $(hostname -f) + --http-addr 0.0.0.0 + --join $(expr $BEXHOMA_WORKER_LIST) + --cache $(expr $MEMORY_LIMIT_MIB / 4)MiB + --max-sql-memory $(expr $MEMORY_LIMIT_MIB / 4)MiB + # No pre-stop hook is required, a SIGTERM plus some time is all that's + # needed for graceful shutdown of a node. 
+ terminationGracePeriodSeconds: 60 + volumes: + - name: benchmark-data-volume + persistentVolumeClaim: {claimName: bexhoma-data} + - name: bexhoma-workers + persistentVolumeClaim: {claimName: bexhoma-workers} + - name: rootfs + hostPath: + path: / + - name: var-run + hostPath: + path: /var/run + - name: sys + hostPath: + path: /sys + - name: docker + hostPath: + path: /var/lib/docker + - name: disk + hostPath: + path: /dev/disk + - name: dshm + emptyDir: + medium: Memory + #- name: datadir + # persistentVolumeClaim: + # claimName: datadir + podManagementPolicy: Parallel + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - metadata: + name: bexhoma-workers + labels: {app: bexhoma, component: worker, configuration: default, experiment: default} + spec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 100Gi + storageClassName: shared +--- +# Generated file, DO NOT EDIT. Source: cloud/kubernetes/templates/cluster-init.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: cluster-init + labels: + app: cockroachdb +spec: + template: + spec: + containers: + - name: cluster-init + image: cockroachdb/cockroach:v22.1.11 + imagePullPolicy: IfNotPresent + env: + - name: BEXHOMA_WORKER_LIST + value: cockroachdb-0.cockroachdb,cockroachdb-1.cockroachdb,cockroachdb-2.cockroachdb + # Keep every variable in this one env list: a repeated env key is a duplicate mapping key and would replace the entry above. + - name: BEXHOMA_WORKER_FIRST + value: cockroachdb-0.cockroachdb + command: + - "/bin/bash" + - "-ecx" + # The use of qualified `hostname -f` is crucial: + # Other nodes aren't able to look up the unqualified hostname. 
+ - exec + /cockroach/cockroach + init + --insecure + --host=$(expr $BEXHOMA_WORKER_FIRST) + restartPolicy: OnFailure +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: {app: bexhoma, component: sut, configuration: default, experiment: default} + name: bexhoma-deployment-cockroachdb +spec: + replicas: 1 + selector: + matchLabels: {app: bexhoma, component: sut, configuration: default, experiment: default} + template: + metadata: + labels: {app: bexhoma, component: sut, configuration: default, experiment: default} + spec: + automountServiceAccountToken: false + imagePullSecrets: + - {name: dockerhub} + nodeSelector: + tolerations: + - key: "nvidia.com/gpu" + effect: "NoSchedule" + containers: + - name: dbms + image: cockroachdb/cockroach:v22.1.11 + env: + ports: + - containerPort: 26257 + resources: + limits: {cpu: 100m, memory: 1Gi} + requests: {cpu: 100m, memory: 1Gi} # a request must not exceed its limit, otherwise the API server rejects the pod + command: ["/bin/sh"] + args: ["-c", "while true; do echo hello; sleep 10;done"] + volumeMounts: + - {mountPath: /data, name: benchmark-data-volume} + volumes: + - name: benchmark-data-volume + persistentVolumeClaim: {claimName: bexhoma-data} diff --git a/k8s/deploymenttemplate-MySQL.yml b/k8s/deploymenttemplate-MySQL.yml new file mode 100644 index 00000000..bc206f18 --- /dev/null +++ b/k8s/deploymenttemplate-MySQL.yml @@ -0,0 +1,110 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + labels: {app: bexhoma, component: sut, configuration: default, experiment: default} + name: bexhoma-storage +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 20Gi + storageClassName: shared +--- +apiVersion: v1 +kind: Service +metadata: + labels: {app: bexhoma, component: sut, configuration: default, experiment: default} + name: bexhoma-service +spec: + ports: + - {port: 9091, protocol: TCP, name: port-dbms, targetPort: 3306} + - {port: 9300, protocol: TCP, name: port-monitoring, targetPort: 9300} + selector: {app: bexhoma, component: sut, configuration: default, 
experiment: default} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: {app: bexhoma, component: sut, configuration: default, experiment: default} + name: bexhoma-deployment-mysql +spec: + replicas: 1 + selector: + matchLabels: {app: bexhoma, component: sut, configuration: default, experiment: default} + template: + metadata: + labels: {app: bexhoma, component: sut, configuration: default, experiment: default} + spec: + automountServiceAccountToken: false + imagePullSecrets: + - {name: dockerhub} + nodeSelector: + tolerations: + - key: "nvidia.com/gpu" + effect: "NoSchedule" + containers: + - name: dbms + image: mysql/mysql-server:8.0.31 + args: ["--innodb-write-io-threads", "16"] + #args: ["--innodb-write-io-threads", "16", "--innodb-log-file-size", "4294967296"] + #args: ["--secure-file-priv", "/data/", "--innodb-write-io-threads", "16", "--innodb-log-file-size", "4294967296"] + env: + - {name: MYSQL_ALLOW_EMPTY_PASSWORD, value: 'yes'} + - {name: MYSQL_ROOT_HOST, value: '%'} + ports: + - {containerPort: 3306} + resources: + limits: {cpu: 16000m, memory: 128Gi} + requests: {cpu: 16000m, memory: 128Gi} + #, ephemeral-storage: "1536Gi"} + volumeMounts: + - {mountPath: /data, name: benchmark-data-volume} + - {mountPath: /var/lib/mysql, name: benchmark-storage-volume} + - name: cadvisor + image: gcr.io/cadvisor/cadvisor:v0.47.0 + args: ["--port", "9300", "--storage_duration", "20m0s", "--docker_only", "true", "--disable_metrics", "disk,network,tcp,advtcp,udp,sched,process,hugetlb", "--application_metrics_count_limit", "30", "--housekeeping_interval", "5s"] + ports: + - containerPort: 9300 + #hostPort: 9300 + name: http + protocol: TCP + resources: + requests: + cpu: 150m + memory: 200Mi + volumeMounts: + - name: rootfs + mountPath: /rootfs + readOnly: true + - name: var-run + mountPath: /var/run + readOnly: true + - name: sys + mountPath: /sys + readOnly: true + - name: docker + mountPath: /var/lib/docker + readOnly: true + - name: disk + mountPath: 
/dev/disk + readOnly: true + volumes: + - name: benchmark-data-volume + persistentVolumeClaim: {claimName: bexhoma-data} + - name: benchmark-storage-volume + persistentVolumeClaim: {claimName: bexhoma-storage} + - name: rootfs + hostPath: + path: / + - name: var-run + hostPath: + path: /var/run + - name: sys + hostPath: + path: /sys + - name: docker + hostPath: + path: /var/lib/docker + - name: disk + hostPath: + path: /dev/disk diff --git a/k8s/deploymenttemplate-bexhoma-dashboard.yml b/k8s/deploymenttemplate-bexhoma-dashboard.yml index d914e904..dc4408f6 100644 --- a/k8s/deploymenttemplate-bexhoma-dashboard.yml +++ b/k8s/deploymenttemplate-bexhoma-dashboard.yml @@ -33,7 +33,7 @@ spec: # effect: "NoSchedule" containers: - name: dashboard - image: bexhoma/evaluator_dbmsbenchmarker:v0.13.3 + image: bexhoma/evaluator_dbmsbenchmarker:v0.13.4 imagePullPolicy: IfNotPresent #imagePullPolicy: Always ports: @@ -47,7 +47,7 @@ spec: - name: bexhoma-results mountPath: /results - name: jupyter - image: bexhoma/evaluator_dbmsbenchmarker:v0.13.3 + image: bexhoma/evaluator_dbmsbenchmarker:v0.13.4 imagePullPolicy: IfNotPresent #imagePullPolicy: Always #command: ["jupyter","notebook","--no-browser", "--NotebookApp.password=\"$(echo 'admin' | python -c 'from notebook.auth import passwd;print(passwd(input()))')\"", "--allow-root"] diff --git a/k8s/jobtemplate-benchmarking-dbmsbenchmarker.yml b/k8s/jobtemplate-benchmarking-dbmsbenchmarker.yml index 215ca51e..519254b8 100644 --- a/k8s/jobtemplate-benchmarking-dbmsbenchmarker.yml +++ b/k8s/jobtemplate-benchmarking-dbmsbenchmarker.yml @@ -18,7 +18,7 @@ spec: tolerations: containers: - name: dbmsbenchmarker - image: bexhoma/benchmarker_dbmsbenchmarker:v0.13.3 + image: bexhoma/benchmarker_dbmsbenchmarker:v0.13.4 imagePullPolicy: Always #imagePullPolicy: IfNotPresent env: