Commit cee4b02

V0.5.18 (#97)
* Prepare next release
* Docs: Intro image
* K8s: Removed ExtensionsV1beta1Api (no more beta)
* Started improving debug messages
* Requirements: Downgraded kubernetes client (kubernetes-client/python#1718)
* Loading: More infos about intended delay
* Improving debug messages
* Ignore spyproject
* Dockerimage of DBMS as parameter
* TPC-DS: Dialect of MonetDB for Q72 and Q23
1 parent f9cd3d6 commit cee4b02

11 files changed, +314 −184 lines

.gitignore

Lines changed: 1 addition & 0 deletions
```diff
@@ -8,3 +8,4 @@ jars/*
 build/*
 bexhoma.egg-info/*
 dist/*
+.spyproject/*
```

README.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -9,7 +9,7 @@ It serves as the **orchestrator** [2] for distributed parallel benchmarking expe
 This has been tested at Amazon Web Services, Google Cloud, Microsoft Azure, IBM Cloud und Oracle Cloud and at Minikube installations.
 
 <p align="center">
-    <img src="https://raw.githubusercontent.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/v0.5.6/docs/experiment-with-orchestrator.png" width="800">
+    <img src="https://raw.githubusercontent.com/Beuth-Erdelt/Benchmark-Experiment-Host-Manager/master/docs/experiment-with-orchestrator.png" width="800">
 </p>
 
 The basic workflow is [1,2]: start a virtual machine, install monitoring software and a database management system, import data, run benchmarks (external tool) and shut down everything with a single command.
```

bexhoma/clusters.py

Lines changed: 6 additions & 10 deletions
```diff
@@ -1,5 +1,9 @@
 """
-Class to managing experiments in a Kubernetes cluster
+:Date: 2022-05-01
+:Version: 0.5
+:Authors: Patrick Erdelt
+
+Class to managing experiments in a cluster
 Copyright (C) 2020 Patrick Erdelt
 
 This program is free software: you can redistribute it and/or modify
@@ -23,7 +27,7 @@
 import subprocess
 import os
 import time
-from timeit import default_timer #as timer
+from timeit import default_timer
 import psutil
 import logging
 import socket
@@ -48,14 +52,6 @@ def __init__(self, clusterconfig='cluster.config', configfolder='experiments/',
         self.code = code
         masterK8s.testdesign.__init__(self, clusterconfig=clusterconfig, configfolder=configfolder, context=context, yamlfolder=yamlfolder, code=self.code, instance=instance, volume=volume, docker=docker, script=script, queryfile=queryfile)
         self.max_sut = None
-        """
-        self.code = code
-        if self.code is None:
-            self.code = str(round(time.time()))
-        self.path = self.resultfolder+"/"+self.code
-        if not path.isdir(self.path):
-            makedirs(self.path)
-        """
         self.experiments = []
     def add_experiment(self, experiment):
         self.experiments.append(experiment)
```
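The commented-out block removed here duplicated setup that the call to `masterK8s.testdesign.__init__` already performs: when no experiment code is given, one is derived from the current Unix timestamp and used to name the result folder. A minimal sketch of that behavior, reconstructed from the removed lines (the function wrapper and its `resultfolder` argument are illustrative, not bexhoma's actual API):

```python
import os
import time

def init_result_folder(resultfolder, code=None):
    # Derive the experiment code from the current Unix timestamp if none is
    # given, exactly as the removed block did
    if code is None:
        code = str(round(time.time()))
    # Each experiment gets its own result folder named after its code
    path = resultfolder + "/" + code
    if not os.path.isdir(path):
        os.makedirs(path)
    return code, path
```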

bexhoma/configurations.py

Lines changed: 105 additions & 68 deletions
Large diffs are not rendered by default.

bexhoma/experiments.py

Lines changed: 63 additions & 34 deletions
```diff
@@ -1,21 +1,29 @@
 """
-:Date: 2018-08-22
-:Version: 0.1
+:Date: 2022-05-01
+:Version: 0.5
 :Authors: Patrick Erdelt
 
-Demo of TPC-DS in a K8s Cluster.
+Class for managing an experiment.
+This is plugged into a cluster object.
+It collects some configuation objects.
 
-# Compare 4 DBMS on different HW
-# 256 runs
-# no delay
-# Compare result sets
-# 2x each DBMS
-# MemSQL, OmniSci, MonetDB, PostgreSQL, maybe add MySQL, MariaDB, Kinetica?
-# Limit 4 CPUs
+Two examples included, dealing with TPC-H and TPC-DS tests.
 
-This deals with the TPC-DS tests.
+Copyright (C) 2020 Patrick Erdelt
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
 """
-#from bexhoma import *
 from dbmsbenchmarker import parameter, tools, inspector
 import logging
 import urllib3
```
```diff
@@ -315,7 +323,7 @@ def zip(self):
         # include sub directories
         #cmd['zip_results'] = 'cd /results;zip -r {code}.zip {code}'.format(code=self.code)
         #fullcommand = 'kubectl exec '+pod_dashboard+' -- bash -c "'+cmd['zip_results'].replace('"','\\"')+'"'
-        self.cluster.executeCTL(command=cmd['zip_results'], pod=pod_dashboard)#self.yamlfolder+deployment)
+        self.cluster.executeCTL(command=cmd['zip_results'], pod=pod_dashboard, container="dashboard")#self.yamlfolder+deployment)
         #print(fullcommand)
         #proc = subprocess.Popen(fullcommand, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
         #stdout, stderr = proc.communicate()
```
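The new `container="dashboard"` argument is needed because the dashboard pod now holds more than one container, so `kubectl exec` must be told which one to enter. The body of `executeCTL` is not part of this diff; a sketch of what such a helper plausibly does, following the commented-out `fullcommand` lines above (the implementation details are an assumption, not the actual bexhoma code):

```python
import subprocess

def executeCTL(command, pod, container=''):
    # Build a kubectl exec call; -c/--container is only required when the pod
    # runs more than one container
    fullcommand = 'kubectl exec ' + pod
    if container:
        fullcommand += ' --container=' + container
    fullcommand += ' -- bash -c "' + command.replace('"', '\\"') + '"'
    # Run the shell command and capture its output (sketch: no error handling)
    proc = subprocess.Popen(fullcommand, stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            shell=True)
    stdout, stderr = proc.communicate()
    return stdout.decode('utf-8')
```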
```diff
@@ -340,26 +348,33 @@ def evaluate_results(self, pod_dashboard=''):
         pods = self.cluster.getPods(component='dashboard')
         pod_dashboard = pods[0]
         # copy logs and yamls to result folder
+        print("Copy configuration and logs", end="", flush=True)
         directory = os.fsencode(self.path)
         for file in os.listdir(directory):
-            filename = os.fsdecode(file)
-            if filename.endswith(".log") or filename.endswith(".yml") or filename.endswith(".error"):
-                self.cluster.kubectl('cp '+self.path+"/"+filename+' '+pod_dashboard+':/results/'+str(self.code)+'/'+filename)
+            filename = os.fsdecode(file)
+            if filename.endswith(".log") or filename.endswith(".yml") or filename.endswith(".error"):
+                self.cluster.kubectl('cp '+self.path+"/"+filename+' '+pod_dashboard+':/results/'+str(self.code)+'/'+filename+' -c dashboard')
+                print(".", end="", flush=True)
+        print("done!")
         cmd = {}
         cmd['update_dbmsbenchmarker'] = 'git pull'#/'+str(self.code)
         #fullcommand = 'kubectl exec '+pod_dashboard+' -- bash -c "'+cmd['update_dbmsbenchmarker'].replace('"','\\"')+'"'
-        self.cluster.executeCTL(command=cmd['update_dbmsbenchmarker'], pod=pod_dashboard)
+        self.cluster.executeCTL(command=cmd['update_dbmsbenchmarker'], pod=pod_dashboard, container="dashboard")
         #print(fullcommand)
         #proc = subprocess.Popen(fullcommand, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
         #stdout, stderr = proc.communicate()
+        print("Join results ", end="", flush=True)
         cmd['merge_results'] = 'python merge.py -r /results/ -c '+str(self.code)
-        self.cluster.executeCTL(command=cmd['merge_results'], pod=pod_dashboard)
+        self.cluster.executeCTL(command=cmd['merge_results'], pod=pod_dashboard, container="dashboard")
+        print("done!")
         #fullcommand = 'kubectl exec '+pod_dashboard+' -- bash -c "'+cmd['merge_results'].replace('"','\\"')+'"'
         #print(fullcommand)
         #proc = subprocess.Popen(fullcommand, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
         #stdout, stderr = proc.communicate()
+        print("Build evaluation cube ", end="", flush=True)
         cmd['evaluate_results'] = 'python benchmark.py read -e yes -r /results/'+str(self.code)
-        self.cluster.executeCTL(command=cmd['evaluate_results'], pod=pod_dashboard)
+        self.cluster.executeCTL(command=cmd['evaluate_results'], pod=pod_dashboard, container="dashboard")
+        print("done!")
         #fullcommand = 'kubectl exec '+pod_dashboard+' -- bash -c "'+cmd['evaluate_results'].replace('"','\\"')+'"'
         #print(fullcommand)
         #proc = subprocess.Popen(fullcommand, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
```
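Likewise, the copy loop now appends `-c dashboard` to the `kubectl cp` call, which selects a specific container of the destination pod. A self-contained sketch of one such copy; pod name, experiment code, path and file name below are illustrative values, not taken from the diff:

```python
import subprocess

# Illustrative values only
path = 'experiments/tpch'            # local folder holding logs and yamls
pod_dashboard = 'bexhoma-dashboard'  # destination pod (hypothetical name)
code = '1234567890'                  # experiment code (hypothetical)
filename = 'connections.yml'

# kubectl cp <local> <pod>:<remote> -c <container> copies into one specific
# container of a multi-container pod
subprocess.run('kubectl cp ' + path + '/' + filename + ' '
               + pod_dashboard + ':/results/' + code + '/' + filename
               + ' -c dashboard', shell=True, check=True)
```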
```diff
@@ -404,30 +419,41 @@ def add_benchmark_list(self, list_clients):
     def work_benchmark_list(self, intervals=30, stop=True):
         do = True
         while do:
-            time.sleep(intervals)
+            #time.sleep(intervals)
+            self.wait(intervals)
+            # count number of running and pending pods
+            num_pods_running = len(self.cluster.getPods(app = self.appname, component = 'sut', status = 'Running'))
+            num_pods_pending = len(self.cluster.getPods(app = self.appname, component = 'sut', status = 'Pending'))
             for config in self.configurations:
-                # count number of running and pending pods
-                num_pods_running = len(self.cluster.getPods(app = self.appname, component = 'sut', status = 'Running'))
-                num_pods_pending = len(self.cluster.getPods(app = self.appname, component = 'sut', status = 'Pending'))
                 # check if sut is running
                 if not config.sut_is_running():
-                    print("{} is not running".format(config.configuration))
+                    #print("{} is not running".format(config.configuration))
                     if not config.experiment_done:
                         if not config.sut_is_pending():
+                            print("{} is not running yet - ".format(config.configuration), end="", flush=True)
                             if self.cluster.max_sut is not None:
-                                print("{} running and {} pending pods".format(num_pods_running, num_pods_pending))
+                                print("{} running and {} pending pods: max is {} pods in the cluster - ".format(num_pods_running, num_pods_pending, self.cluster.max_sut), end="", flush=True)
                                 if num_pods_running+num_pods_pending < self.cluster.max_sut:
+                                    print("it will start now")
                                     config.start_sut()
-                                    self.wait(10)
+                                    num_pods_pending = num_pods_pending + 1
+                                    #self.wait(10)
+                                else:
+                                    print("it has to wait")
                             else:
+                                print("it will start now")
                                 config.start_sut()
-                                self.wait(10)
+                                num_pods_pending = num_pods_pending + 1
+                                #self.wait(10)
+                        else:
+                            print("{} is pending".format(config.configuration))
                     continue
                 # check if loading is done
                 config.check_load_data()
                 # start loading
                 if not config.loading_started:
-                    print("{} is not loaded".format(config.configuration))
+                    if config.sut_is_running():
+                        print("{} is not loaded yet".format(config.configuration))
                     if config.monitoring_active and not config.monitoring_is_running():
                         print("{} waits for monitoring".format(config.configuration))
                         if not config.monitoring_is_pending():
```
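The restructuring counts running and pending SUT pods once per polling round and bumps the local pending counter after each start, so a single round can no longer launch more SUTs than `cluster.max_sut` allows. A condensed sketch of just that admission logic; the function and its arguments are stand-ins for illustration:

```python
def start_pending_suts(configurations, num_pods_running, num_pods_pending, max_sut=None):
    """Start waiting SUTs without exceeding the cluster-wide cap.

    configurations: objects exposing start_sut() (stand-in for bexhoma configs).
    num_pods_running / num_pods_pending: counted once at the start of the round.
    max_sut: maximum number of SUT pods allowed in the cluster; None = unlimited.
    """
    for config in configurations:
        if max_sut is not None and num_pods_running + num_pods_pending >= max_sut:
            # cap reached: this configuration has to wait for the next round
            continue
        config.start_sut()
        # count the freshly started pod locally so this round cannot overshoot
        num_pods_pending += 1
```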
```diff
@@ -446,7 +472,7 @@ def work_benchmark_list(self, intervals=30, stop=True):
                         # config demands other delay
                         delay = config.dockertemplate['delay_prepare']
                         config.loading_after_time = now + timedelta(seconds=delay)
-                        print("{} will start loading but not before {}".format(config.configuration, config.loading_after_time.strftime('%Y-%m-%d %H:%M:%S')))
+                        print("{} will start loading but not before {} (that is in {} secs)".format(config.configuration, config.loading_after_time.strftime('%Y-%m-%d %H:%M:%S'), delay))
                     continue
                 # benchmark if loading is done and monitoring is ready
                 if config.loading_finished:
```
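The extended message now reports the remaining seconds alongside the absolute start time. A tiny worked example of the computation; the configuration name and delay value are made up:

```python
from datetime import datetime, timedelta

delay = 60  # seconds; in bexhoma this comes from dockertemplate['delay_prepare']
now = datetime.utcnow()
loading_after_time = now + timedelta(seconds=delay)
print("{} will start loading but not before {} (that is in {} secs)".format(
    'PostgreSQL-24-1-16384',  # illustrative configuration name
    loading_after_time.strftime('%Y-%m-%d %H:%M:%S'), delay))
```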
```diff
@@ -510,7 +536,8 @@ def work_benchmark_list(self, intervals=30, stop=True):
             # status per pod
             for p in pods:
                 status = self.cluster.getPodStatus(p)
-                print(p,status)
+                self.cluster.logger.debug('job-pod {} has status {}'.format(p, status))
+                #print(p,status)
                 if status == 'Succeeded':
                     #if status != 'Running':
                     self.cluster.store_pod_log(p)
```
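These per-pod and per-job status lines move from `print` to `logger.debug`, so they stay quiet unless debug logging is switched on. Assuming the cluster's logger is a standard `logging.Logger` (clusters.py already imports `logging`), enabling the messages could look like this; the logger name and pod name below are illustrative:

```python
import logging

# Show debug output on stdout with timestamps
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s %(name)s %(levelname)s: %(message)s')

logger = logging.getLogger('cluster')  # illustrative logger name
logger.debug('job-pod {} has status {}'.format('bexhoma-benchmarker-abc12', 'Succeeded'))
```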
```diff
@@ -528,21 +555,22 @@ def work_benchmark_list(self, intervals=30, stop=True):
             # status per job
             for job in jobs:
                 success = self.cluster.getJobStatus(job)
-                print(job, success)
+                self.cluster.logger.debug('job {} has status {}'.format(job, success))
+                #print(job, success)
                 if success:
                     self.cluster.deleteJob(job)
             if len(pods) == 0 and len(jobs) == 0:
                 do = False
                 for config in self.configurations:
                     #if config.sut_is_pending() or config.loading_started or len(config.benchmark_list) > 0:
                     if config.sut_is_pending():
-                        print("{} pending".format(config.configuration))
+                        self.cluster.logger.debug("{} pending".format(config.configuration))
                         do = True
                     if not config.loading_started:
-                        print("{} not loaded".format(config.configuration))
+                        self.cluster.logger.debug("{} not loaded".format(config.configuration))
                         do = True
                     if len(config.benchmark_list) > 0:
-                        print("{} still benchmarks to run".format(config.configuration))
+                        self.cluster.logger.debug("{} still benchmarks to run".format(config.configuration))
                         do = True
     def benchmark_list(self, list_clients):
         for i, parallelism in enumerate(list_clients):
```
596624
break
597625
def stop_benchmarker(self, configuration=''):
598626
# all jobs of configuration - benchmarker
627+
self.cluster.logger.debug("experiment.stop_benchmarker({})".format(configuration))
599628
app = self.appname
600629
component = 'benchmarker'
601630
jobs = self.cluster.getJobs(app, component, self.code, configuration)
