diff --git a/bexhoma/__init__.py b/bexhoma/__init__.py index 56c96f72..b6d92c21 100644 --- a/bexhoma/__init__.py +++ b/bexhoma/__init__.py @@ -1,4 +1,4 @@ """ The clustermanager module """ -__all__ = ["clusters", "experiments", "configurations"] +__all__ = ["evaluators", "clusters", "experiments", "configurations"] diff --git a/bexhoma/clusters.py b/bexhoma/clusters.py index 1804e2a4..66ac9c85 100644 --- a/bexhoma/clusters.py +++ b/bexhoma/clusters.py @@ -89,7 +89,7 @@ def __init__(self, clusterconfig='cluster.config', experiments_configfolder='exp configfile=f.read() self.config = eval(configfile) self.experiments_configfolder = experiments_configfolder - self.resultfolder = self.config['benchmarker']['resultfolder'] + self.resultfolder = self.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "") self.queryfile = queryfile self.clusterconfig = clusterconfig self.timeLoading = 0 @@ -1001,6 +1001,48 @@ def get_jobs(self, app='', component='', experiment='', configuration='', client # try again, if not failed due to "not found" if not e.status == 404: return self.get_jobs(app=app, component=component, experiment=experiment, configuration=configuration, client=client) + def get_jobs_labels(self, app='', component='', experiment='', configuration='', client=''): + """ + Return all jobs matching a set of labels (component/ experiment/ configuration) + + :param app: app the job belongs to + :param component: Component, for example sut or monitoring + :param experiment: Unique identifier of the experiment + :param configuration: Name of the dbms configuration + :param client: DEPRECATED? + """ + #print("getJobs") + label = '' + if len(app)==0: + app = self.appname + label += 'app='+app + if len(component)>0: + label += ',component='+component + if len(experiment)>0: + label += ',experiment='+experiment + if len(configuration)>0: + label += ',configuration='+configuration + if len(client)>0: + label += ',client='+client + self.logger.debug('get_jobs_labels '+label) + job_labels = {} + try: + api_response = self.v1batches.list_namespaced_job(self.namespace, label_selector=label)#'app='+appname) + #pprint(api_response) + if len(api_response.items) > 0: + for item in api_response.items: + job_labels[item.metadata.name] = item.metadata.labels + return job_labels + else: + return [] + except ApiException as e: + print("Exception when calling BatchV1Api->list_namespaced_job: %s\n" % e) + print("Create new access token") + self.cluster_access() + self.wait(2) + # try again, if not failed due to "not found" + if not e.status == 404: + return self.get_jobs_labels(app=app, component=component, experiment=experiment, configuration=configuration, client=client) def get_job_status(self, jobname='', app='', component='', experiment='', configuration='', client=''): """ Return status of a jobs given by name or matching a set of labels (component/ experiment/ configuration) @@ -1415,6 +1457,21 @@ def add_to_messagequeue(self, queue, data): self.logger.debug("I am using messagequeue {}".format(pod_messagequeue)) redisCommand = 'redis-cli rpush {redisQueue} {data} '.format(redisQueue=queue, data=data) self.execute_command_in_pod(command=redisCommand, pod=pod_messagequeue) + def set_pod_counter(self, queue, value=0): + """ + Add data to (Redis) message queue. 
+ + :param queue: Name of the queue + :param data: Data to be added to queue + """ + pods_messagequeue = self.get_pods(component='messagequeue') + if len(pods_messagequeue) > 0: + pod_messagequeue = pods_messagequeue[0] + else: + pod_messagequeue = 'bexhoma-messagequeue-5ff94984ff-mv9zn' + self.logger.debug("I am using messagequeue {}".format(pod_messagequeue)) + redisCommand = 'redis-cli set {redisQueue} {value} '.format(redisQueue=queue, value=value) + self.execute_command_in_pod(command=redisCommand, pod=pod_messagequeue) diff --git a/bexhoma/configurations.py b/bexhoma/configurations.py index 98810800..48411539 100644 --- a/bexhoma/configurations.py +++ b/bexhoma/configurations.py @@ -27,6 +27,7 @@ from pprint import pprint #from kubernetes import client, config import subprocess +import re import os from timeit import default_timer import psutil @@ -41,10 +42,12 @@ import copy from datetime import datetime, timedelta import threading +from io import StringIO +import hiyapyco from dbmsbenchmarker import * -from bexhoma import clusters, experiments +from bexhoma import clusters, experiments, evaluators @@ -96,6 +99,11 @@ def __init__(self, experiment, docker=None, configuration='', script=None, alias else: self.script = self.experiment.script self.initscript = self.experiment.cluster.volumes[self.experiment.volume]['initscripts'][self.script] + self.indexing = self.experiment.indexing + if self.indexing: + self.indexscript = self.experiment.cluster.volumes[self.experiment.volume]['initscripts'][self.indexing] + else: + self.indexscript = [] self.alias = alias if num_experiment_to_apply is not None: self.num_experiment_to_apply = num_experiment_to_apply @@ -105,6 +113,7 @@ def __init__(self, experiment, docker=None, configuration='', script=None, alias #self.clients = clients self.appname = self.experiment.cluster.appname self.code = self.experiment.cluster.code + self.path = self.experiment.path self.resources = {} self.pod_sut = '' #: Name of the sut's master pod self.set_resources(**self.experiment.resources) @@ -115,9 +124,13 @@ def __init__(self, experiment, docker=None, configuration='', script=None, alias self.set_nodes(**self.experiment.nodes) self.set_maintaining_parameters(**self.experiment.maintaining_parameters) self.set_loading_parameters(**self.experiment.loading_parameters) + self.patch_loading(self.experiment.loading_patch) self.set_benchmarking_parameters(**self.experiment.benchmarking_parameters) + self.additional_labels = dict() + self.set_additional_labels(**self.experiment.additional_labels) self.experiment.add_configuration(self) self.dialect = dialect + self.use_distributed_datasource = False #: True, iff the SUT should mount 'benchmark-data-volume' as source of (non-generated) data # scaling of other components self.num_worker = worker self.num_loading = 0 @@ -126,6 +139,8 @@ def __init__(self, experiment, docker=None, configuration='', script=None, alias self.num_maintaining_pods = 0 # are there other components? 
self.monitoring_active = experiment.monitoring_active + self.prometheus_interval = experiment.prometheus_interval + self.prometheus_timeout = experiment.prometheus_timeout self.maintaining_active = experiment.maintaining_active self.loading_active = experiment.loading_active self.jobtemplate_maintaining = "" @@ -136,18 +151,29 @@ def __init__(self, experiment, docker=None, configuration='', script=None, alias self.dockerimage = dockerimage #: Name of the Docker image of the SUT self.connection_parameter = {} #: Collect all parameters that might be interesting in evaluation of results self.timeLoading = 0 #: Time in seconds the system has taken for the initial loading of data + self.timeGenerating = 0 #: Time in seconds the system has taken for generating the data + self.timeIngesting = 0 #: Time in seconds the system has taken for ingesting existing + self.timeSchema = 0 #: Time in seconds the system has taken for creating the db schema + self.timeIndex = 0 #: Time in seconds the system has taken for indexing the database + self.times_scripts = dict() # contains times for each single script that is run on db (create schema, index etc) self.loading_started = False #: Time as an integer when initial loading has started self.loading_after_time = None #: Time as an integer when initial loading should start - to give the system time to start up completely self.loading_finished = False #: Time as an integer when initial loading has finished self.client = 1 #: If we have a sequence of benchmarkers, this tells at which position we are self.timeLoadingStart = 0 - self.timeLoadingEnd = 0 + self.timeLoadingEnd = 0 + self.loading_timespans = {} # Dict of lists per container of (start,end) pairs containing time markers of loading pods + self.benchmarking_timespans = {} # Dict of lists per container of (start,end) pairs containing time markers of benchmarking pods self.reset_sut() def reset_sut(self): """ Forget that the SUT has been loaded and benchmarked. """ self.timeLoading = 0 #: Time the system has taken for the initial loading of data + self.timeGenerating = 0 #: Time in seconds the system has taken for generating the data + self.timeIngesting = 0 #: Time in seconds the system has taken for ingesting existing + self.timeSchema = 0 #: Time in seconds the system has taken for creating the db schema + self.timeIndex = 0 #: Time in seconds the system has taken for indexing the database self.loading_started = False #: Time as an integer when initial loading has started self.loading_after_time = None #: Time as an integer when initial loading should start - to give the system time to start up completely self.loading_finished = False #: Time as an integer when initial loading has finished @@ -230,11 +256,20 @@ def set_storage(self, **kwargs): :param kwargs: Dict of meta data, example 'storageSize' => '100Gi' """ self.storage = kwargs + def set_additional_labels(self, **kwargs): + """ + Sets additional labels, that will be put to K8s objects (and ignored otherwise). + This is for the SUT component. + Can be set by experiment before creation of configuration. + + :param kwargs: Dict of labels, example 'SF' => 100 + """ + self.additional_labels = {**self.additional_labels, **kwargs} def set_ddl_parameters(self, **kwargs): """ Sets DDL parameters for the experiments. This substitutes placeholders in DDL script. - Can be overwritten by configuration. + Can be set by experiment before creation of configuration. 
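Note on the additional-labels mechanism introduced here: set_additional_labels() merges arbitrary key/value pairs into the configuration, and the start/create methods later stamp them onto every generated Kubernetes object as string-valued labels. A minimal sketch of that merge-and-stamp pattern; the label keys and values below are made-up examples, not bexhoma defaults:

```python
# Sketch of the merge-and-stamp pattern behind set_additional_labels();
# the label keys/values (SF, purpose) are illustrative assumptions.
additional_labels = {}

# Each call keeps earlier entries and adds/overrides the new ones:
additional_labels = {**additional_labels, **{"SF": 100}}
additional_labels = {**additional_labels, **{"purpose": "demo"}}

# When a manifest is generated, every value is cast to str, because
# Kubernetes label values must be strings:
manifest_labels = {"app": "bexhoma", "component": "sut"}
for label_key, label_value in additional_labels.items():
    manifest_labels[label_key] = str(label_value)

print(manifest_labels)
# {'app': 'bexhoma', 'component': 'sut', 'SF': '100', 'purpose': 'demo'}
```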
:param kwargs: Dict of meta data, example 'index' => 'btree' """ @@ -280,6 +315,14 @@ def set_loading_parameters(self, **kwargs): :param kwargs: Dict of meta data, example 'PARALLEL' => '64' """ self.loading_parameters = kwargs + def patch_loading(self, patch): + """ + Patches YAML of loading components. + Can be set by experiment before creation of configuration. + + :param patch: String in YAML format, overwrites basic YAML file content + """ + self.loading_patch = patch def set_benchmarking_parameters(self, **kwargs): """ Sets ENV for benchmarking components. @@ -307,7 +350,7 @@ def set_loading(self, parallel, num_pods=None): self.num_loading_pods = self.num_loading def set_nodes(self, **kwargs): self.nodes = kwargs - def set_experiment(self, instance=None, volume=None, docker=None, script=None): + def set_experiment(self, instance=None, volume=None, docker=None, script=None, indexing=None): """ Read experiment details from cluster config""" #self.bChangeInstance = True #if instance is not None: @@ -321,6 +364,9 @@ def set_experiment(self, instance=None, volume=None, docker=None, script=None): if script is not None: self.script = script self.initscript = self.experiment.cluster.volumes[self.experiment.volume]['initscripts'][self.script] + if indexing is not None: + self.indexing = indexing + self.indexscript = self.experiment.cluster.volumes[self.experiment.volume]['initscripts'][self.indexing] def __OLD_prepare(self, instance=None, volume=None, docker=None, script=None, delay=0): """ Per config: Startup SUT and Monitoring """ #self.setExperiment(instance, volume, docker, script) @@ -502,6 +548,9 @@ def start_loading_pod(self, app='', component='loading', experiment='', configur for i in range(1, self.num_loading+1): #redisClient.rpush(redisQueue, i) self.experiment.cluster.add_to_messagequeue(queue=redisQueue, data=i) + # reset number of clients + redisQueue = '{}-{}-{}-{}'.format(app, 'loader-podcount', self.configuration, self.code) + self.experiment.cluster.set_pod_counter(queue=redisQueue, value=0) # start job job = self.create_manifest_loading(app=app, component='loading', experiment=experiment, configuration=configuration, parallelism=parallelism, num_pods=num_pods) self.logger.debug("Deploy "+job) @@ -553,7 +602,7 @@ def start_loading(self, delay=0): self.check_load_data() if not self.loading_started: #print("load_data") - self.load_data() + self.load_data(scripts=self.initscript) # we do not test at localhost (forwarded), because there might be conflicts #self.experiment.cluster.stopPortforwarding() # store experiment needs new format @@ -676,20 +725,22 @@ def start_monitoring(self, app='', component='monitoring', experiment='', config dep['spec']['template']['metadata']['labels'] = dep['metadata']['labels'].copy() dep['spec']['selector']['matchLabels'] = dep['metadata']['labels'].copy() envs = dep['spec']['template']['spec']['containers'][0]['env'] + #prometheus_interval = "15s" + #prometheus_timeout = "15s" prometheus_config = """global: scrape_interval: 15s scrape_configs: - job_name: '{master}' - scrape_interval: 3s - scrape_timeout: 3s + scrape_interval: {prometheus_interval} + scrape_timeout: {prometheus_timeout} static_configs: - targets: ['{master}:9300'] - job_name: 'monitor-gpu' - scrape_interval: 3s - scrape_timeout: 3s + scrape_interval: {prometheus_interval} + scrape_timeout: {prometheus_timeout} static_configs: - - targets: ['{master}:9400']""".format(master=name_sut) + - targets: ['{master}:9400']""".format(master=name_sut, 
prometheus_interval=self.prometheus_interval, prometheus_timeout=self.prometheus_timeout) # services of workers name_worker = self.generate_component_name(component='worker', configuration=self.configuration, experiment=self.code) pods_worker = self.experiment.cluster.get_pods(component='worker', configuration=self.configuration, experiment=self.code) @@ -698,10 +749,10 @@ def start_monitoring(self, app='', component='monitoring', experiment='', config print('Worker: {worker}.{service_sut}'.format(worker=pod, service_sut=name_worker)) prometheus_config += """ - job_name: '{worker}' - scrape_interval: 3s - scrape_timeout: 3s + scrape_interval: {prometheus_interval} + scrape_timeout: {prometheus_timeout} static_configs: - - targets: ['{worker}.{service_sut}:9300']""".format(worker=pod, service_sut=name_worker, client=i) + - targets: ['{worker}.{service_sut}:9300']""".format(worker=pod, service_sut=name_worker, client=i, prometheus_interval=self.prometheus_interval, prometheus_timeout=self.prometheus_timeout) i = i + 1 for i,e in enumerate(envs): if e['name'] == 'BEXHOMA_SERVICE': @@ -781,7 +832,7 @@ def stop_maintaining(self, app='', component='maintaining', experiment='', confi container = 'datagenerator' stdout = self.experiment.cluster.pod_log(pod=pod, container=container) #stdin, stdout, stderr = self.pod_log(client_pod_name) - filename_log = self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/'+pod+'.'+container+'.log' + filename_log = self.path+'/'+pod+'.'+container+'.log' f = open(filename_log, "w") f.write(stdout) f.close() @@ -789,7 +840,7 @@ def stop_maintaining(self, app='', component='maintaining', experiment='', confi container = 'sensor' stdout = self.experiment.cluster.pod_log(pod=pod, container='sensor') #stdin, stdout, stderr = self.pod_log(client_pod_name) - filename_log = self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/'+pod+'.'+container+'.log' + filename_log = self.path+'/'+pod+'.'+container+'.log' f = open(filename_log, "w") f.write(stdout) f.close() @@ -878,6 +929,7 @@ def start_sut(self, app='', component='sut', experiment='', configuration=''): :param configuration: Name of the dbms configuration """ use_storage = self.use_storage() + use_data = self.use_distributed_datasource #storage_label = 'tpc-ds-1' if len(app)==0: app = self.appname @@ -946,6 +998,8 @@ def start_sut(self, app='', component='sut', experiment='', configuration=''): dep['metadata']['labels']['dbms'] = self.docker dep['metadata']['labels']['volume'] = self.volume dep['metadata']['labels']['loaded'] = "False" + for label_key, label_value in self.additional_labels.items(): + dep['metadata']['labels'][label_key] = str(label_value) if self.storage['storageClassName'] is not None and len(self.storage['storageClassName']) > 0: dep['spec']['storageClassName'] = self.storage['storageClassName'] #print(dep['spec']['storageClassName']) @@ -988,6 +1042,8 @@ def start_sut(self, app='', component='sut', experiment='', configuration=''): dep['metadata']['labels']['experiment'] = experiment dep['metadata']['labels']['dbms'] = self.docker dep['metadata']['labels']['volume'] = self.volume + for label_key, label_value in self.additional_labels.items(): + dep['metadata']['labels'][label_key] = str(label_value) dep['spec']['replicas'] = self.num_worker dep['spec']['serviceName'] = name_worker dep['spec']['selector']['matchLabels'] = dep['metadata']['labels'].copy() @@ -1044,6 
+1100,8 @@ def start_sut(self, app='', component='sut', experiment='', configuration=''): dep['metadata']['labels']['experiment'] = experiment dep['metadata']['labels']['dbms'] = self.docker dep['metadata']['labels']['volume'] = self.volume + for label_key, label_value in self.additional_labels.items(): + dep['metadata']['labels'][label_key] = str(label_value) for i_container, container in enumerate(dep['spec']['template']['spec']['containers']): #container = dep['spec']['template']['spec']['containers'][0]['name'] self.logger.debug('configuration.add_env({})'.format(env)) @@ -1062,6 +1120,8 @@ def start_sut(self, app='', component='sut', experiment='', configuration=''): dep['metadata']['labels']['experiment'] = experiment dep['metadata']['labels']['dbms'] = self.docker dep['metadata']['labels']['volume'] = self.volume + for label_key, label_value in self.additional_labels.items(): + dep['metadata']['labels'][label_key] = str(label_value) #dep['spec']['selector'] = dep['metadata']['labels'].copy() dep['spec']['selector']['configuration'] = configuration dep['spec']['selector']['experiment'] = experiment @@ -1079,6 +1139,8 @@ def start_sut(self, app='', component='sut', experiment='', configuration=''): dep['metadata']['labels']['experiment'] = experiment dep['metadata']['labels']['dbms'] = self.docker dep['metadata']['labels']['volume'] = self.volume + for label_key, label_value in self.additional_labels.items(): + dep['metadata']['labels'][label_key] = str(label_value) #dep['spec']['selector'] = dep['metadata']['labels'].copy() dep['spec']['selector']['configuration'] = configuration dep['spec']['selector']['experiment'] = experiment @@ -1101,6 +1163,8 @@ def start_sut(self, app='', component='sut', experiment='', configuration=''): dep['metadata']['labels']['experiment'] = experiment dep['metadata']['labels']['dbms'] = self.docker dep['metadata']['labels']['volume'] = self.volume + for label_key, label_value in self.additional_labels.items(): + dep['metadata']['labels'][label_key] = str(label_value) dep['metadata']['labels']['experimentRun'] = str(self.num_experiment_to_apply_done+1) dep['spec']['selector']['matchLabels'] = dep['metadata']['labels'].copy() dep['spec']['template']['metadata']['labels'] = dep['metadata']['labels'].copy() @@ -1112,11 +1176,15 @@ def start_sut(self, app='', component='sut', experiment='', configuration=''): if container['name'] == 'dbms': #print(container['volumeMounts']) if 'volumeMounts' in container and len(container['volumeMounts']) > 0: - for j, vol in enumerate(container['volumeMounts']): + for j, vol in reversed(list(enumerate(container['volumeMounts']))): if vol['name'] == 'benchmark-storage-volume': #print(vol['mountPath']) if not use_storage: del result[key]['spec']['template']['spec']['containers'][i]['volumeMounts'][j] + if vol['name'] == 'benchmark-data-volume': + #print(vol['mountPath']) + if not use_data: + del result[key]['spec']['template']['spec']['containers'][i]['volumeMounts'][j] if self.dockerimage: result[key]['spec']['template']['spec']['containers'][i]['image'] = self.dockerimage else: @@ -1134,6 +1202,9 @@ def start_sut(self, app='', component='sut', experiment='', configuration=''): del result[key]['spec']['template']['spec']['volumes'][i] else: vol['persistentVolumeClaim']['claimName'] = name_pvc + if vol['name'] == 'benchmark-data-volume': + if not use_data: + del result[key]['spec']['template']['spec']['volumes'][i] if 'hostPath' in vol and not self.monitoring_active: # we only need hostPath for monitoring del 
result[key]['spec']['template']['spec']['volumes'][i] @@ -1482,7 +1553,7 @@ def getTimediff(self): return int(timestamp_remote)-int(timestamp_local) def get_host_diskspace_used_data(self): """ - Returns information about the sut's host disk space used for the data (the database). + Returns information about the sut's host disk space used for the data (the database) in kilobyte. Basically this calls `du` on the host directory that is mentioned in cluster.config as to store the database. :return: Size of disk used for database in Bytes @@ -1583,7 +1654,12 @@ def get_connection_config(self, connection, alias='', dialect='', serverip='loca c['docker'] = self.docker c['script'] = self.script c['info'] = info - c['timeLoad'] = self.timeLoading + c['timeLoad'] = self.timeLoading # max span (generate + ingest) + schema + index + c['timeGenerate'] = self.timeGenerating + c['timeIngesting'] = self.timeIngesting + c['timeSchema'] = self.timeSchema + c['timeIndex'] = self.timeIndex + c['script_times'] = self.times_scripts c['priceperhourdollar'] = 0.0 + self.dockertemplate['priceperhourdollar'] # get hosts information pods = self.experiment.cluster.get_pods(component='sut', configuration=self.configuration, experiment=self.code) @@ -1649,7 +1725,7 @@ def get_connection_config(self, connection, alias='', dialect='', serverip='loca c['JDBC']['url'] = c['JDBC']['url'].format(serverip=serverip, dbname=self.experiment.volume, DBNAME=self.experiment.volume.upper(), timout_s=c['connectionmanagement']['timeout'], timeout_ms=c['connectionmanagement']['timeout']*1000) #print(c) return c#.copy() - def fetch_metrics_loading(self, connection=None, configuration=''): + def OLD_fetch_metrics_loading(self, connection=None, configuration=''): self.logger.debug('configuration.fetch_metrics()') # set general parameter resultfolder = self.experiment.cluster.config['benchmarker']['resultfolder'] @@ -1674,7 +1750,7 @@ def fetch_metrics_loading(self, connection=None, configuration=''): time_end = int(self.timeLoadingEnd) query = "loading" # store configuration - basepath_local = self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/' + basepath_local = self.path+'/' basepath_remote = '/results/'+str(self.code)+'/' file = c['name']+'.config' file_local = basepath_local+file @@ -1723,21 +1799,32 @@ def run_benchmarker_pod(self, :param parallelism: Number of parallel benchmarker pods we want to have """ self.logger.debug('configuration.run_benchmarker_pod()') - # set general parameter resultfolder = self.experiment.cluster.config['benchmarker']['resultfolder'] experiments_configfolder = self.experiment.cluster.experiments_configfolder + app = self.appname if connection is None: - connection = self.configuration + connection = self.configuration#self.getConnectionName() if len(configuration) == 0: configuration = connection code = self.code if not isinstance(client, str): client = str(client) + if not self.client: + self.client = client if len(dialect) == 0 and len(self.dialect) > 0: dialect = self.dialect + # set more parameters experimentRun = str(self.num_experiment_to_apply_done+1) + #self.experiment.cluster.stopPortforwarding() # set query management for new query file tools.query.template = self.experiment.querymanagement + # store information about current benchmark + self.current_benchmark_connection = connection + now = datetime.utcnow() + now_string = now.strftime('%Y-%m-%d %H:%M:%S') + time_now = str(datetime.now()) + time_now_int = 
int(datetime.timestamp(datetime.strptime(time_now,'%Y-%m-%d %H:%M:%S.%f'))) + self.current_benchmark_start = int(time_now_int) # get connection config (sut) monitoring_host = self.generate_component_name(component='monitoring', configuration=configuration, experiment=self.code) service_name = self.generate_component_name(component='sut', configuration=configuration, experiment=self.code) @@ -1745,20 +1832,26 @@ def run_benchmarker_pod(self, service_host = self.experiment.cluster.contextdata['service_sut'].format(service=service_name, namespace=service_namespace) pods = self.experiment.cluster.get_pods(component='sut', configuration=configuration, experiment=self.code) self.pod_sut = pods[0] - c = self.get_connection_config(connection, alias, dialect, serverip=service_host, monitoring_host=monitoring_host) + #service_port = config_K8s['port'] + c = self.get_connection_config(connection, alias, dialect, serverip=service_host, monitoring_host=monitoring_host)#config_K8s['ip']) + #c['parameter'] = {} c['parameter'] = self.eval_parameters c['parameter']['parallelism'] = parallelism c['parameter']['client'] = client c['parameter']['numExperiment'] = experimentRun c['parameter']['dockerimage'] = self.dockerimage c['parameter']['connection_parameter'] = self.connection_parameter - c['parameter']['storage_parameter'] = self.storage + c['hostsystem']['loading_timespans'] = self.loading_timespans + c['hostsystem']['benchmarking_timespans'] = self.benchmarking_timespans + #print(c) + #print(self.experiment.cluster.config['benchmarker']['jarfolder']) if isinstance(c['JDBC']['jar'], list): for i, j in enumerate(c['JDBC']['jar']): c['JDBC']['jar'][i] = self.experiment.cluster.config['benchmarker']['jarfolder']+c['JDBC']['jar'][i] elif isinstance(c['JDBC']['jar'], str): c['JDBC']['jar'] = self.experiment.cluster.config['benchmarker']['jarfolder']+c['JDBC']['jar'] #print(c) + self.logger.debug('configuration.run_benchmarker_pod(): {}'.format(connection)) self.benchmark = benchmarker.benchmarker( fixedConnection=connection, fixedQuery=query, @@ -1767,7 +1860,9 @@ def run_benchmarker_pod(self, working='connection', code=code ) - #self.code = self.benchmark.code + #self.benchmark.code = '1611607321' + self.code = self.benchmark.code + #print("Code", self.code) self.logger.debug('configuration.run_benchmarker_pod(Code={})'.format(self.code)) # read config for benchmarker connectionfile = experiments_configfolder+'/connections.config' @@ -1784,7 +1879,7 @@ def run_benchmarker_pod(self, # NEVER rerun, only one connection in config for detached: #self.benchmark.connections = [c] #print(self.benchmark.connections) - self.logger.debug('configuration.run_benchmarker_pod(): {}'.format(self.benchmark.connections)) + #self.logger.debug('configuration.run_benchmarker_pod(): {}'.format(self.benchmark.connections)) self.benchmark.dbms[c['name']] = tools.dbms(c, False) # copy or generate config folder (query and connection) # add connection to existing list @@ -1813,9 +1908,31 @@ def run_benchmarker_pod(self, experiment['connection'] = connection experiment['connectionmanagement'] = self.connectionmanagement.copy() self.experiment.cluster.log_experiment(experiment) - # create pod - #yamlfile = self.create_job(connection=connection, component=component, configuration=configuration, experiment=self.code, client=client, parallelism=parallelism, alias=c['alias']) - yamlfile = self.create_manifest_benchmarking(connection=connection, app=app, component='benchmarker', experiment=self.code, configuration=configuration, 
experimentRun=experimentRun, client=client, parallelism=parallelism, alias=c['alias'], num_pods=parallelism)#, env=env, template=template) + # copy config to pod - dashboard + pods = self.experiment.cluster.get_pods(component='dashboard') + if len(pods) > 0: + pod_dashboard = pods[0] + cmd = {} + cmd['prepare_log'] = 'mkdir -p /results/'+str(self.code) + stdin, stdout, stderr = self.experiment.cluster.execute_command_in_pod(command=cmd['prepare_log'], pod=pod_dashboard, container="dashboard") + stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.path+'/queries.config '+pod_dashboard+':/results/'+str(self.code)+'/queries.config') + self.logger.debug('copy config queries.config: {}'.format(stdout)) + stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.path+'/'+c['name']+'.config '+pod_dashboard+':/results/'+str(self.code)+'/'+c['name']+'.config') + self.logger.debug('copy config {}: {}'.format(c['name']+'.config', stdout)) + # copy twice to be more sure it worked + stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.path+'/'+c['name']+'.config '+pod_dashboard+':/results/'+str(self.code)+'/'+c['name']+'.config') + self.logger.debug('copy config {}: {}'.format(c['name']+'.config', stdout)) + stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.path+'/connections.config '+pod_dashboard+':/results/'+str(self.code)+'/connections.config') + self.logger.debug('copy config connections.config: {}'.format(stdout)) + stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.path+'/protocol.json '+pod_dashboard+':/results/'+str(self.code)+'/protocol.json') + self.logger.debug('copy config protocol.json: {}'.format(stdout)) + # put list of clients to message queue + redisQueue = '{}-{}-{}-{}'.format(app, component, connection, self.code) + for i in range(1, parallelism+1): + #redisClient.rpush(redisQueue, i) + self.experiment.cluster.add_to_messagequeue(queue=redisQueue, data=i) + # create pods + yamlfile = self.create_manifest_benchmarking(connection=connection, component=component, configuration=configuration, experiment=self.code, experimentRun=experimentRun, client=client, parallelism=parallelism, alias=c['alias'], num_pods=parallelism) # start pod self.experiment.cluster.kubectl('create -f '+yamlfile) pods = [] @@ -1838,27 +1955,54 @@ def run_benchmarker_pod(self, client_pod_name = pods[0] status = self.experiment.cluster.get_pod_status(client_pod_name) print("found") - # copy config to pod - cmd = {} - cmd['prepare_log'] = 'mkdir -p /results/'+str(self.code) - stdin, stdout, stderr = self.experiment.cluster.execute_command_in_pod(command=cmd['prepare_log'], pod=client_pod_name) - stdout = self.experiment.cluster.kubectl('cp '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/queries.config '+client_pod_name+':/results/'+str(self.code)+'/queries.config') - self.logger.debug('copy config queries.config: {}'.format(stdout)) - stdout = self.experiment.cluster.kubectl('cp '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/'+c['name']+'.config '+client_pod_name+':/results/'+str(self.code)+'/'+c['name']+'.config') - self.logger.debug('copy config {}: {}'.format(c['name']+'.config', stdout)) - # copy twice to be more sure it worked - stdout = self.experiment.cluster.kubectl('cp '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", 
"/").replace("C:", "")+"/"+str(self.code)+'/'+c['name']+'.config '+client_pod_name+':/results/'+str(self.code)+'/'+c['name']+'.config') - self.logger.debug('copy config {}: {}'.format(c['name']+'.config', stdout)) - stdout = self.experiment.cluster.kubectl('cp '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/connections.config '+client_pod_name+':/results/'+str(self.code)+'/connections.config') - self.logger.debug('copy config connections.config: {}'.format(stdout)) - stdout = self.experiment.cluster.kubectl('cp '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/protocol.json '+client_pod_name+':/results/'+str(self.code)+'/protocol.json') - self.logger.debug('copy config protocol.json: {}'.format(stdout)) # get monitoring for loading + """ if self.monitoring_active: + print("get monitoring for loading") + logger = logging.getLogger('dbmsbenchmarker') + logging.basicConfig(level=logging.DEBUG) + for connection_number, connection_data in self.benchmark.dbms.items(): + #connection = self.benchmark.dbms[c['name']] + print(connection_number, connection_data) + print(connection_data.connectiondata['monitoring']['prometheus_url']) + query='loading' + for m, metric in connection_data.connectiondata['monitoring']['metrics'].items(): + print(m) + monitor.metrics.fetchMetric(query, m, connection_number, connection_data.connectiondata, int(self.timeLoadingStart), int(self.timeLoadingEnd), '{result_path}'.format(result_path=self.benchmark.path)) + """ + # copy config to pod - dashboard + pods = self.experiment.cluster.get_pods(component='dashboard') + if len(pods) > 0: + pod_dashboard = pods[0] cmd = {} - #cmd['fetch_loading_metrics'] = 'python metrics.py -r /results/ -c {} -ts {} -te {}'.format(self.code, self.timeLoadingStart, self.timeLoadingEnd) - cmd['fetch_loading_metrics'] = 'python metrics.py -r /results/ -c {} -e {} -ts {} -te {}'.format(connection, self.code, self.timeLoadingStart, self.timeLoadingEnd) - stdin, stdout, stderr = self.experiment.cluster.execute_command_in_pod(command=cmd['fetch_loading_metrics'], pod=client_pod_name) + """ + cmd['prepare_log'] = 'mkdir -p /results/'+str(self.code) + stdin, stdout, stderr = self.experiment.cluster.execute_command_in_pod(command=cmd['prepare_log'], pod=pod_dashboard, container="dashboard") + stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.path+'/queries.config '+pod_dashboard+':/results/'+str(self.code)+'/queries.config') + self.logger.debug('copy config queries.config: {}'.format(stdout)) + stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.path+'/'+c['name']+'.config '+pod_dashboard+':/results/'+str(self.code)+'/'+c['name']+'.config') + self.logger.debug('copy config {}: {}'.format(c['name']+'.config', stdout)) + # copy twice to be more sure it worked + stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.path+'/'+c['name']+'.config '+pod_dashboard+':/results/'+str(self.code)+'/'+c['name']+'.config') + self.logger.debug('copy config {}: {}'.format(c['name']+'.config', stdout)) + stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.path+'/connections.config '+pod_dashboard+':/results/'+str(self.code)+'/connections.config') + self.logger.debug('copy config connections.config: {}'.format(stdout)) + stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.path+'/protocol.json 
'+pod_dashboard+':/results/'+str(self.code)+'/protocol.json') + self.logger.debug('copy config protocol.json: {}'.format(stdout)) + """ + # get monitoring for loading + if self.monitoring_active: + cmd = {} + #cmd['fetch_loading_metrics'] = 'python metrics.py -r /results/ -c {} -cf {} -f {} -e {} -ts {} -te {}'.format(connection, c['name']+'.config', '/results/'+self.code, self.code, self.timeLoadingStart, self.timeLoadingEnd) + cmd['fetch_loading_metrics'] = 'python metrics.py -r /results/ -db -ct loading -c {} -cf {} -f {} -e {} -ts {} -te {}'.format(connection, c['name']+'.config', '/results/'+self.code, self.code, self.timeLoadingStart, self.timeLoadingEnd) + stdin, stdout, stderr = self.experiment.cluster.execute_command_in_pod(command=cmd['fetch_loading_metrics'], pod=pod_dashboard, container="dashboard") + self.logger.debug(stdout) + self.logger.debug(stderr) + # upload connections infos again, metrics has overwritten it + filename = 'connections.config' + cmd['upload_connection_file'] = 'cp {from_file} {to} -c dashboard'.format(to=pod_dashboard+':/results/'+str(self.code)+'/'+filename, from_file=self.path+"/"+filename) + stdout = self.experiment.cluster.kubectl(cmd['upload_connection_file']) + self.logger.debug(stdout) def execute_command_in_pod_sut(self, command, pod='', container='dbms', params=''): """ Runs an shell command remotely inside a container of a pod. @@ -1888,7 +2032,7 @@ def copyLog(self): stdin, stdout, stderr = self.execute_command_in_pod_sut(command=cmd['prepare_log']) cmd['save_log'] = 'cp '+self.dockertemplate['logfile']+' /data/'+str(self.code)+'/'+self.configuration+'.log' stdin, stdout, stderr = self.execute_command_in_pod_sut(command=cmd['save_log']) - def prepare_init_dbms(self): + def prepare_init_dbms(self, scripts): """ Prepares to load data into the dbms. This copies the DDL scripts to /tmp on the host of the sut. 
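Note on prepare_init_dbms(scripts): it copies the given DDL scripts to /tmp inside the SUT pod, and when DDL parameters are set it first writes a locally filled copy in which placeholders are substituted. A minimal sketch of that substitution step, assuming a hypothetical template file and the 'index' => 'btree' example from set_ddl_parameters():

```python
# Fill DDL placeholders before the script is copied into the pod via kubectl cp.
# The file name and the {index} placeholder are illustrative assumptions.
ddl_parameters = {"index": "btree"}

template = "CREATE INDEX idx_orders ON orders USING {index} (o_orderkey);\n"

filled = template.format(**ddl_parameters)
with open("initschema_filled.sql", "w") as f:
    f.write(filled)
```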
@@ -1900,7 +2044,8 @@ def prepare_init_dbms(self): self.pod_sut = pods[0] scriptfolder = '/tmp/' if len(self.ddl_parameters): - for script in self.initscript: + #for script in self.initscript: + for script in scripts: filename_template = self.docker+'/'+script if os.path.isfile(self.experiment.cluster.experiments_configfolder+'/'+filename_template): with open(self.experiment.cluster.experiments_configfolder+'/'+filename_template, "r") as initscript_template: @@ -1911,7 +2056,8 @@ def prepare_init_dbms(self): initscript_filled.write(data) self.experiment.cluster.kubectl('cp --container dbms {from_name} {to_name}'.format(from_name=self.experiment.cluster.experiments_configfolder+'/'+filename_filled, to_name=self.pod_sut+':'+scriptfolder+script)) else: - for script in self.initscript: + #for script in self.initscript: + for script in scripts: filename = self.docker+'/'+script if os.path.isfile(self.experiment.cluster.experiments_configfolder+'/'+filename): self.experiment.cluster.kubectl('cp --container dbms {from_name} {to_name}'.format(from_name=self.experiment.cluster.experiments_configfolder+'/'+filename, to_name=self.pod_sut+':'+scriptfolder+script)) @@ -1983,11 +2129,54 @@ def check_load_data(self): if len(pod_labels) > 0: pod = next(iter(pod_labels.keys())) if 'timeLoadingStart' in pod_labels[pod]: - self.timeLoadingStart = float(pod_labels[pod]['timeLoadingStart']) + self.timeLoadingStart = int(pod_labels[pod]['timeLoadingStart']) if 'timeLoadingEnd' in pod_labels[pod]: - self.timeLoadingEnd = float(pod_labels[pod]['timeLoadingEnd']) + self.timeLoadingEnd = int(pod_labels[pod]['timeLoadingEnd']) if 'timeLoading' in pod_labels[pod]: self.timeLoading = float(pod_labels[pod]['timeLoading']) + if 'timeIndex' in pod_labels[pod]: + self.timeIndex = float(pod_labels[pod]['timeIndex']) + for key, value in pod_labels[pod].items(): + if key.startswith("time_"): + time_type = key[len("time_"):] + self.times_scripts[time_type] = float(value) + # delete job and all its pods + self.experiment.cluster.delete_job(job) + pods = self.experiment.cluster.get_job_pods(app=app, component=component, experiment=experiment, configuration=configuration) + for pod in pods: + status = self.experiment.cluster.get_pod_status(pod) + print(pod, status) + #if status == "Running": + # TODO: Find names of containers dynamically + container = 'datagenerator' + stdout = self.experiment.cluster.pod_log(pod=pod, container=container) + #stdin, stdout, stderr = self.pod_log(client_pod_name) + filename_log = self.path+'/'+pod+'.'+container+'.log' + f = open(filename_log, "w") + f.write(stdout) + f.close() + # + container = 'sensor' + stdout = self.experiment.cluster.pod_log(pod=pod, container='sensor') + #stdin, stdout, stderr = self.pod_log(client_pod_name) + filename_log = self.path+'/'+pod+'.'+container+'.log' + f = open(filename_log, "w") + f.write(stdout) + f.close() + self.experiment.cluster.delete_pod(pod) + self.experiment.end_loading(job) + loading_pods_active = False + if self.monitoring_active: + #cmd = {} + #cmd['fetch_loading_metrics'] = 'python metrics.py -r /results/ -c {} -ts {} -te {}'.format(self.code, self.timeLoadingStart, self.timeLoadingEnd) + #cmd['fetch_loading_metrics'] = 'python metrics.py -r /results/ -c {} -e {} -ts {} -te {}'.format(connection, self.code, self.timeLoadingStart, self.timeLoadingEnd) + #self.experiment.cluster.logger.debug('load_metrics:{}'.format(cmd['fetch_loading_metrics'])) + #stdout = os.popen(cmd['fetch_loading_metrics']).read()# os.system(fullcommand) + #stdin, stdout, stderr 
= self.experiment.cluster.execute_command_in_pod(command=cmd['fetch_loading_metrics'], pod=client_pod_name) + #print(stdout) + # currently, only benchmarking fetches loading metrics + #self.fetch_metrics_loading(connection=self.configuration) + pass # mark pod with new end time and duration pods_sut = self.experiment.cluster.get_pods(app=app, component='sut', experiment=experiment, configuration=configuration) if len(pods_sut) > 0: @@ -1999,17 +2188,56 @@ def check_load_data(self): #self.experiment.cluster.logger.debug(float(self.timeLoadingEnd)) #self.experiment.cluster.logger.debug(float(self.timeLoadingStart)) #self.timeLoading = float(self.timeLoading) + float(timeLoading) + timing_datagenerator, timing_sensor, timing_total = self.experiment.get_job_timing_loading(job) + generator_time = 0 + loader_time = 0 + total_time = 0 + self.loading_timespans = {} + self.loading_timespans['datagenerator'] = timing_datagenerator + self.loading_timespans['sensor'] = timing_sensor + self.loading_timespans['total'] = timing_total + if len(timing_datagenerator) > 0: + print([end-start for (start,end) in timing_datagenerator]) + timing_start = min([start for (start,end) in timing_datagenerator]) + timing_end = max([end for (start,end) in timing_datagenerator]) + total_time = timing_end - timing_start + generator_time = total_time + print("Generator", total_time) + #timing_sensor = extract_timing(jobname, container="sensor") + if len(timing_sensor) > 0: + print([end-start for (start,end) in timing_sensor]) + timing_start = min([start for (start,end) in timing_sensor]) + timing_end = max([end for (start,end) in timing_sensor]) + total_time = timing_end - timing_start + loader_time = total_time + print("Loader", total_time) + if len(timing_datagenerator) > 0 and len(timing_sensor) > 0: + timing_total = timing_datagenerator + timing_sensor + print(timing_total) + timing_start = min([start for (start,end) in timing_total]) + timing_end = max([end for (start,end) in timing_total]) + total_time = timing_end - timing_start + print("Total", total_time) now = datetime.utcnow() now_string = now.strftime('%Y-%m-%d %H:%M:%S') time_now = str(datetime.now()) time_now_int = int(datetime.timestamp(datetime.strptime(time_now,'%Y-%m-%d %H:%M:%S.%f'))) self.timeLoadingEnd = int(time_now_int) - self.timeLoading = int(self.timeLoadingEnd) - int(self.timeLoadingStart) + self.timeLoading + # store preloading time (should be for schema creation) + self.timeSchema = self.timeLoading + if total_time > 0: + # this sets the loading time to the max span of pods + self.timeLoading = total_time + self.timeLoading + else: + # this sets the loading time to the span until "now" (including waiting and starting overhead) + self.timeLoading = int(self.timeLoadingEnd) - int(self.timeLoadingStart) + self.timeLoading + self.timeGenerating = generator_time + self.timeIngesting = loader_time self.experiment.cluster.logger.debug("LOADING LABELS") self.experiment.cluster.logger.debug(self.timeLoadingStart) self.experiment.cluster.logger.debug(self.timeLoadingEnd) self.experiment.cluster.logger.debug(self.timeLoading) - fullcommand = 'label pods '+pod_sut+' --overwrite loaded=True timeLoadingEnd="{}" timeLoading={}'.format(time_now_int, self.timeLoading) + fullcommand = 'label pods '+pod_sut+' --overwrite loaded=True timeLoadingEnd="{}" timeLoadingStart="{}" time_ingested={} timeLoading={} time_generated={}'.format(self.timeLoadingEnd, self.timeLoadingStart, loader_time, self.timeLoading, generator_time) #print(fullcommand) 
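The loading-time bookkeeping above reduces each container's list of (start, end) pairs to one overall span: earliest start to latest end across all pods. A small standalone sketch of that reduction, with made-up epoch timestamps:

```python
# Reduce per-pod (start, end) epoch pairs to one overall span, as done for
# the datagenerator and sensor containers. Timestamps are made-up examples.
def overall_span(timespans):
    """Return (duration, start, end) of the union span, or (0, None, None)."""
    if not timespans:
        return 0, None, None
    start = min(s for s, e in timespans)
    end = max(e for s, e in timespans)
    return end - start, start, end

timing_datagenerator = [(1000, 1100), (1005, 1090)]
timing_sensor = [(1100, 1400), (1110, 1380)]

generator_time, _, _ = overall_span(timing_datagenerator)
loader_time, _, _ = overall_span(timing_sensor)
total_time, _, _ = overall_span(timing_datagenerator + timing_sensor)

print(generator_time, loader_time, total_time)  # 100 300 400
```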
self.experiment.cluster.kubectl(fullcommand) # TODO: Also mark volume @@ -2023,71 +2251,57 @@ def check_load_data(self): else: volume = '' if volume: - fullcommand = 'label pvc '+volume+' --overwrite loaded=True timeLoadingStart="{}" timeLoadingEnd="{}" timeLoading={}'.format(int(self.timeLoadingStart), int(self.timeLoadingEnd), self.timeLoading) + fullcommand = 'label pvc '+volume+' --overwrite loaded=True timeLoadingEnd="{}" timeLoadingStart="{}" time_ingested={} timeLoading={} time_generated={}'.format(self.timeLoadingEnd, self.timeLoadingStart, loader_time, self.timeLoading, generator_time) + #fullcommand = 'label pvc '+volume+' --overwrite loaded=True time_ingested={} timeLoadingStart="{}" timeLoadingEnd="{}" timeLoading={}'.format(loader_time, int(self.timeLoadingStart), int(self.timeLoadingEnd), self.timeLoading) #print(fullcommand) self.experiment.cluster.kubectl(fullcommand) - # delete job and all its pods - self.experiment.cluster.delete_job(job) - pods = self.experiment.cluster.get_job_pods(app=app, component=component, experiment=experiment, configuration=configuration) - for pod in pods: - status = self.experiment.cluster.get_pod_status(pod) - print(pod, status) - #if status == "Running": - # TODO: Find names of containers dynamically - container = 'datagenerator' - stdout = self.experiment.cluster.pod_log(pod=pod, container=container) - #stdin, stdout, stderr = self.pod_log(client_pod_name) - filename_log = self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/'+pod+'.'+container+'.log' - f = open(filename_log, "w") - f.write(stdout) - f.close() - # - container = 'sensor' - stdout = self.experiment.cluster.pod_log(pod=pod, container='sensor') - #stdin, stdout, stderr = self.pod_log(client_pod_name) - filename_log = self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/'+pod+'.'+container+'.log' - f = open(filename_log, "w") - f.write(stdout) - f.close() - self.experiment.cluster.delete_pod(pod) - self.experiment.end_loading(job) - loading_pods_active = False - if self.monitoring_active: - #cmd = {} - #cmd['fetch_loading_metrics'] = 'python metrics.py -r /results/ -c {} -ts {} -te {}'.format(self.code, self.timeLoadingStart, self.timeLoadingEnd) - #cmd['fetch_loading_metrics'] = 'python metrics.py -r /results/ -c {} -e {} -ts {} -te {}'.format(connection, self.code, self.timeLoadingStart, self.timeLoadingEnd) - #self.experiment.cluster.logger.debug('load_metrics:{}'.format(cmd['fetch_loading_metrics'])) - #stdout = os.popen(cmd['fetch_loading_metrics']).read()# os.system(fullcommand) - #stdin, stdout, stderr = self.experiment.cluster.execute_command_in_pod(command=cmd['fetch_loading_metrics'], pod=client_pod_name) - #print(stdout) - self.fetch_metrics_loading(connection=self.configuration) + if len(self.indexscript): + # loading has not finished (there is indexing) + self.load_data(scripts=self.indexscript, time_offset=self.timeLoading, time_start_int=self.timeLoadingStart, script_type='indexed') else: loading_pods_active = False # check if asynch loading outside cluster is done # only if inside cluster is done - if not loading_pods_active: - pod_labels = self.experiment.cluster.get_pods_labels(app=self.appname, component='sut', experiment=self.experiment.code, configuration=self.configuration) - #print(pod_labels) - if len(pod_labels) > 0: - pod = next(iter(pod_labels.keys())) - if 'loaded' in pod_labels[pod]: + pod_labels = 
self.experiment.cluster.get_pods_labels(app=self.appname, component='sut', experiment=self.experiment.code, configuration=self.configuration) + #print(pod_labels) + if len(pod_labels) > 0: + pod = next(iter(pod_labels.keys())) + if len(self.indexscript): + # we have to check indexing, too + if 'indexed' in pod_labels[pod]: self.loading_started = True - if pod_labels[pod]['loaded'] == 'True': + if pod_labels[pod]['indexed'] == 'True': self.loading_finished = True else: self.loading_finished = False else: - self.loading_started = False - if 'timeLoadingStart' in pod_labels[pod]: - self.timeLoadingStart = pod_labels[pod]['timeLoadingStart'] - if 'timeLoadingEnd' in pod_labels[pod]: - self.timeLoadingEnd = pod_labels[pod]['timeLoadingEnd'] - if 'timeLoading' in pod_labels[pod]: - self.timeLoading = float(pod_labels[pod]['timeLoading']) + self.loading_finished = False + if 'time_indexed' in pod_labels[pod]: + self.timeIndex = float(pod_labels[pod]['time_indexed']) else: - self.loading_started = False - self.loading_finished = False - def load_data(self): + if not loading_pods_active: + if 'loaded' in pod_labels[pod]: + self.loading_started = True + if pod_labels[pod]['loaded'] == 'True': + self.loading_finished = True + else: + self.loading_finished = False + else: + self.loading_started = False + if 'timeLoadingStart' in pod_labels[pod]: + self.timeLoadingStart = int(pod_labels[pod]['timeLoadingStart']) + if 'timeLoadingEnd' in pod_labels[pod]: + self.timeLoadingEnd = int(pod_labels[pod]['timeLoadingEnd']) + if 'timeLoading' in pod_labels[pod]: + self.timeLoading = float(pod_labels[pod]['timeLoading']) + for key, value in pod_labels[pod].items(): + if key.startswith("time_"): + time_type = key[len("time_"):] + self.times_scripts[time_type] = float(value) + else: + self.loading_started = False + self.loading_finished = False + def load_data(self, scripts, time_offset=0, time_start_int=0, script_type='loaded'): """ Start loading data into the sut. This runs `load_data_asynch()` as an asynchronous thread. 
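load_data() hands the long-running ingest off to a background thread so the experiment loop stays responsive. A minimal sketch of the threading pattern it relies on; the worker body and its arguments are stand-ins, not the real load_data_asynch():

```python
import threading
import time

# Stand-in for load_data_asynch(); the real function runs scripts inside the
# SUT pod and labels the pod when it is done.
def load_data_asynch(pod_sut, commands, script_type="loaded", time_offset=0):
    for cmd in commands:
        time.sleep(0.1)  # placeholder for executing one script in the pod
    print(f"{pod_sut}: marked {script_type} after offset {time_offset}s")

thread_args = {
    "pod_sut": "bexhoma-sut-example-0",                # hypothetical pod name
    "commands": ["initschema.sql", "initdata.sql"],    # hypothetical scripts
    "script_type": "loaded",
    "time_offset": 0,
}
thread = threading.Thread(target=load_data_asynch, kwargs=thread_args)
thread.start()   # returns immediately; loading runs in the background
# ... the caller then polls check_load_data() until the pod carries loaded=True
thread.join()
```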
@@ -2095,12 +2309,13 @@ def load_data(self): """ self.logger.debug('configuration.load_data()') self.loading_started = True - self.prepare_init_dbms() + self.prepare_init_dbms(scripts) service_name = self.generate_component_name(component='sut', configuration=self.configuration, experiment=self.code) pods = self.experiment.cluster.get_pods(component='sut', configuration=self.configuration, experiment=self.code) self.pod_sut = pods[0] scriptfolder = '/tmp/' - commands = self.initscript.copy() + commands = scripts.copy() + #commands = self.initscript.copy() use_storage = self.use_storage() if use_storage: #storage_label = 'tpc-ds-1' @@ -2109,13 +2324,37 @@ def load_data(self): else: volume = '' print("start loading asynch {}".format(self.pod_sut)) - self.logger.debug("load_data_asynch(app="+self.appname+", component='sut', experiment="+self.code+", configuration="+self.configuration+", pod_sut="+self.pod_sut+", scriptfolder="+scriptfolder+", commands="+str(commands)+", loadData="+self.dockertemplate['loadData']+", path="+self.experiment.path+", volume="+volume+", context="+self.experiment.cluster.context+", service_name="+service_name+")") + self.logger.debug("load_data_asynch(app="+self.appname+", component='sut', experiment="+self.code+", configuration="+self.configuration+", pod_sut="+self.pod_sut+", scriptfolder="+scriptfolder+", commands="+str(commands)+", loadData="+self.dockertemplate['loadData']+", path="+self.experiment.path+", volume="+volume+", context="+self.experiment.cluster.context+", service_name="+service_name+", time_offset="+str(time_offset)+", time_start_int="+str(time_start_int)+", script_type="+str(script_type)+")") #result = load_data_asynch(app=self.appname, component='sut', experiment=self.code, configuration=self.configuration, pod_sut=self.pod_sut, scriptfolder=scriptfolder, commands=commands, loadData=self.dockertemplate['loadData'], path=self.experiment.path) - thread_args = {'app':self.appname, 'component':'sut', 'experiment':self.code, 'configuration':self.configuration, 'pod_sut':self.pod_sut, 'scriptfolder':scriptfolder, 'commands':commands, 'loadData':self.dockertemplate['loadData'], 'path':self.experiment.path, 'volume':volume, 'context':self.experiment.cluster.context, 'service_name':service_name} + thread_args = {'app':self.appname, 'component':'sut', 'experiment':self.code, 'configuration':self.configuration, 'pod_sut':self.pod_sut, 'scriptfolder':scriptfolder, 'commands':commands, 'loadData':self.dockertemplate['loadData'], 'path':self.experiment.path, 'volume':volume, 'context':self.experiment.cluster.context, 'service_name':service_name, 'time_offset':time_offset, 'script_type':script_type, 'time_start_int':time_start_int} thread = threading.Thread(target=load_data_asynch, kwargs=thread_args) thread.start() return - def create_manifest_job(self, app='', component='benchmarker', experiment='', configuration='', experimentRun='', client='1', parallelism=1, env={}, template='', nodegroup='', num_pods=1):#, jobname=''): + def get_patched_yaml(self, file, patch=""): + """ + Applies a YAML formatted patch to a YAML file and returns merged result as a YAML object. 
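get_patched_yaml() uses hiyapyco to deep-merge a YAML-formatted patch string over the job template before the manifest is specialised; later documents win. A sketch of that merge, mirroring the call used in the patch and assuming a single-document template file named jobtemplate-loading.yml plus a small illustrative patch:

```python
import yaml
import hiyapyco
from io import StringIO

# Hypothetical patch: add a label and lower the job's backoffLimit.
patch = """
metadata:
  labels:
    team: benchmarking
spec:
  backoffLimit: 2
"""

# Same call pattern as get_patched_yaml(): the YAML string is merged over the
# file content; values from later arguments override earlier ones.
merged = hiyapyco.load(["jobtemplate-loading.yml", patch],
                       method=hiyapyco.METHOD_MERGE)

# Convert the merged result back into the list-of-documents shape the rest of
# the manifest code expects.
stream = StringIO(hiyapyco.dump(merged))
result = [doc for doc in yaml.safe_load_all(stream)]
```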
+ + :param file: Name of YAML file to load + :param patch: Optional patch to be applied + :return: YAML object of (patched) file content + """ + if len(patch) > 0: + merged = hiyapyco.load([file, patch], method=hiyapyco.METHOD_MERGE) + print(hiyapyco.dump(merged, default_flow_style=False)) + stream = StringIO(hiyapyco.dump(merged)) # convert string to stream + result = yaml.safe_load_all(stream) + result = [data for data in result] + #print(hiyapyco.dump(merged, default_flow_style=False)) + #patched = yaml.safe_load(hiyapyco.dump(merged)) + return result + else: + with open(file) as f: + result = yaml.safe_load_all(f) + result = [data for data in result] + return result + #unpatched = yaml.safe_load(f) + #return unpatched + def create_manifest_job(self, app='', component='benchmarker', experiment='', configuration='', experimentRun='', client='1', parallelism=1, env={}, template='', nodegroup='', num_pods=1, connection='', patch_yaml=''):#, jobname=''): """ Creates a job and sets labels (component/ experiment/ configuration). @@ -2141,6 +2380,13 @@ def create_manifest_job(self, app='', component='benchmarker', experiment='', co jobname = self.generate_component_name(app=app, component=component, experiment=experiment, configuration=configuration, experimentRun=experimentRun, client=str(client)) servicename = self.generate_component_name(app=app, component='sut', experiment=experiment, configuration=configuration) #print(jobname) + # start (create) time of the job + now = datetime.utcnow() + now_string = now.strftime('%Y-%m-%d %H:%M:%S') + time_now = str(datetime.now()) + time_now_int = int(datetime.timestamp(datetime.strptime(time_now,'%Y-%m-%d %H:%M:%S.%f'))) + #self.current_benchmark_start = int(time_now_int) + # parameter of the configuration c = copy.deepcopy(self.dockertemplate['template']) c['connectionmanagement'] = {} c['connectionmanagement']['numProcesses'] = self.connectionmanagement['numProcesses'] @@ -2174,13 +2420,21 @@ def create_manifest_job(self, app='', component='benchmarker', experiment='', co #job_experiment = self.experiment.path+'/job-dbmsbenchmarker-{configuration}-{client}.yml'.format(configuration=configuration, client=client) job_experiment = self.experiment.path+'/{app}-{component}-{configuration}-{experimentRun}-{client}.yml'.format(app=app, component=component, configuration=configuration, experimentRun=experimentRun, client=client) #with open(self.experiment.cluster.yamlfolder+"jobtemplate-dbmsbenchmarker.yml") as stream: - with open(self.experiment.cluster.yamlfolder+template) as stream: - try: - result=yaml.safe_load_all(stream) - result = [data for data in result] - #print(result) - except yaml.YAMLError as exc: - print(exc) + # old unpatched loader: + #with open(self.experiment.cluster.yamlfolder+template) as stream: + # try: + # result = yaml.safe_load_all(stream) + # result = [data for data in result] + # #print(result) + # except yaml.YAMLError as exc: + # print(exc) + try: + result = self.get_patched_yaml(self.experiment.cluster.yamlfolder+template, patch_yaml) + #stream = StringIO(patched) # convert string to stream + #result = yaml.safe_load_all(stream) + #result = [data for data in result] + except yaml.YAMLError as exc: + print(exc) for dep in result: if dep['kind'] == 'Job': dep['metadata']['name'] = jobname @@ -2190,19 +2444,27 @@ def create_manifest_job(self, app='', component='benchmarker', experiment='', co dep['metadata']['labels']['app'] = app dep['metadata']['labels']['component'] = component dep['metadata']['labels']['configuration'] = 
configuration + dep['metadata']['labels']['connection'] = connection dep['metadata']['labels']['dbms'] = self.docker dep['metadata']['labels']['experiment'] = str(experiment) dep['metadata']['labels']['client'] = str(client) dep['metadata']['labels']['experimentRun'] = str(experimentRun) dep['metadata']['labels']['volume'] = self.volume + for label_key, label_value in self.additional_labels.items(): + dep['metadata']['labels'][label_key] = str(label_value) + dep['metadata']['labels']['start_time'] = str(time_now_int) dep['spec']['template']['metadata']['labels']['app'] = app dep['spec']['template']['metadata']['labels']['component'] = component dep['spec']['template']['metadata']['labels']['configuration'] = configuration + dep['spec']['template']['metadata']['labels']['connection'] = connection dep['spec']['template']['metadata']['labels']['dbms'] = self.docker dep['spec']['template']['metadata']['labels']['experiment'] = str(experiment) dep['spec']['template']['metadata']['labels']['client'] = str(client) dep['spec']['template']['metadata']['labels']['experimentRun'] = str(experimentRun) dep['spec']['template']['metadata']['labels']['volume'] = self.volume + for label_key, label_value in self.additional_labels.items(): + dep['spec']['template']['metadata']['labels'][label_key] = str(label_value) + dep['spec']['template']['metadata']['labels']['start_time'] = str(time_now_int) for i_container, c in enumerate(dep['spec']['template']['spec']['containers']): #print(i_container) env_manifest = {} @@ -2268,7 +2530,7 @@ def create_manifest_benchmarking(self, connection, app='', component='benchmarke # determine start time now = datetime.utcnow() now_string = now.strftime('%Y-%m-%d %H:%M:%S') - start = now + timedelta(seconds=180) + start = now + timedelta(seconds=240) start_string = start.strftime('%Y-%m-%d %H:%M:%S') e = {'DBMSBENCHMARKER_NOW': now_string, 'DBMSBENCHMARKER_START': start_string, @@ -2279,9 +2541,10 @@ def create_manifest_benchmarking(self, connection, app='', component='benchmarke 'DBMSBENCHMARKER_SLEEP': str(60), 'DBMSBENCHMARKER_ALIAS': alias} env = {**env, **e} + env = {**env, **self.benchmarking_parameters} #job_experiment = self.experiment.path+'/job-dbmsbenchmarker-{configuration}-{experimentRun}-{client}.yml'.format(configuration=configuration, client=client) - return self.create_manifest_job(app=app, component=component, experiment=experiment, configuration=configuration, experimentRun=experimentRun, client=client, parallelism=parallelism, env=env, template="jobtemplate-dbmsbenchmarker.yml", num_pods=num_pods, nodegroup='benchmarking') - def create_manifest_maintaining(self, app='', component='maintaining', experiment='', configuration='', parallelism=1, alias='', num_pods=1): + return self.create_manifest_job(app=app, component=component, experiment=experiment, configuration=configuration, experimentRun=experimentRun, client=client, parallelism=parallelism, env=env, template="jobtemplate-benchmarking-dbmsbenchmarker.yml", num_pods=num_pods, nodegroup='benchmarking', connection=connection) + def create_manifest_maintaining(self, app='', component='maintaining', experiment='', configuration='', parallelism=1, alias='', num_pods=1, connection=''): """ Creates a job template for maintaining. This sets meta data in the template and ENV. 
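create_manifest_benchmarking() builds the benchmarker ENV by layering dicts: the base ENV, then the DBMSBENCHMARKER_* timing variables (with a start 240 s in the future so all parallel pods begin together), then the experiment's benchmarking_parameters. A condensed sketch of that layering with illustrative values:

```python
from datetime import datetime, timedelta

# Illustrative base ENV and experiment-level overrides.
env = {"BEXHOMA_CONNECTION": "PostgreSQL-1-1"}
benchmarking_parameters = {"DBMSBENCHMARKER_SLEEP": "30"}   # example override

now = datetime.utcnow()
start = now + timedelta(seconds=240)   # shared start so parallel pods align
e = {
    "DBMSBENCHMARKER_NOW": now.strftime("%Y-%m-%d %H:%M:%S"),
    "DBMSBENCHMARKER_START": start.strftime("%Y-%m-%d %H:%M:%S"),
    "DBMSBENCHMARKER_SLEEP": "60",
}

# Later dicts win, so benchmarking_parameters can override the defaults.
env = {**env, **e}
env = {**env, **benchmarking_parameters}
print(env["DBMSBENCHMARKER_SLEEP"])   # '30'
```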
@@ -2322,8 +2585,8 @@ def create_manifest_maintaining(self, app='', component='maintaining', experimen template = "jobtemplate-maintaining.yml" if len(self.experiment.jobtemplate_maintaining) > 0: template = self.experiment.jobtemplate_maintaining - return self.create_manifest_job(app=app, component=component, experiment=experiment, configuration=configuration, experimentRun=experimentRun, client=1, parallelism=parallelism, env=env, template=template, num_pods=num_pods, nodegroup='maintaining')#, jobname=jobname) - def create_manifest_loading(self, app='', component='loading', experiment='', configuration='', parallelism=1, alias='', num_pods=1): + return self.create_manifest_job(app=app, component=component, experiment=experiment, configuration=configuration, experimentRun=experimentRun, client=1, parallelism=parallelism, env=env, template=template, num_pods=num_pods, nodegroup='maintaining', connection=connection)#, jobname=jobname) + def create_manifest_loading(self, app='', component='loading', experiment='', configuration='', parallelism=1, alias='', num_pods=1, connection=''): """ Creates a job template for loading. This sets meta data in the template and ENV. @@ -2367,7 +2630,7 @@ def create_manifest_loading(self, app='', component='loading', experiment='', co template = self.experiment.jobtemplate_loading if len(self.jobtemplate_loading) > 0: template = self.jobtemplate_loading - return self.create_manifest_job(app=app, component=component, experiment=experiment, configuration=configuration, experimentRun=experimentRun, client=1, parallelism=parallelism, env=env, template=template, nodegroup='loading', num_pods=num_pods) + return self.create_manifest_job(app=app, component=component, experiment=experiment, configuration=configuration, experimentRun=experimentRun, client=1, parallelism=parallelism, env=env, template=template, nodegroup='loading', num_pods=num_pods, connection=connection, patch_yaml=self.loading_patch) @@ -2408,212 +2671,24 @@ class hammerdb(default): You should have received a copy of the GNU Affero General Public License along with this program. If not, see . """ - def run_benchmarker_pod(self, - connection=None, - alias='', - dialect='', - query=None, - app='', - component='benchmarker', - experiment='', - configuration='', - client='1', - parallelism=1): + def create_manifest_benchmarking(self, connection, app='', component='benchmarker', experiment='', configuration='', experimentRun='', client='1', parallelism=1, alias='', num_pods=1): """ - Runs the benchmarker job. - Sets meta data in the connection.config. - Copy query.config and connection.config to the first pod of the job (result folder mounted into every pod) + Creates a job template for the benchmarker. + This sets meta data in the template and ENV. + This sets some settings specific to HammerDB. 
- :param connection: Name of configuration prolonged by number of runs of the sut (installations) and number of client in a sequence of - :param alias: An alias can be given if we want to anonymize the dbms - :param dialect: A name of a SQL dialect can be given - :param query: The benchmark can be fixed to a specific query :param app: app the job belongs to :param component: Component, for example sut or monitoring :param experiment: Unique identifier of the experiment :param configuration: Name of the dbms configuration - :param client: Number of benchmarker this is in a sequence of - :param parallelism: Number of parallel benchmarker pods we want to have + :param client: Number of benchmarker if there is a sequence of benchmarkers + :param parallelism: Number of parallel pods in job + :param alias: Alias name of the dbms + :return: Name of file in YAML format containing the benchmarker job """ - self.logger.debug('hammerdb.run_benchmarker_pod()') - resultfolder = self.experiment.cluster.config['benchmarker']['resultfolder'] - experiments_configfolder = self.experiment.cluster.experiments_configfolder - if connection is None: - connection = self.configuration#self.getConnectionName() - if len(configuration) == 0: - configuration = connection - code = self.code - if not isinstance(client, str): - client = str(client) - if not self.client: - self.client = client - if len(dialect) == 0 and len(self.dialect) > 0: - dialect = self.dialect - experimentRun = str(self.num_experiment_to_apply_done+1) - #self.experiment.cluster.stopPortforwarding() - # set query management for new query file - tools.query.template = self.experiment.querymanagement - # get connection config (sut) - monitoring_host = self.generate_component_name(component='monitoring', configuration=configuration, experiment=self.code) - service_name = self.generate_component_name(component='sut', configuration=configuration, experiment=self.code) - service_namespace = self.experiment.cluster.contextdata['namespace'] - service_host = self.experiment.cluster.contextdata['service_sut'].format(service=service_name, namespace=service_namespace) - pods = self.experiment.cluster.get_pods(component='sut', configuration=configuration, experiment=self.code) - self.pod_sut = pods[0] - #service_port = config_K8s['port'] - c = self.get_connection_config(connection, alias, dialect, serverip=service_host, monitoring_host=monitoring_host)#config_K8s['ip']) - #c['parameter'] = {} - c['parameter'] = self.eval_parameters - c['parameter']['parallelism'] = parallelism - c['parameter']['client'] = client - c['parameter']['numExperiment'] = experimentRun - c['parameter']['dockerimage'] = self.dockerimage - c['parameter']['connection_parameter'] = self.connection_parameter - #print(c) - #print(self.experiment.cluster.config['benchmarker']['jarfolder']) - if isinstance(c['JDBC']['jar'], list): - for i, j in enumerate(c['JDBC']['jar']): - c['JDBC']['jar'][i] = self.experiment.cluster.config['benchmarker']['jarfolder']+c['JDBC']['jar'][i] - elif isinstance(c['JDBC']['jar'], str): - c['JDBC']['jar'] = self.experiment.cluster.config['benchmarker']['jarfolder']+c['JDBC']['jar'] - #print(c) - self.logger.debug('hammerdb.run_benchmarker_pod(): {}'.format(connection)) - self.benchmark = benchmarker.benchmarker( - fixedConnection=connection, - fixedQuery=query, - result_path=resultfolder, - batch=True, - working='connection', - code=code - ) - #self.benchmark.code = '1611607321' - self.code = self.benchmark.code - #print("Code", self.code) - 
self.logger.debug('hammerdb.run_benchmarker_pod(Code={})'.format(self.code)) - # read config for benchmarker - connectionfile = experiments_configfolder+'/connections.config' - if self.experiment.queryfile is not None: - queryfile = experiments_configfolder+'/'+self.experiment.queryfile - else: - queryfile = experiments_configfolder+'/queries.config' - self.benchmark.getConfig(connectionfile=connectionfile, queryfile=queryfile) - if c['name'] in self.benchmark.dbms: - print("Rerun connection "+connection) - # TODO: Find and replace connection info - else: - self.benchmark.connections.append(c) - # NEVER rerun, only one connection in config for detached: - #self.benchmark.connections = [c] - #print(self.benchmark.connections) - #self.logger.debug('configuration.run_benchmarker_pod(): {}'.format(self.benchmark.connections)) - self.benchmark.dbms[c['name']] = tools.dbms(c, False) - # copy or generate config folder (query and connection) - # add connection to existing list - # or: generate new connection list - filename = self.benchmark.path+'/connections.config' - with open(filename, 'w') as f: - f.write(str(self.benchmark.connections)) - filename = self.benchmark.path+'/'+c['name']+'.config' - with open(filename, 'w') as f: - f.write(str(c)) - # write appended query config - if len(self.experiment.workload) > 0: - for k,v in self.experiment.workload.items(): - self.benchmark.queryconfig[k] = v - filename = self.benchmark.path+'/queries.config' - with open(filename, 'w') as f: - f.write(str(self.benchmark.queryconfig)) - # generate all parameters and store in protocol - self.benchmark.reporterStore.readProtocol() - self.benchmark.generateAllParameters() - self.benchmark.reporterStore.writeProtocol() - # store experiment - experiment = {} - experiment['delay'] = 0 - experiment['step'] = "runBenchmarks" - experiment['connection'] = connection - experiment['connectionmanagement'] = self.connectionmanagement.copy() - self.experiment.cluster.log_experiment(experiment) - # create pod - yamlfile = self.create_manifest_benchmarking(connection=connection, component=component, configuration=configuration, experiment=self.code, experimentRun=experimentRun, client=client, parallelism=parallelism, alias=c['alias'], num_pods=parallelism) - # start pod - self.experiment.cluster.kubectl('create -f '+yamlfile) - pods = [] - while len(pods) == 0: - self.wait(10) - pods = self.experiment.cluster.get_job_pods(component=component, configuration=configuration, experiment=self.code, client=client) - client_pod_name = pods[0] - status = self.experiment.cluster.get_pod_status(client_pod_name) - self.logger.debug('Pod={} has status={}'.format(client_pod_name, status)) - print("Waiting for job {}: ".format(client_pod_name), end="", flush=True) - while status != "Running" and status != "Succeeded": - self.logger.debug('Pod={} has status={}'.format(client_pod_name, status)) - print(".", end="", flush=True) - #self.wait(10) - # maybe pod had to be restarted - pods = [] - while len(pods) == 0: - self.wait(10, silent=True) - pods = self.experiment.cluster.get_job_pods(component=component, configuration=configuration, experiment=self.code, client=client) - client_pod_name = pods[0] - status = self.experiment.cluster.get_pod_status(client_pod_name) - print("found") - # get monitoring for loading - """ - if self.monitoring_active: - print("get monitoring for loading") - logger = logging.getLogger('dbmsbenchmarker') - logging.basicConfig(level=logging.DEBUG) - for connection_number, connection_data in 
self.benchmark.dbms.items(): - #connection = self.benchmark.dbms[c['name']] - print(connection_number, connection_data) - print(connection_data.connectiondata['monitoring']['prometheus_url']) - query='loading' - for m, metric in connection_data.connectiondata['monitoring']['metrics'].items(): - print(m) - monitor.metrics.fetchMetric(query, m, connection_number, connection_data.connectiondata, int(self.timeLoadingStart), int(self.timeLoadingEnd), '{result_path}'.format(result_path=self.benchmark.path)) - """ - # copy config to pod - dashboard - pods = self.experiment.cluster.get_pods(component='dashboard') - if len(pods) > 0: - pod_dashboard = pods[0] - cmd = {} - cmd['prepare_log'] = 'mkdir -p /results/'+str(self.code) - stdin, stdout, stderr = self.experiment.cluster.execute_command_in_pod(command=cmd['prepare_log'], pod=pod_dashboard, container="dashboard") - stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/queries.config '+pod_dashboard+':/results/'+str(self.code)+'/queries.config') - self.logger.debug('copy config queries.config: {}'.format(stdout)) - stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/connections.config '+pod_dashboard+':/results/'+str(self.code)+'/'+c['name']+'.config') - self.logger.debug('copy config {}: {}'.format(c['name']+'.config', stdout)) - # copy twice to be more sure it worked - stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/connections.config '+pod_dashboard+':/results/'+str(self.code)+'/'+c['name']+'.config') - self.logger.debug('copy config {}: {}'.format(c['name']+'.config', stdout)) - stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/connections.config '+pod_dashboard+':/results/'+str(self.code)+'/connections.config') - self.logger.debug('copy config connections.config: {}'.format(stdout)) - stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/protocol.json '+pod_dashboard+':/results/'+str(self.code)+'/protocol.json') - self.logger.debug('copy config protocol.json: {}'.format(stdout)) - # get monitoring for loading - if self.monitoring_active: - cmd = {} - cmd['fetch_loading_metrics'] = 'python metrics.py -r /results/ -c {} -e {} -ts {} -te {}'.format(connection, self.code, self.timeLoadingStart, self.timeLoadingEnd) - stdin, stdout, stderr = self.experiment.cluster.execute_command_in_pod(command=cmd['fetch_loading_metrics'], pod=pod_dashboard, container="dashboard") - def create_manifest_benchmarking(self, connection, app='', component='benchmarker', experiment='', configuration='', experimentRun='', client='1', parallelism=1, alias='', num_pods=1): - """ - Creates a job template for the benchmarker. - This sets meta data in the template and ENV. - This sets some settings specific to HammerDB. 
- - :param app: app the job belongs to - :param component: Component, for example sut or monitoring - :param experiment: Unique identifier of the experiment - :param configuration: Name of the dbms configuration - :param client: Number of benchmarker if there is a sequence of benchmarkers - :param parallelism: Number of parallel pods in job - :param alias: Alias name of the dbms - :return: Name of file in YAML format containing the benchmarker job - """ - if len(app) == 0: - app = self.appname - code = str(int(experiment)) + if len(app) == 0: + app = self.appname + code = str(int(experiment)) experimentRun = str(self.num_experiment_to_apply_done+1) #connection = configuration jobname = self.generate_component_name(app=app, component=component, experiment=experiment, configuration=configuration, experimentRun=experimentRun, client=client) @@ -2638,7 +2713,7 @@ def create_manifest_benchmarking(self, connection, app='', component='benchmarke env = {**env, **self.loading_parameters} env = {**env, **self.benchmarking_parameters} #job_experiment = self.experiment.path+'/job-dbmsbenchmarker-{configuration}-{client}.yml'.format(configuration=configuration, client=client) - return self.create_manifest_job(app=app, component=component, experiment=experiment, configuration=configuration, experimentRun=experimentRun, client=client, parallelism=parallelism, env=env, template="jobtemplate-hammerdb-tpcc.yml", num_pods=num_pods, nodegroup='benchmarking')#, jobname=jobname) + return self.create_manifest_job(app=app, component=component, experiment=experiment, configuration=configuration, experimentRun=experimentRun, client=client, parallelism=parallelism, env=env, template="jobtemplate-benchmarking-hammerdb.yml", num_pods=num_pods, nodegroup='benchmarking', connection=connection)#, jobname=jobname) @@ -2674,194 +2749,6 @@ class ycsb(default): You should have received a copy of the GNU Affero General Public License along with this program. If not, see . """ - def run_benchmarker_pod(self, - connection=None, - alias='', - dialect='', - query=None, - app='', - component='benchmarker', - experiment='', - configuration='', - client='1', - parallelism=1): - """ - Runs the benchmarker job. - Sets meta data in the connection.config. 
- Copy query.config and connection.config to the first pod of the job (result folder mounted into every pod) - - :param connection: Name of configuration prolonged by number of runs of the sut (installations) and number of client in a sequence of - :param alias: An alias can be given if we want to anonymize the dbms - :param dialect: A name of a SQL dialect can be given - :param query: The benchmark can be fixed to a specific query - :param app: app the job belongs to - :param component: Component, for example sut or monitoring - :param experiment: Unique identifier of the experiment - :param configuration: Name of the dbms configuration - :param client: Number of benchmarker this is in a sequence of - :param parallelism: Number of parallel benchmarker pods we want to have - """ - self.logger.debug('ycsb.run_benchmarker_pod()') - resultfolder = self.experiment.cluster.config['benchmarker']['resultfolder'] - experiments_configfolder = self.experiment.cluster.experiments_configfolder - if connection is None: - connection = self.configuration#self.getConnectionName() - if len(configuration) == 0: - configuration = connection - code = self.code - if not isinstance(client, str): - client = str(client) - if not self.client: - self.client = client - if len(dialect) == 0 and len(self.dialect) > 0: - dialect = self.dialect - experimentRun = str(self.num_experiment_to_apply_done+1) - #self.experiment.cluster.stopPortforwarding() - # set query management for new query file - tools.query.template = self.experiment.querymanagement - # get connection config (sut) - monitoring_host = self.generate_component_name(component='monitoring', configuration=configuration, experiment=self.code) - service_name = self.generate_component_name(component='sut', configuration=configuration, experiment=self.code) - service_namespace = self.experiment.cluster.contextdata['namespace'] - service_host = self.experiment.cluster.contextdata['service_sut'].format(service=service_name, namespace=service_namespace) - pods = self.experiment.cluster.get_pods(component='sut', configuration=configuration, experiment=self.code) - self.pod_sut = pods[0] - #service_port = config_K8s['port'] - c = self.get_connection_config(connection, alias, dialect, serverip=service_host, monitoring_host=monitoring_host)#config_K8s['ip']) - #c['parameter'] = {} - c['parameter'] = self.eval_parameters - c['parameter']['parallelism'] = parallelism - c['parameter']['client'] = client - c['parameter']['numExperiment'] = experimentRun - c['parameter']['dockerimage'] = self.dockerimage - c['parameter']['connection_parameter'] = self.connection_parameter - #print(c) - #print(self.experiment.cluster.config['benchmarker']['jarfolder']) - if isinstance(c['JDBC']['jar'], list): - for i, j in enumerate(c['JDBC']['jar']): - c['JDBC']['jar'][i] = self.experiment.cluster.config['benchmarker']['jarfolder']+c['JDBC']['jar'][i] - elif isinstance(c['JDBC']['jar'], str): - c['JDBC']['jar'] = self.experiment.cluster.config['benchmarker']['jarfolder']+c['JDBC']['jar'] - #print(c) - self.logger.debug('ycsb.run_benchmarker_pod(): {}'.format(connection)) - self.benchmark = benchmarker.benchmarker( - fixedConnection=connection, - fixedQuery=query, - result_path=resultfolder, - batch=True, - working='connection', - code=code - ) - #self.benchmark.code = '1611607321' - self.code = self.benchmark.code - #print("Code", self.code) - self.logger.debug('ycsb.run_benchmarker_pod(Code={})'.format(self.code)) - # read config for benchmarker - connectionfile = 
experiments_configfolder+'/connections.config' - if self.experiment.queryfile is not None: - queryfile = experiments_configfolder+'/'+self.experiment.queryfile - else: - queryfile = experiments_configfolder+'/queries.config' - self.benchmark.getConfig(connectionfile=connectionfile, queryfile=queryfile) - if c['name'] in self.benchmark.dbms: - print("Rerun connection "+connection) - # TODO: Find and replace connection info - else: - self.benchmark.connections.append(c) - # NEVER rerun, only one connection in config for detached: - #self.benchmark.connections = [c] - #print(self.benchmark.connections) - #self.logger.debug('configuration.run_benchmarker_pod(): {}'.format(self.benchmark.connections)) - self.benchmark.dbms[c['name']] = tools.dbms(c, False) - # copy or generate config folder (query and connection) - # add connection to existing list - # or: generate new connection list - filename = self.benchmark.path+'/connections.config' - with open(filename, 'w') as f: - f.write(str(self.benchmark.connections)) - filename = self.benchmark.path+'/'+c['name']+'.config' - with open(filename, 'w') as f: - f.write(str(c)) - # write appended query config - if len(self.experiment.workload) > 0: - for k,v in self.experiment.workload.items(): - self.benchmark.queryconfig[k] = v - filename = self.benchmark.path+'/queries.config' - with open(filename, 'w') as f: - f.write(str(self.benchmark.queryconfig)) - # generate all parameters and store in protocol - self.benchmark.reporterStore.readProtocol() - self.benchmark.generateAllParameters() - self.benchmark.reporterStore.writeProtocol() - # store experiment - experiment = {} - experiment['delay'] = 0 - experiment['step'] = "runBenchmarks" - experiment['connection'] = connection - experiment['connectionmanagement'] = self.connectionmanagement.copy() - self.experiment.cluster.log_experiment(experiment) - # create pod - yamlfile = self.create_manifest_benchmarking(connection=connection, component=component, configuration=configuration, experiment=self.code, experimentRun=experimentRun, client=client, parallelism=parallelism, alias=c['alias'], num_pods=parallelism) - # start pod - self.experiment.cluster.kubectl('create -f '+yamlfile) - pods = [] - while len(pods) == 0: - self.wait(10) - pods = self.experiment.cluster.get_job_pods(component=component, configuration=configuration, experiment=self.code, client=client) - client_pod_name = pods[0] - status = self.experiment.cluster.get_pod_status(client_pod_name) - self.logger.debug('Pod={} has status={}'.format(client_pod_name, status)) - print("Waiting for job {}: ".format(client_pod_name), end="", flush=True) - while status != "Running" and status != "Succeeded": - self.logger.debug('Pod={} has status={}'.format(client_pod_name, status)) - print(".", end="", flush=True) - #self.wait(10) - # maybe pod had to be restarted - pods = [] - while len(pods) == 0: - self.wait(10, silent=True) - pods = self.experiment.cluster.get_job_pods(component=component, configuration=configuration, experiment=self.code, client=client) - client_pod_name = pods[0] - status = self.experiment.cluster.get_pod_status(client_pod_name) - print("found") - # get monitoring for loading - """ - if self.monitoring_active: - print("get monitoring for loading") - logger = logging.getLogger('dbmsbenchmarker') - logging.basicConfig(level=logging.DEBUG) - for connection_number, connection_data in self.benchmark.dbms.items(): - #connection = self.benchmark.dbms[c['name']] - print(connection_number, connection_data) - 
print(connection_data.connectiondata['monitoring']['prometheus_url']) - query='loading' - for m, metric in connection_data.connectiondata['monitoring']['metrics'].items(): - print(m) - monitor.metrics.fetchMetric(query, m, connection_number, connection_data.connectiondata, int(self.timeLoadingStart), int(self.timeLoadingEnd), '{result_path}'.format(result_path=self.benchmark.path)) - """ - # copy config to pod - dashboard - pods = self.experiment.cluster.get_pods(component='dashboard') - if len(pods) > 0: - pod_dashboard = pods[0] - cmd = {} - cmd['prepare_log'] = 'mkdir -p /results/'+str(self.code) - stdin, stdout, stderr = self.experiment.cluster.execute_command_in_pod(command=cmd['prepare_log'], pod=pod_dashboard, container="dashboard") - stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/queries.config '+pod_dashboard+':/results/'+str(self.code)+'/queries.config') - self.logger.debug('copy config queries.config: {}'.format(stdout)) - stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/connections.config '+pod_dashboard+':/results/'+str(self.code)+'/'+c['name']+'.config') - self.logger.debug('copy config {}: {}'.format(c['name']+'.config', stdout)) - # copy twice to be more sure it worked - stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/connections.config '+pod_dashboard+':/results/'+str(self.code)+'/'+c['name']+'.config') - self.logger.debug('copy config {}: {}'.format(c['name']+'.config', stdout)) - stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/connections.config '+pod_dashboard+':/results/'+str(self.code)+'/connections.config') - self.logger.debug('copy config connections.config: {}'.format(stdout)) - stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/protocol.json '+pod_dashboard+':/results/'+str(self.code)+'/protocol.json') - self.logger.debug('copy config protocol.json: {}'.format(stdout)) - # get monitoring for loading - if self.monitoring_active: - cmd = {} - cmd['fetch_loading_metrics'] = 'python metrics.py -r /results/ -c {} -e {} -ts {} -te {}'.format(connection, self.code, self.timeLoadingStart, self.timeLoadingEnd) - stdin, stdout, stderr = self.experiment.cluster.execute_command_in_pod(command=cmd['fetch_loading_metrics'], pod=pod_dashboard, container="dashboard") def create_manifest_benchmarking(self, connection, app='', component='benchmarker', experiment='', configuration='', experimentRun='', client='1', parallelism=1, alias='', num_pods=1): """ Creates a job template for the benchmarker. 
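With the subclass-specific run_benchmarker_pod removed, create_manifest_benchmarking only builds the job YAML; the removed code above shows how that file is then submitted. A rough sketch of this call pattern, where the object names and parameter values are purely illustrative:

    # 'config' stands for a ycsb configuration object and 'code' for the experiment id (both assumed to exist)
    yamlfile = config.create_manifest_benchmarking(connection='PostgreSQL-1-1', configuration='PostgreSQL-1-1',
        experiment=code, experimentRun='1', client='1', parallelism=2, num_pods=2)
    config.experiment.cluster.kubectl('create -f ' + yamlfile)  # submit the generated benchmarker job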
@@ -2904,7 +2791,7 @@ def create_manifest_benchmarking(self, connection, app='', component='benchmarke env = {**env, **self.loading_parameters} env = {**env, **self.benchmarking_parameters} #job_experiment = self.experiment.path+'/job-dbmsbenchmarker-{configuration}-{client}.yml'.format(configuration=configuration, client=client) - return self.create_manifest_job(app=app, component=component, experiment=experiment, configuration=configuration, experimentRun=experimentRun, client=client, parallelism=parallelism, env=env, template="jobtemplate-benchmarking-ycsb.yml", num_pods=num_pods, nodegroup='benchmarking')#, jobname=jobname) + return self.create_manifest_job(app=app, component=component, experiment=experiment, configuration=configuration, experimentRun=experimentRun, client=client, parallelism=parallelism, env=env, template="jobtemplate-benchmarking-ycsb.yml", num_pods=num_pods, nodegroup='benchmarking', connection=connection)#, jobname=jobname) @@ -2939,194 +2826,6 @@ class benchbase(default): You should have received a copy of the GNU Affero General Public License along with this program. If not, see . """ - def run_benchmarker_pod(self, - connection=None, - alias='', - dialect='', - query=None, - app='', - component='benchmarker', - experiment='', - configuration='', - client='1', - parallelism=1): - """ - Runs the benchmarker job. - Sets meta data in the connection.config. - Copy query.config and connection.config to the first pod of the job (result folder mounted into every pod) - - :param connection: Name of configuration prolonged by number of runs of the sut (installations) and number of client in a sequence of - :param alias: An alias can be given if we want to anonymize the dbms - :param dialect: A name of a SQL dialect can be given - :param query: The benchmark can be fixed to a specific query - :param app: app the job belongs to - :param component: Component, for example sut or monitoring - :param experiment: Unique identifier of the experiment - :param configuration: Name of the dbms configuration - :param client: Number of benchmarker this is in a sequence of - :param parallelism: Number of parallel benchmarker pods we want to have - """ - self.logger.debug('benchbase.run_benchmarker_pod()') - resultfolder = self.experiment.cluster.config['benchmarker']['resultfolder'] - experiments_configfolder = self.experiment.cluster.experiments_configfolder - if connection is None: - connection = self.configuration#self.getConnectionName() - if len(configuration) == 0: - configuration = connection - code = self.code - if not isinstance(client, str): - client = str(client) - if not self.client: - self.client = client - if len(dialect) == 0 and len(self.dialect) > 0: - dialect = self.dialect - experimentRun = str(self.num_experiment_to_apply_done+1) - #self.experiment.cluster.stopPortforwarding() - # set query management for new query file - tools.query.template = self.experiment.querymanagement - # get connection config (sut) - monitoring_host = self.generate_component_name(component='monitoring', configuration=configuration, experiment=self.code) - service_name = self.generate_component_name(component='sut', configuration=configuration, experiment=self.code) - service_namespace = self.experiment.cluster.contextdata['namespace'] - service_host = self.experiment.cluster.contextdata['service_sut'].format(service=service_name, namespace=service_namespace) - pods = self.experiment.cluster.get_pods(component='sut', configuration=configuration, experiment=self.code) - self.pod_sut = pods[0] 
- #service_port = config_K8s['port'] - c = self.get_connection_config(connection, alias, dialect, serverip=service_host, monitoring_host=monitoring_host)#config_K8s['ip']) - #c['parameter'] = {} - c['parameter'] = self.eval_parameters - c['parameter']['parallelism'] = parallelism - c['parameter']['client'] = client - c['parameter']['numExperiment'] = experimentRun - c['parameter']['dockerimage'] = self.dockerimage - c['parameter']['connection_parameter'] = self.connection_parameter - #print(c) - #print(self.experiment.cluster.config['benchmarker']['jarfolder']) - if isinstance(c['JDBC']['jar'], list): - for i, j in enumerate(c['JDBC']['jar']): - c['JDBC']['jar'][i] = self.experiment.cluster.config['benchmarker']['jarfolder']+c['JDBC']['jar'][i] - elif isinstance(c['JDBC']['jar'], str): - c['JDBC']['jar'] = self.experiment.cluster.config['benchmarker']['jarfolder']+c['JDBC']['jar'] - #print(c) - self.logger.debug('benchbase.run_benchmarker_pod(): {}'.format(connection)) - self.benchmark = benchmarker.benchmarker( - fixedConnection=connection, - fixedQuery=query, - result_path=resultfolder, - batch=True, - working='connection', - code=code - ) - #self.benchmark.code = '1611607321' - self.code = self.benchmark.code - #print("Code", self.code) - self.logger.debug('benchbase.run_benchmarker_pod(Code={})'.format(self.code)) - # read config for benchmarker - connectionfile = experiments_configfolder+'/connections.config' - if self.experiment.queryfile is not None: - queryfile = experiments_configfolder+'/'+self.experiment.queryfile - else: - queryfile = experiments_configfolder+'/queries.config' - self.benchmark.getConfig(connectionfile=connectionfile, queryfile=queryfile) - if c['name'] in self.benchmark.dbms: - print("Rerun connection "+connection) - # TODO: Find and replace connection info - else: - self.benchmark.connections.append(c) - # NEVER rerun, only one connection in config for detached: - #self.benchmark.connections = [c] - #print(self.benchmark.connections) - #self.logger.debug('configuration.run_benchmarker_pod(): {}'.format(self.benchmark.connections)) - self.benchmark.dbms[c['name']] = tools.dbms(c, False) - # copy or generate config folder (query and connection) - # add connection to existing list - # or: generate new connection list - filename = self.benchmark.path+'/connections.config' - with open(filename, 'w') as f: - f.write(str(self.benchmark.connections)) - filename = self.benchmark.path+'/'+c['name']+'.config' - with open(filename, 'w') as f: - f.write(str(c)) - # write appended query config - if len(self.experiment.workload) > 0: - for k,v in self.experiment.workload.items(): - self.benchmark.queryconfig[k] = v - filename = self.benchmark.path+'/queries.config' - with open(filename, 'w') as f: - f.write(str(self.benchmark.queryconfig)) - # generate all parameters and store in protocol - self.benchmark.reporterStore.readProtocol() - self.benchmark.generateAllParameters() - self.benchmark.reporterStore.writeProtocol() - # store experiment - experiment = {} - experiment['delay'] = 0 - experiment['step'] = "runBenchmarks" - experiment['connection'] = connection - experiment['connectionmanagement'] = self.connectionmanagement.copy() - self.experiment.cluster.log_experiment(experiment) - # create pod - yamlfile = self.create_manifest_benchmarking(connection=connection, component=component, configuration=configuration, experiment=self.code, experimentRun=experimentRun, client=client, parallelism=parallelism, alias=c['alias'], num_pods=parallelism) - # start pod - 
self.experiment.cluster.kubectl('create -f '+yamlfile) - pods = [] - while len(pods) == 0: - self.wait(10) - pods = self.experiment.cluster.get_job_pods(component=component, configuration=configuration, experiment=self.code, client=client) - client_pod_name = pods[0] - status = self.experiment.cluster.get_pod_status(client_pod_name) - self.logger.debug('Pod={} has status={}'.format(client_pod_name, status)) - print("Waiting for job {}: ".format(client_pod_name), end="", flush=True) - while status != "Running" and status != "Succeeded": - self.logger.debug('Pod={} has status={}'.format(client_pod_name, status)) - print(".", end="", flush=True) - #self.wait(10) - # maybe pod had to be restarted - pods = [] - while len(pods) == 0: - self.wait(10, silent=True) - pods = self.experiment.cluster.get_job_pods(component=component, configuration=configuration, experiment=self.code, client=client) - client_pod_name = pods[0] - status = self.experiment.cluster.get_pod_status(client_pod_name) - print("found") - # get monitoring for loading - """ - if self.monitoring_active: - print("get monitoring for loading") - logger = logging.getLogger('dbmsbenchmarker') - logging.basicConfig(level=logging.DEBUG) - for connection_number, connection_data in self.benchmark.dbms.items(): - #connection = self.benchmark.dbms[c['name']] - print(connection_number, connection_data) - print(connection_data.connectiondata['monitoring']['prometheus_url']) - query='loading' - for m, metric in connection_data.connectiondata['monitoring']['metrics'].items(): - print(m) - monitor.metrics.fetchMetric(query, m, connection_number, connection_data.connectiondata, int(self.timeLoadingStart), int(self.timeLoadingEnd), '{result_path}'.format(result_path=self.benchmark.path)) - """ - # copy config to pod - dashboard - pods = self.experiment.cluster.get_pods(component='dashboard') - if len(pods) > 0: - pod_dashboard = pods[0] - cmd = {} - cmd['prepare_log'] = 'mkdir -p /results/'+str(self.code) - stdin, stdout, stderr = self.experiment.cluster.execute_command_in_pod(command=cmd['prepare_log'], pod=pod_dashboard, container='dashboard') - stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/queries.config '+pod_dashboard+':/results/'+str(self.code)+'/queries.config') - self.logger.debug('copy config queries.config: {}'.format(stdout)) - stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/connections.config '+pod_dashboard+':/results/'+str(self.code)+'/'+c['name']+'.config') - self.logger.debug('copy config {}: {}'.format(c['name']+'.config', stdout)) - # copy twice to be more sure it worked - stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/connections.config '+pod_dashboard+':/results/'+str(self.code)+'/'+c['name']+'.config') - self.logger.debug('copy config {}: {}'.format(c['name']+'.config', stdout)) - stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/connections.config '+pod_dashboard+':/results/'+str(self.code)+'/connections.config') - self.logger.debug('copy config connections.config: 
{}'.format(stdout)) - stdout = self.experiment.cluster.kubectl('cp --container dashboard '+self.experiment.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/protocol.json '+pod_dashboard+':/results/'+str(self.code)+'/protocol.json') - self.logger.debug('copy config protocol.json: {}'.format(stdout)) - # get monitoring for loading - if self.monitoring_active: - cmd = {} - cmd['fetch_loading_metrics'] = 'python metrics.py -r /results/ -c {} -e {} -ts {} -te {}'.format(connection, self.code, self.timeLoadingStart, self.timeLoadingEnd) - stdin, stdout, stderr = self.experiment.cluster.execute_command_in_pod(command=cmd['fetch_loading_metrics'], pod=pod_dashboard, container="dashboard") def create_manifest_benchmarking(self, connection, app='', component='benchmarker', experiment='', configuration='', experimentRun='', client='1', parallelism=1, alias='', num_pods=1): """ Creates a job template for the benchmarker. @@ -3169,7 +2868,7 @@ def create_manifest_benchmarking(self, connection, app='', component='benchmarke env = {**env, **self.loading_parameters} env = {**env, **self.benchmarking_parameters} #job_experiment = self.experiment.path+'/job-dbmsbenchmarker-{configuration}-{client}.yml'.format(configuration=configuration, client=client) - return self.create_manifest_job(app=app, component=component, experiment=experiment, configuration=configuration, experimentRun=experimentRun, client=client, parallelism=parallelism, env=env, template="jobtemplate-benchmarking-benchbase.yml", num_pods=num_pods, nodegroup='benchmarking')#, jobname=jobname) + return self.create_manifest_job(app=app, component=component, experiment=experiment, configuration=configuration, experimentRun=experimentRun, client=client, parallelism=parallelism, env=env, template="jobtemplate-benchmarking-benchbase.yml", num_pods=num_pods, nodegroup='benchmarking', connection=connection)#, jobname=jobname) @@ -3202,7 +2901,7 @@ def create_manifest_benchmarking(self, connection, app='', component='benchmarke #@fire_and_forget -def load_data_asynch(app, component, experiment, configuration, pod_sut, scriptfolder, commands, loadData, path, volume, context, service_name): +def load_data_asynch(app, component, experiment, configuration, pod_sut, scriptfolder, commands, loadData, path, volume, context, service_name, time_offset=0, time_start_int=0, script_type='loaded'): logger = logging.getLogger('load_data_asynch') #with open('asynch.test.log','w') as file: # file.write('started') @@ -3232,20 +2931,29 @@ def kubectl(command, context): #pods = self.experiment.cluster.get_pods(component='sut', configuration=configuration, experiment=experiment) #pod_sut = pods[0] #print("load_data") - timeLoadingStart = default_timer() - now = datetime.utcnow() - now_string = now.strftime('%Y-%m-%d %H:%M:%S') - time_now = str(datetime.now()) - time_now_int = int(datetime.timestamp(datetime.strptime(time_now,'%Y-%m-%d %H:%M:%S.%f'))) + time_scriptgroup_start = default_timer() # for more precise float time spans + # do we have started previously? 
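+    # time_start_int == 0 means this is the first script group of the loading phase,
+    # so a fresh epoch timestamp is recorded as timeLoadingStart.
+    # Otherwise loading has been started by an earlier script group: its start time is reused
+    # and time_offset carries the loading time already accumulated before this call.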
+ if time_start_int == 0: + now = datetime.utcnow() # for UTC time as int + now_string = now.strftime('%Y-%m-%d %H:%M:%S') + time_now = str(datetime.now()) + timeLoadingStart = int(datetime.timestamp(datetime.strptime(time_now,'%Y-%m-%d %H:%M:%S.%f'))) + else: + # loading has been started previously + timeLoadingStart = int(time_start_int) + #time_now_int = int(time_start_int) + logger.debug("#### time_scriptgroup_start: "+str(time_scriptgroup_start)) + logger.debug("#### timeLoadingStart: "+str(timeLoadingStart)) + logger.debug("#### timeLoading before scrips: "+str(time_offset)) # mark pod - fullcommand = 'label pods '+pod_sut+' --overwrite loaded=False timeLoadingStart="{}"'.format(time_now_int) + fullcommand = 'label pods '+pod_sut+' --overwrite {script_type}=False timeLoadingStart="{timeLoadingStart}"'.format(script_type=script_type, timeLoadingStart=timeLoadingStart) #print(fullcommand) kubectl(fullcommand, context) #proc = subprocess.Popen(fullcommand, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) #stdout, stderr = proc.communicate() if len(volume) > 0: # mark pvc - fullcommand = 'label pvc '+volume+' --overwrite loaded=False timeLoadingStart="{}"'.format(time_now_int) + fullcommand = 'label pvc '+volume+' --overwrite {script_type}=False timeLoadingStart="{timeLoadingStart}"'.format(script_type=script_type, timeLoadingStart=timeLoadingStart) #print(fullcommand) kubectl(fullcommand, context) #proc = subprocess.Popen(fullcommand, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) @@ -3253,9 +2961,13 @@ def kubectl(command, context): # scripts #scriptfolder = '/data/{experiment}/{docker}/'.format(experiment=self.experiment.cluster.experiments_configfolder, docker=self.docker) #shellcommand = '[ -f {scriptname} ] && sh {scriptname}' + times_script = dict() shellcommand = 'if [ -f {scriptname} ]; then sh {scriptname}; else exit 0; fi' #commands = self.initscript for c in commands: + time_scrip_start = default_timer() # for more precise float time spans + #time_now = str(datetime.now()) + #time_scrip_start = int(datetime.timestamp(datetime.strptime(time_now,'%Y-%m-%d %H:%M:%S.%f'))) filename, file_extension = os.path.splitext(c) if file_extension.lower() == '.sql': stdin, stdout, stderr = execute_command_in_pod_sut(loadData.format(scriptname=scriptfolder+c, service_name=service_name), pod_sut, context) @@ -3281,21 +2993,48 @@ def kubectl(command, context): if len(stderr) > 0: with open(filename_log,'w') as file: file.write(stderr) + #time_now = str(datetime.now()) + #time_scrip_end = int(datetime.timestamp(datetime.strptime(time_now,'%Y-%m-%d %H:%M:%S.%f'))) + time_scrip_end = default_timer() + sep = filename.find("-") + if sep > 0: + subscript_type = filename[:sep].lower() + times_script[subscript_type] = time_scrip_end - time_scrip_start + logger.debug("#### script="+str(subscript_type)+" time="+str(times_script[subscript_type])) # mark pod - timeLoadingEnd = default_timer() - timeLoading = timeLoadingEnd - timeLoadingStart - now = datetime.utcnow() - now_string = now.strftime('%Y-%m-%d %H:%M:%S') + time_scriptgroup_end = default_timer() time_now = str(datetime.now()) - time_now_int = int(datetime.timestamp(datetime.strptime(time_now,'%Y-%m-%d %H:%M:%S.%f'))) - fullcommand = 'label pods '+pod_sut+' --overwrite loaded=True timeLoadingEnd="{}" timeLoading={}'.format(time_now_int, timeLoading) + timeLoadingEnd = int(datetime.timestamp(datetime.strptime(time_now,'%Y-%m-%d %H:%M:%S.%f'))) + timeLoading = 
time_scriptgroup_end - time_scriptgroup_start + time_offset + logger.debug("#### time_scriptgroup_end: "+str(time_scriptgroup_end)) + logger.debug("#### timeLoadingEnd: "+str(timeLoadingEnd)) + logger.debug("#### timeLoading after scrips: "+str(timeLoading)) + #now = datetime.utcnow() + #now_string = now.strftime('%Y-%m-%d %H:%M:%S') + #time_now = str(datetime.now()) + #time_now_int = int(datetime.timestamp(datetime.strptime(time_now,'%Y-%m-%d %H:%M:%S.%f'))) + # store infos in labels of sut pod and it's pvc + labels = dict() + labels[script_type] = 'True' + labels['time_{script_type}'.format(script_type=script_type)] = (time_scriptgroup_end - time_scriptgroup_start) + #labels['timeLoadingEnd'] = time_now_int # is float, so needs "" + labels['timeLoading'] = timeLoading + for subscript_type, time_subscript_type in times_script.items(): + labels['time_{script_type}'.format(script_type=subscript_type)] = time_subscript_type + fullcommand = 'label pods {pod_sut} --overwrite timeLoadingEnd="{timeLoadingEnd}" '.format(pod_sut=pod_sut, timeLoadingEnd=timeLoadingEnd) + for key, value in labels.items(): + fullcommand = fullcommand + " {key}={value}".format(key=key, value=value) + #fullcommand = 'label pods '+pod_sut+' --overwrite {script_type}=True time_{script_type}={timing_current} timeLoadingEnd="{timing}" timeLoading={timespan}'.format(script_type=script_type, timing=time_now_int, timespan=timeLoading, timing_current=(timeLoadingEnd - timeLoadingStart)) #print(fullcommand) kubectl(fullcommand, context) #proc = subprocess.Popen(fullcommand, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) #stdout, stderr = proc.communicate() if len(volume) > 0: # mark volume - fullcommand = 'label pvc '+volume+' --overwrite loaded=True timeLoadingEnd="{}" timeLoading={}'.format(time_now_int, timeLoading) + fullcommand = 'label pvc {volume} --overwrite timeLoadingEnd="{timeLoadingEnd}" '.format(volume=volume, timeLoadingEnd=timeLoadingEnd) + for key, value in labels.items(): + fullcommand = fullcommand + " {key}={value}".format(key=key, value=value) + #fullcommand = 'label pvc '+volume+' --overwrite {script_type}=True time_{script_type}={timing_current} timeLoadingEnd="{timing}" timeLoading={timespan}'.format(script_type=script_type, timing=time_now_int, timespan=timeLoading, timing_current=(timeLoadingEnd - timeLoadingStart)) #print(fullcommand) kubectl(fullcommand, context) #proc = subprocess.Popen(fullcommand, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) diff --git a/bexhoma/evaluators.py b/bexhoma/evaluators.py new file mode 100644 index 00000000..b3cc5a0f --- /dev/null +++ b/bexhoma/evaluators.py @@ -0,0 +1,1019 @@ +""" +:Date: 2023-01-05 +:Version: 0.6.1 +:Authors: Patrick K. Erdelt + + Module to evaluate results obtained using bexhoma. + + Copyright (C) 2020 Patrick K. Erdelt + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . 
+""" +import pandas as pd +import os +import re +import matplotlib.pyplot as plt +pd.set_option("display.max_rows", None) +pd.set_option('display.max_colwidth', None) +# Some nice output +from IPython.display import display, Markdown +import pickle +import json +import traceback + +class base: + """ + Basis class for evaluating an experiment. + Constructor sets + + 1. `path`: path to result folders + 1. `code`: Id of the experiment (name of result folder) + """ + def __init__(self, code, path, include_loading=False, include_benchmarking=True): + """ + Initializes object by setting code and path to result folder. + + :param path: path to result folders + :param code: Id of the experiment (name of result folder) + :param include_loading: Are there results about the loading phase? + :param include_benchmarking: Are there results about the benchmarking phase? + """ + self.path = path+"/"+code + self.code = code + self.include_loading = include_loading + self.include_benchmarking = include_benchmarking + self.workflow = dict() + def end_benchmarking(self, jobname): + """ + Ends a benchmarker job. + This is for storing or cleaning measures. + The results are stored in a pandas DataFrame. + + :param jobname: Name of the job to clean + """ + pass + def end_loading(self, jobname): + """ + Ends a loading job. + This is for storing or cleaning measures. + The results are stored in a pandas DataFrame. + + :param jobname: Name of the job to clean + """ + pass + def evaluate_results(self, pod_dashboard=''): + """ + Collects all pandas DataFrames from the same phase (loading or benchmarking) and combines them into a single DataFrame. + This DataFrame is stored as a pickled file. + """ + pass + def get_df_benchmarking(self): + """ + Returns the DataFrame that containts all information about the benchmarking phase. + + :return: DataFrame of benchmarking results + """ + return pd.DataFrame() + def get_df_loading(self): + """ + Returns the DataFrame that containts all information about the loading phase. + + :return: DataFrame of loading results + """ + return pd.DataFrame() + def reconstruct_workflow(self, df): + """ + Constructs the workflow out of the results (reverse engineer workflow). + This for example looks like this: + {'MySQL-24-4-1024': [[1, 2], [1, 2]], 'MySQL-24-4-2048': [[1, 2], [1, 2]], 'PostgreSQL-24-4-1024': [[1, 2], [1, 2]], 'PostgreSQL-24-4-2048': [[1, 2], [1, 2]]} + + * 4 configurations + * each 2 experiment runs + * consisting of [1,2] benchmarker (first 1 pod, then 2 pods in parallel) + + :param df: DataFrame of benchmarking results + :return: Dict of connections + """ + # Tree of elements of the workflow + workflow = dict() + return workflow + def test_results(self): + """ + Run test script locally. + Extract exit code. + + :return: exit code of test script + """ + return 0 + + +class logger(base): + """ + Basis class for evaluating an experiment. + The transforms log files into DataFrames. + Constructor sets + + 1. `path`: path to result folders + 1. `code`: Id of the experiment (name of result folder) + """ + def end_benchmarking(self, jobname): + """ + Ends a benchmarker job. + This is for storing or cleaning measures. + The results are stored in a pandas DataFrame. 
+ + :param jobname: Name of the job to clean + """ + path = self.path + directory = os.fsencode(path) + for file in os.listdir(directory): + filename = os.fsdecode(file) + if filename.startswith("bexhoma-benchmarker-"+jobname) and filename.endswith(".log"): + #print(filename) + df = self.log_to_df(path+"/"+filename) + #print(df) + if df.empty: + print("Error in "+filename) + else: + filename_df = path+"/"+filename+".df.pickle" + f = open(filename_df, "wb") + pickle.dump(df, f) + f.close() + def end_loading(self, jobname): + """ + Ends a loading job. + This is for storing or cleaning measures. + The results are stored in a pandas DataFrame. + + :param jobname: Name of the job to clean + """ + path = self.path + directory = os.fsencode(path) + for file in os.listdir(directory): + filename = os.fsdecode(file) + if filename.startswith("bexhoma-loading-"+jobname) and filename.endswith(".sensor.log"): + #print(filename) + df = self.log_to_df(path+"/"+filename) + #print(df) + if df.empty: + print("Error in "+filename) + else: + filename_df = path+"/"+filename+".df.pickle" + f = open(filename_df, "wb") + pickle.dump(df, f) + f.close() + def _collect_dfs(self, filename_result='', filename_source_start='', filename_source_end=''): + """ + Collects all pandas DataFrames from the same phase (loading or benchmarking) and combines them into a single DataFrame. + This DataFrame is stored as a pickled file. + Source files are identifies by a pattern "filename_source_start*filename_source_end" + + :param filename_result: Name of the pickled result file + :param filename_source_start: Begin of name pattern for source files + :param filename_source_end: End of name pattern for source files + """ + df_collected = None + path = self.path + directory = os.fsencode(path) + for file in os.listdir(directory): + filename = os.fsdecode(file) + if filename.startswith(filename_source_start) and filename.endswith(filename_source_end): + #print(filename) + df = pd.read_pickle(path+"/"+filename) + if not df.empty: + #df['configuration'] = df.index.name + if df_collected is not None: + df_collected = pd.concat([df_collected, df]) + else: + df_collected = df.copy() + if not df_collected is None and not df_collected.empty: + df_collected['index'] = df_collected.groupby('connection')['connection'].cumcount() + 1#df_collected.index.map(str) + df_collected['connection_pod'] = df_collected['connection']+"-"+df_collected['index'].astype(str) + #df_collected['connection_pod'] = df_collected.groupby('connection')['connection'].cumcount() + 1#.transform('count') + #print(df_collected) + df_collected.drop('index', axis=1, inplace=True) + df_collected.set_index('connection_pod', inplace=True) + #print(df_collected) + filename_df = path+"/"+filename_result + f = open(filename_df, "wb") + pickle.dump(df_collected, f) + f.close() + #self.cluster.logger.debug(df_collected) + def transform_all_logs_benchmarking(self): + """ + Collects all pandas DataFrames from the same phase (loading or benchmarking) and combines them into a single DataFrame. + This DataFrame is stored as a pickled file. 
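+        In particular, this scans the result folder for benchmarker logs named like 'bexhoma-benchmarker-<jobname>-<pod suffix>.log' and calls end_benchmarking() for every job name found.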
+ """ + directory = os.fsencode(self.path) + for file in os.listdir(directory): + filename = os.fsdecode(file) + if filename.startswith("bexhoma-benchmarker") and filename.endswith(".log"): + #print("filename:", filename) + pod_name = filename[filename.rindex("-")+1:-len(".log")] + #print("pod_name:", pod_name) + jobname = filename[len("bexhoma-benchmarker-"):-len("-"+pod_name+".log")] + #print("jobname:", jobname) + self.end_benchmarking(jobname) + def transform_all_logs_loading(self): + """ + Collects all pandas DataFrames from the same phase (loading or benchmarking) and combines them into a single DataFrame. + This DataFrame is stored as a pickled file. + """ + directory = os.fsencode(self.path) + for file in os.listdir(directory): + filename = os.fsdecode(file) + if filename.startswith("bexhoma-loading") and filename.endswith(".sensor.log"): + #print("filename:", filename) + pod_name = filename[filename.rindex("-")+1:-len(".log")] + #print("pod_name:", pod_name) + jobname = filename[len("bexhoma-loading-"):-len("-"+pod_name+".sensor.log")] + #print("jobname:", jobname) + self.end_loading(jobname) + def evaluate_results(self, pod_dashboard=''): + """ + Collects all pandas DataFrames from the same phase (loading or benchmarking) and combines them into a single DataFrame. + This DataFrame is stored as a pickled file. + """ + if self.include_benchmarking: + self.transform_all_logs_benchmarking() + self._collect_dfs(filename_result="bexhoma-benchmarker.all.df.pickle" , filename_source_start="bexhoma-benchmarker", filename_source_end=".log.df.pickle") + if self.include_loading: + self.transform_all_logs_loading() + self._collect_dfs(filename_result="bexhoma-loading.all.df.pickle" , filename_source_start="bexhoma-loading", filename_source_end=".log.df.pickle") + def get_df_benchmarking(self): + """ + Returns the DataFrame that containts all information about the benchmarking phase. + + :return: DataFrame of benchmarking results + """ + filename = "bexhoma-benchmarker.all.df.pickle" + df = pd.read_pickle(self.path+"/"+filename) + #df#.sort_values(["configuration", "pod"]) + return df + def get_df_loading(self): + """ + Returns the DataFrame that containts all information about the loading phase. + + :return: DataFrame of loading results + """ + filename = "bexhoma-loading.all.df.pickle" + df = pd.read_pickle(self.path+"/"+filename) + #df#.sort_values(["configuration", "pod"]) + return df + def plot(self, df, column, x, y, plot_by=None): + if plot_by is None: + fig, ax = plt.subplots() + for key, grp in df.groupby(column): + labels = "{} {}".format(key, column) + ax = grp.plot(ax=ax, kind='line', x=x, y=y, label=labels) + ax.set_ylim(0,df[y].max()) + plt.legend(loc='best') + plt.show() + else: + row=0 + col=0 + groups = df.groupby(plot_by) + #print(len(groups)) + rows = (len(groups)+1)//2 + #print(rows, "rows") + fig, axes = plt.subplots(nrows=rows, ncols=2, sharex=True, squeeze=False) + #print(axes) + for key1, grp in groups:#df3.groupby(col1): + #print(len(axs)) + for key2, grp2 in grp.groupby(column): + #print(grp2) + labels = "{} {}, {} {}".format(key1, plot_by, key2, column) + #print(row,col) + ax = grp2.plot(ax=axes[row,col], kind='line', x=x, y=y, label=labels, title=y, figsize=(12,8), layout=(rows,2)) + ax.set_ylim(0, df[y].max()) + col = col + 1 + if col > 1: + row = row + 1 + col = 0 + plt.legend(loc='best') + plt.tight_layout() + plt.show() + def reconstruct_workflow(self, df): + """ + Constructs the workflow out of the results (reverse engineer workflow). 
+ This for example looks like this: + {'MySQL-24-4-1024': [[1, 2], [1, 2]], 'MySQL-24-4-2048': [[1, 2], [1, 2]], 'PostgreSQL-24-4-1024': [[1, 2], [1, 2]], 'PostgreSQL-24-4-2048': [[1, 2], [1, 2]]} + + * 4 configurations + * each 2 experiment runs + * consisting of [1,2] benchmarker (first 1 pod, then 2 pods in parallel) + + :param df: DataFrame of benchmarking results + :return: Dict of connections + """ + # Tree of elements of the workflow + configs = dict() + for index, row in df.iterrows(): + #print(row['experiment_run'], row['configuration']) + if row['configuration'] not in configs: + configs[row['configuration']] = dict() + #configs[row['configuration']] + if row['experiment_run'] not in configs[row['configuration']]: + configs[row['configuration']][row['experiment_run']] = dict() + if row['client'] not in configs[row['configuration']][row['experiment_run']]: + configs[row['configuration']][row['experiment_run']][row['client']] = dict() + configs[row['configuration']][row['experiment_run']][row['client']]['pods'] = dict() + configs[row['configuration']][row['experiment_run']][row['client']]['result_count'] = 0 + #configs[row['configuration']][row['experiment_run']][row['client']]['run'] = dict() + configs[row['configuration']][row['experiment_run']][row['client']]['pods'][row['pod']] = True + configs[row['configuration']][row['experiment_run']][row['client']]['result_count'] = configs[row['configuration']][row['experiment_run']][row['client']]['result_count'] + 1 + #configs[row['configuration']][row['experiment_run']][row['client']]['run'][row['run']] = dict() + #configs[row['configuration']][row['experiment_run']][row['client']]['run'][row['run']]['vusers'] = row['vusers'] + #print(configs) + #pretty_configs = json.dumps(configs, indent=2) + #print(pretty_configs) + # Flat version of workflow + workflow = dict() + for index, row in configs.items(): + workflow[index] = [] + for i, v in row.items(): + l = [] + for j, w in v.items(): + l.append(len(w['pods'])) + workflow[index].append(l) + #print(workflow) + #pretty_workflow = json.dumps(workflow, indent=2) + #print(pretty_workflow) + return workflow + def log_to_df(self, filename): + """ + Transforms a log file in text format into a pandas DataFrame. + + :param filename: Name of the log file + :return: DataFrame of results + """ + return pd.DataFrame() + def test_results(self): + """ + Run test script locally. + Extract exit code. + + :return: exit code of test script + """ + try: + if self.include_benchmarking: + df = self.get_df_benchmarking() + print(df) + self.workflow = self.reconstruct_workflow(df) + print(self.workflow) + if self.include_loading: + df = self.get_df_loading() + print(df) + return 0 + except Exception as e: + print(e) + return 1 + + + +class ycsb(logger): + """ + Class for evaluating an YCSB experiment. + Constructor sets + + 1. `path`: path to result folders + 1. `code`: Id of the experiment (name of result folder) + """ + def __init__(self, code, path, include_loading=False, include_benchmarking=True): + super().__init__(code, path, True, True) + def log_to_df(self, filename): + """ + Transforms a log file in text format into a pandas DataFrame. 
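+        The parser relies on the BEXHOMA_* and YCSB_* values echoed into the log (e.g. BEXHOMA_CONNECTION, YCSB_THREADCOUNT) and on the bracketed YCSB summary lines such as '[OVERALL], Throughput(ops/sec), ...', which become columns like '[OVERALL].Throughput(ops/sec)'.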
+ + :param filename: Name of the log file + :return: DataFrame of results + """ + try: + with open(filename) as f: + lines = f.readlines() + stdout = "".join(lines) + pod_name = filename[filename.rindex("-")+1:-len(".log")] + connection_name = re.findall('BEXHOMA_CONNECTION:(.+?)\n', stdout)[0] + configuration_name = re.findall('BEXHOMA_CONFIGURATION:(.+?)\n', stdout)[0] + sf = re.findall('SF (.+?)\n', stdout)[0] + experiment_run = re.findall('BEXHOMA_EXPERIMENT_RUN:(.+?)\n', stdout)[0] + client = re.findall('BEXHOMA_CLIENT:(.+?)\n', stdout)[0] + target = re.findall('YCSB_TARGET (.+?)\n', stdout)[0] + threads = re.findall('YCSB_THREADCOUNT (.+?)\n', stdout)[0] + #workload = re.findall('YCSB_WORKLOAD (.+?)\n', stdout)[0] + workload = "A" + pod_count = re.findall('NUM_PODS (.+?)\n', stdout)[0] + result = [] + #for line in s.split("\n"): + for line in lines: + line = line.strip('\n') + cells = line.split(", ") + #print(cells) + if len(cells[0]) and cells[0][0] == "[": + result.append(line.split(", ")) + #print(result) + #return + list_columns = [value[0]+"."+value[1] for value in result] + list_values = [connection_name, configuration_name, experiment_run, client, pod_name, pod_count, threads, target, sf, workload] + list_measures = [value[2] for value in result] + #list_values = [connection_name, configuration_name, experiment_run, pod_name].append([value[2] for value in result]) + #print(list_columns) + #print(list_values) + #print(list_measures) + #exit() + list_values.extend(list_measures) + #print(list_values) + df = pd.DataFrame(list_values) + df = df.T + columns = ['connection', 'configuration', 'experiment_run', 'client', 'pod', 'pod_count', 'threads', 'target', 'sf', 'workload'] + columns.extend(list_columns) + #print(columns) + df.columns = columns + df.index.name = connection_name + return df + except Exception as e: + print(e) + return pd.DataFrame() + def benchmarking_set_datatypes(self, df): + """ + Transforms a pandas DataFrame collection of benchmarking results to suitable data types. 
+ + :param df: DataFrame of results + :return: DataFrame of results + """ + df_typed = df.astype({ + 'connection':'str', + 'configuration':'str', + 'experiment_run':'int', + 'client':'int', + 'pod':'str', + 'pod_count':'int', + 'threads':'int', + 'target':'int', + 'sf':'int', + 'workload':'str', + '[OVERALL].RunTime(ms)':'float', + '[OVERALL].Throughput(ops/sec)':'float', + '[TOTAL_GCS_PS_Scavenge].Count':'int', + '[TOTAL_GC_TIME_PS_Scavenge].Time(ms)':'float', + '[TOTAL_GC_TIME_%_PS_Scavenge].Time(%)':'float', + '[TOTAL_GCS_PS_MarkSweep].Count':'int', + '[TOTAL_GC_TIME_PS_MarkSweep].Time(ms)':'float', + '[TOTAL_GC_TIME_%_PS_MarkSweep].Time(%)':'float', + '[TOTAL_GCs].Count':'int', + '[TOTAL_GC_TIME].Time(ms)':'float', + '[TOTAL_GC_TIME_%].Time(%)':'float', + '[READ].Operations':'int', + '[READ].AverageLatency(us)':'float', + '[READ].MinLatency(us)':'float', + '[READ].MaxLatency(us)':'float', + '[READ].95thPercentileLatency(us)':'float', + '[READ].99thPercentileLatency(us)':'float', + '[READ].Return=OK':'int', + '[CLEANUP].Operations':'int', + '[CLEANUP].AverageLatency(us)':'float', + '[CLEANUP].MinLatency(us)':'float', + '[CLEANUP].MaxLatency(us)':'float', + '[CLEANUP].95thPercentileLatency(us)':'float', + '[CLEANUP].99thPercentileLatency(us)':'float', + '[UPDATE].Operations':'int', + '[UPDATE].AverageLatency(us)':'float', + '[UPDATE].MinLatency(us)':'float', + '[UPDATE].MaxLatency(us)':'float', + '[UPDATE].95thPercentileLatency(us)':'float', + '[UPDATE].99thPercentileLatency(us)':'float', + '[UPDATE].Return=OK': 'int', + }) + return df_typed + def benchmarking_aggregate_by_parallel_pods(self, df): + """ + Transforms a pandas DataFrame collection of benchmarking results to a new DataFrame. + All result lines belonging to pods being run in parallel will be aggregated. 
+ + :param df: DataFrame of results + :return: DataFrame of results + """ + column = "connection" + df_aggregated = pd.DataFrame() + for key, grp in df.groupby(column): + #print(key, len(grp.index)) + #print(grp) + aggregate = { + 'client':'max', + 'pod':'sum', + 'pod_count':'count', + 'threads':'sum', + 'target':'sum', + 'sf':'max', + 'workload':'max', + '[OVERALL].RunTime(ms)':'max', + '[OVERALL].Throughput(ops/sec)':'sum', + '[TOTAL_GCS_PS_Scavenge].Count':'sum', + '[TOTAL_GC_TIME_PS_Scavenge].Time(ms)':'max', + '[TOTAL_GC_TIME_%_PS_Scavenge].Time(%)':'max', + '[TOTAL_GCS_PS_MarkSweep].Count':'sum', + '[TOTAL_GC_TIME_PS_MarkSweep].Time(ms)':'max', + '[TOTAL_GC_TIME_%_PS_MarkSweep].Time(%)':'max', + '[TOTAL_GCs].Count':'sum', + '[TOTAL_GC_TIME].Time(ms)':'max', + '[TOTAL_GC_TIME_%].Time(%)':'max', + '[READ].Operations':'sum', + '[READ].AverageLatency(us)':'mean', + '[READ].MinLatency(us)':'min', + '[READ].MaxLatency(us)':'max', + '[READ].95thPercentileLatency(us)':'max', + '[READ].99thPercentileLatency(us)':'max', + '[READ].Return=OK':'sum', + '[CLEANUP].Operations':'sum', + '[CLEANUP].AverageLatency(us)':'mean', + '[CLEANUP].MinLatency(us)':'min', + '[CLEANUP].MaxLatency(us)':'max', + '[CLEANUP].95thPercentileLatency(us)':'max', + '[CLEANUP].99thPercentileLatency(us)':'max', + '[UPDATE].Operations':'sum', + '[UPDATE].AverageLatency(us)':'mean', + '[UPDATE].MinLatency(us)':'min', + '[UPDATE].MaxLatency(us)':'max', + '[UPDATE].95thPercentileLatency(us)':'max', + '[UPDATE].99thPercentileLatency(us)':'max', + '[UPDATE].Return=OK': 'sum', + } + #print(grp.agg(aggregate)) + dict_grp = dict() + dict_grp['connection'] = key + dict_grp['configuration'] = grp['configuration'][0] + dict_grp['experiment_run'] = grp['experiment_run'][0] + #dict_grp['client'] = grp['client'][0] + #dict_grp['pod'] = grp['pod'][0] + dict_grp = {**dict_grp, **grp.agg(aggregate)} + df_grp = pd.DataFrame(dict_grp, index=[key])#columns=list(dict_grp.keys())) + #df_grp = df_grp.T + #df_grp.set_index('connection', inplace=True) + #print(df_grp) + df_aggregated = pd.concat([df_aggregated, df_grp]) + return df_aggregated + def loading_set_datatypes(self, df): + """ + Transforms a pandas DataFrame collection of loading results to suitable data types. 
+ + :param df: DataFrame of results + :return: DataFrame of results + """ + #df = evaluation.get_df_loading() + df_typed = df.astype({ + 'connection':'str', + 'configuration':'str', + 'experiment_run':'int', + 'client':'int', + 'pod':'str', + 'pod_count':'int', + 'threads':'int', + 'target':'int', + 'sf':'int', + 'workload':'str', + '[OVERALL].RunTime(ms)':'float', + '[OVERALL].Throughput(ops/sec)':'float', + '[TOTAL_GCS_PS_Scavenge].Count':'int', + '[TOTAL_GC_TIME_PS_Scavenge].Time(ms)':'float', + '[TOTAL_GC_TIME_%_PS_Scavenge].Time(%)':'float', + '[TOTAL_GCS_PS_MarkSweep].Count':'float', + '[TOTAL_GC_TIME_PS_MarkSweep].Time(ms)':'float', + '[TOTAL_GC_TIME_%_PS_MarkSweep].Time(%)':'float', + '[TOTAL_GCs].Count':'int', + '[TOTAL_GC_TIME].Time(ms)':'float', + '[TOTAL_GC_TIME_%].Time(%)':'float', + '[CLEANUP].Operations':'int', + '[CLEANUP].AverageLatency(us)':'float', + '[CLEANUP].MinLatency(us)':'float', + '[CLEANUP].MaxLatency(us)':'float', + '[CLEANUP].95thPercentileLatency(us)':'float', + '[CLEANUP].99thPercentileLatency(us)':'float', + '[INSERT].Operations':'int', + '[INSERT].AverageLatency(us)':'float', + '[INSERT].MinLatency(us)':'float', + '[INSERT].MaxLatency(us)':'float', + '[INSERT].95thPercentileLatency(us)':'float', + '[INSERT].99thPercentileLatency(us)':'float', + '[INSERT].Return=OK':'int', + }) + return df_typed + def loading_aggregate_by_parallel_pods(self, df): + """ + Transforms a pandas DataFrame collection of loading results to a new DataFrame. + All result lines belonging to pods being run in parallel will be aggregated. + + :param df: DataFrame of results + :return: DataFrame of results + """ + column = ["connection","experiment_run"] + df_aggregated = pd.DataFrame() + for key, grp in df.groupby(column): + #print(key, len(grp.index)) + #print(grp) + aggregate = { + 'client':'max', + 'pod':'sum', + 'pod_count':'count', + 'threads':'sum', + 'target':'sum', + 'sf':'max', + 'workload':'max', + '[OVERALL].RunTime(ms)':'max', + '[OVERALL].Throughput(ops/sec)':'sum', + '[TOTAL_GCS_PS_Scavenge].Count':'sum', + '[TOTAL_GC_TIME_PS_Scavenge].Time(ms)':'max', + '[TOTAL_GC_TIME_%_PS_Scavenge].Time(%)':'max', + '[TOTAL_GCS_PS_MarkSweep].Count':'sum', + '[TOTAL_GC_TIME_PS_MarkSweep].Time(ms)':'max', + '[TOTAL_GC_TIME_%_PS_MarkSweep].Time(%)':'max', + '[TOTAL_GCs].Count':'sum', + '[TOTAL_GC_TIME].Time(ms)':'max', + '[TOTAL_GC_TIME_%].Time(%)':'max', + '[CLEANUP].Operations':'sum', + '[CLEANUP].AverageLatency(us)':'mean', + '[CLEANUP].MinLatency(us)':'min', + '[CLEANUP].MaxLatency(us)':'max', + '[CLEANUP].95thPercentileLatency(us)':'max', + '[CLEANUP].99thPercentileLatency(us)':'max', + '[INSERT].Operations':'sum', + '[INSERT].AverageLatency(us)':'mean', + '[INSERT].MinLatency(us)':'min', + '[INSERT].MaxLatency(us)':'max', + '[INSERT].95thPercentileLatency(us)':'max', + '[INSERT].99thPercentileLatency(us)':'max', + '[INSERT].Return=OK':'sum', + } + #print(grp.agg(aggregate)) + dict_grp = dict() + dict_grp['connection'] = key[0] + dict_grp['configuration'] = grp['configuration'][0] + dict_grp['experiment_run'] = grp['experiment_run'][0] + #dict_grp['client'] = grp['client'][0] + #dict_grp['pod'] = grp['pod'][0] + #dict_grp['pod_count'] = grp['pod_count'][0] + dict_grp = {**dict_grp, **grp.agg(aggregate)} + #print(dict_grp) + df_grp = pd.DataFrame(dict_grp, index=[key[0]])#columns=list(dict_grp.keys())) + #print(df_grp) + #df_grp = df_grp.T + #df_grp.set_index('connection', inplace=True) + #print(df_grp) + df_aggregated = pd.concat([df_aggregated, df_grp]) + return df_aggregated + + + + 
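
As a minimal usage sketch of the evaluator classes above (assuming the per-pod log files have already been collected into a local result folder; the experiment code and path below are hypothetical placeholders), the ycsb evaluator might be driven like this:

    from bexhoma import evaluators

    # Hypothetical experiment code and local result folder
    evaluation = evaluators.ycsb(code='1672653866', path='../benchmarks',
                                 include_loading=False, include_benchmarking=True)
    evaluation.evaluate_results()                      # combine per-pod results into bexhoma-benchmarker.all.df.pickle
    df = evaluation.get_df_benchmarking()              # one row per benchmarker pod
    df = evaluation.benchmarking_set_datatypes(df)     # cast measure columns to numeric types
    df_agg = evaluation.benchmarking_aggregate_by_parallel_pods(df)
    print(df_agg[['pod_count', 'threads', '[OVERALL].Throughput(ops/sec)']])

Note the aggregation design in the dicts above: throughput and operation counts of parallel pods are summed, while latencies are kept as maxima, minima or means per connection.
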
+class benchbase(logger): + """ + Class for evaluating a Benchbase experiment. + Constructor sets + + 1. `path`: path to result folders + 1. `code`: Id of the experiment (name of result folder) + """ + def log_to_df(self, filename): + """ + Transforms a log file in text format into a pandas DataFrame. + + :param filename: Name of the log file + :return: DataFrame of results + """ + stdout = "" + try: + with open(filename) as f: + lines = f.readlines() + stdout = "".join(lines) + pod_name = filename[filename.rindex("-")+1:-len(".log")] + connection_name = re.findall('BEXHOMA_CONNECTION:(.+?)\n', stdout)[0] + configuration_name = re.findall('BEXHOMA_CONFIGURATION:(.+?)\n', stdout)[0] + experiment_run = re.findall('BEXHOMA_EXPERIMENT_RUN:(.+?)\n', stdout)[0] + client = re.findall('BEXHOMA_CLIENT:(.+?)\n', stdout)[0] + error_timesynch = re.findall('start time has already passed', stdout) + if len(error_timesynch) > 0: + # log is incomplete + return pd.DataFrame() + pod_count = re.findall('NUM_PODS (.+?)\n', stdout)[0] + bench = re.findall('BENCHBASE_BENCH (.+?)\n', stdout)[0] + profile = re.findall('BENCHBASE_PROFILE (.+?)\n', stdout)[0] + target = re.findall('BENCHBASE_TARGET (.+?)\n', stdout)[0] + time = re.findall('BENCHBASE_TIME (.+?)\n', stdout)[0] + #terminals = re.findall('BENCHBASE_TERMINALS (.+?)\n', stdout)[0] + batchsize = re.findall('BENCHBASE_BATCHSIZE (.+?)\n', stdout)[0] + sf = re.findall('SF (.+?)\n', stdout)[0] + errors = re.findall('Exception in thread ', stdout) + num_errors = len(errors) + header = { + 'connection': connection_name, + 'configuration': configuration_name, + 'experiment_run': experiment_run, + 'client': client, + 'pod': pod_name, + 'pod_count': pod_count, + 'bench': bench, + 'profile': profile, + 'target': target, + 'time': time, + #'terminals': terminals, + 'batchsize': batchsize, + 'sf': sf, + 'num_errors': num_errors, + } + df_header = pd.DataFrame(header, index=[0]) + if num_errors == 0: + log = re.findall('####BEXHOMA####(.+?)####BEXHOMA####', stdout, re.DOTALL) + if len(log) > 0: + result = json.loads(log[0]) + df = pd.json_normalize(result) + #self.cluster.logger.debug(df) + df = pd.concat([df_header, df], axis=1) + df.index.name = connection_name + #print(df) + return df + else: + print("no results found in log file {}".format(filename)) + return pd.DataFrame() + else: + return pd.DataFrame() + except Exception as e: + print(e) + print(traceback.format_exc()) + print(stdout) + return pd.DataFrame() + def benchmarking_set_datatypes(self, df): + """ + Transforms a pandas DataFrame collection of benchmarking results to suitable data types. 
+ + :param df: DataFrame of results + :return: DataFrame of results + """ + df_typed = df.astype({ + 'connection':'str', + 'configuration':'str', + 'experiment_run':'int', + 'client':'int', + 'pod':'str', + 'pod_count':'int', + 'bench':'str', + 'profile':'str', + 'target':'int', + 'time':'float', + #'terminals':'int', + 'batchsize':'int', + 'sf':'int', + 'num_errors':'int', + 'scalefactor':'int', + 'Current Timestamp (milliseconds)':'str', + 'Benchmark Type':'str', + 'isolation':'str', + 'DBMS Version':'str', + 'Goodput (requests/second)':'float', + 'terminals':'int', + 'DBMS Type':'str', + 'Throughput (requests/second)':'float', + 'Latency Distribution.95th Percentile Latency (microseconds)':'float', + 'Latency Distribution.Maximum Latency (microseconds)':'float', + 'Latency Distribution.Median Latency (microseconds)':'float', + 'Latency Distribution.Minimum Latency (microseconds)':'float', + 'Latency Distribution.25th Percentile Latency (microseconds)':'float', + 'Latency Distribution.90th Percentile Latency (microseconds)':'float', + 'Latency Distribution.99th Percentile Latency (microseconds)':'float', + 'Latency Distribution.75th Percentile Latency (microseconds)':'float', + 'Latency Distribution.Average Latency (microseconds)':'float', + }) + return df_typed + def benchmarking_aggregate_by_parallel_pods(self, df): + """ + Transforms a pandas DataFrame collection of benchmarking results to a new DataFrame. + All result lines belonging to pods being run in parallel will be aggregated. + + :param df: DataFrame of results + :return: DataFrame of results + """ + column = "connection" + df_aggregated = pd.DataFrame() + for key, grp in df.groupby(column): + #print(key, len(grp.index)) + #print(grp.columns) + aggregate = { + 'client':'max', + 'pod':'sum', + 'pod_count':'count', + 'bench':'max', + 'profile':'max', + 'target':'sum', + 'time':'max', + #'terminals':'sum', + 'batchsize':'mean', + 'sf':'max', + 'num_errors':'sum', + 'scalefactor':'max', + 'Current Timestamp (milliseconds)':'max', + 'Benchmark Type':'max', + 'isolation':'max', + 'DBMS Version':'max', + 'Goodput (requests/second)':'sum', + 'terminals':'sum', + 'DBMS Type':'max', + 'Throughput (requests/second)':'sum', + 'Latency Distribution.95th Percentile Latency (microseconds)':'max', + 'Latency Distribution.Maximum Latency (microseconds)':'max', + 'Latency Distribution.Median Latency (microseconds)':'max', + 'Latency Distribution.Minimum Latency (microseconds)':'min', + 'Latency Distribution.25th Percentile Latency (microseconds)':'max', + 'Latency Distribution.90th Percentile Latency (microseconds)':'max', + 'Latency Distribution.99th Percentile Latency (microseconds)':'max', + 'Latency Distribution.75th Percentile Latency (microseconds)':'max', + 'Latency Distribution.Average Latency (microseconds)':'mean', + } + #print(grp.agg(aggregate)) + dict_grp = dict() + dict_grp['connection'] = key + dict_grp['configuration'] = grp['configuration'][0] + dict_grp['experiment_run'] = grp['experiment_run'][0] + #dict_grp['client'] = grp['client'][0] + #dict_grp['pod'] = grp['pod'][0] + #print(dict_grp) + dict_grp = {**dict_grp, **grp.agg(aggregate)} + df_grp = pd.DataFrame(dict_grp, index=[key])#columns=list(dict_grp.keys())) + #df_grp = df_grp.T + #df_grp.set_index('connection', inplace=True) + #print(df_grp) + df_aggregated = pd.concat([df_aggregated, df_grp]) + return df_aggregated + + + + + +class tpcc(logger): + """ + Class for evaluating an TPC-C experiment (in the HammerDB version). + Constructor sets + + 1. 
`path`: path to result folders + 1. `code`: Id of the experiment (name of result folder) + """ + def log_to_df(self, filename): + """ + Transforms a log file in text format into a pandas DataFrame. + + :param filename: Name of the log file + :return: DataFrame of results + """ + try: + with open(filename) as f: + lines = f.readlines() + stdout = "".join(lines) + # extract "wz4bp" from "./1672716717/bexhoma-benchmarker-mariadb-bht-10-9-4-1672716717-1-1-wz4bp.log" + #print(filename, filename.rindex("-")) + pod_name = filename[filename.rindex("-")+1:-len(".log")] + #print("pod_name:", pod_name) + connection_name = re.findall('BEXHOMA_CONNECTION:(.+?)\n', stdout)[0] + configuration_name = re.findall('BEXHOMA_CONFIGURATION:(.+?)\n', stdout)[0] + experiment_run = re.findall('BEXHOMA_EXPERIMENT_RUN:(.+?)\n', stdout)[0] + iterations = re.findall('HAMMERDB_ITERATIONS (.+?)\n', stdout)[0] + duration = re.findall('HAMMERDB_DURATION (.+?)\n', stdout)[0] + rampup = re.findall('HAMMERDB_RAMPUP (.+?)\n', stdout)[0] + sf = re.findall('SF (.+?)\n', stdout)[0] + vusers_loading = re.findall('PARALLEL (.+?)\n', stdout)[0] + client = re.findall('BEXHOMA_CLIENT:(.+?)\n', stdout)[0] + #client = "1" + error_timesynch = re.findall('start time has already passed', stdout) + if len(error_timesynch) > 0: + # log is incomplete + print(filename, "log is incomplete") + return pd.DataFrame() + pod_count = re.findall('NUM_PODS (.+?)\n', stdout)[0] + errors = re.findall('Error ', stdout) + if len(errors) > 0: + # something went wrong + print(filename, "something went wrong") + num_errors = len(errors) + #print("connection_name:", connection_name) + results = re.findall("Vuser 1:TEST RESULT : System achieved (.+?) NOPM from (.+?) (.+?) TPM", stdout) + #print(results) + vusers = re.findall("Vuser 1:(.+?) Active", stdout) + #print(vusers) + result_tupels = list(zip(results, vusers)) + #for (result, vuser) in result_tupels: + # print(result, vuser) + #print(result) + result_list = [(connection_name, configuration_name, experiment_run, client, pod_name, pod_count, iterations, duration, rampup, sf, i, num_errors, vusers_loading, vuser, result[0], result[1], result[2]) for i, (result, vuser) in enumerate(result_tupels)] + df = pd.DataFrame(result_list) + df.columns = ['connection', 'configuration', 'experiment_run', 'client', 'pod', 'pod_count', 'iterations', 'duration', 'rampup', 'sf', 'run', 'errors', 'vusers_loading', 'vusers', 'NOPM', 'TPM', 'dbms'] + df.index.name = connection_name + return df + except Exception as e: + print(e) + print(traceback.format_exc()) + return pd.DataFrame() + def test_results(self): + """ + Run test script locally. + Extract exit code. + + :return: exit code of test script + """ + try: + #path = self.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+'/{}'.format(self.code) + #path = '../benchmarks/1669163583' + directory = os.fsencode(self.path) + for file in os.listdir(directory): + filename = os.fsdecode(file) + if filename.endswith(".pickle"): + df = pd.read_pickle(self.path+"/"+filename) + print(df) + print(df.index.name) + list_vusers = list(df['vusers']) + print(list_vusers) + print("vusers", " ".join(list_vusers)) + return super().test_results() + except Exception as e: + print(e) + print(traceback.format_exc()) + return 1 + def benchmarking_set_datatypes(self, df): + """ + Transforms a pandas DataFrame collection of benchmarking results to suitable data types. 
+ + :param df: DataFrame of results + :return: DataFrame of results + """ + df_typed = df.astype({ + 'connection':'str', + 'configuration':'str', + 'experiment_run':'int', + 'client':'int', + 'pod':'str', + 'pod_count':'int', + 'iterations':'int', + 'duration':'int', + 'rampup':'int', + 'sf':'int', + 'run':'int', + 'errors':'int', + 'vusers_loading':'int', + 'vusers':'int', + 'NOPM':'int', + 'TPM':'int', + 'dbms':'str', + }) + return df_typed + def benchmarking_aggregate_by_parallel_pods(self, df): + """ + Transforms a pandas DataFrame collection of benchmarking results to a new DataFrame. + All result lines belonging to pods being run in parallel will be aggregated. + + :param df: DataFrame of results + :return: DataFrame of results + """ + column = ["connection","run"] + df_aggregated = pd.DataFrame() + for key, grp in df.groupby(column): + #print(key, len(grp.index)) + #print(grp) + aggregate = { + 'client':'max', + 'pod':'sum', + 'pod_count':'count', + 'iterations':'max', + 'duration':'max', + 'sf':'max', + 'run':'max', + 'errors':'max', + 'vusers_loading':'max', + 'vusers':'sum', + 'NOPM':'sum', + 'TPM':'sum', + 'dbms':'max', + } + #print(grp.agg(aggregate)) + dict_grp = dict() + dict_grp['connection'] = key[0] + dict_grp['configuration'] = grp['configuration'][0] + dict_grp['experiment_run'] = grp['experiment_run'][0] + #dict_grp['client'] = grp['client'][0] + #dict_grp['pod'] = grp['pod'][0] + dict_grp = {**dict_grp, **grp.agg(aggregate)} + df_grp = pd.DataFrame(dict_grp, index=[key[0]])#columns=list(dict_grp.keys())) + #df_grp = df_grp.T + #df_grp.set_index('connection', inplace=True) + #print(df_grp) + df_aggregated = pd.concat([df_aggregated, df_grp]) + return df_aggregated + + + +# grep "start time has already passed" ../benchmarks/1672653866/* diff --git a/bexhoma/experiments.py b/bexhoma/experiments.py index e4f5b3fb..2a91f57a 100644 --- a/bexhoma/experiments.py +++ b/bexhoma/experiments.py @@ -43,6 +43,7 @@ import pickle import json +from bexhoma import evaluators urllib3.disable_warnings() logging.basicConfig(level=logging.ERROR) @@ -112,12 +113,16 @@ def __init__(self, self.nodes = {} self.maintaining_parameters = {} self.loading_parameters = {} + self.loading_patch = "" self.benchmarking_parameters = {} self.jobtemplate_maintaining = "" self.jobtemplate_loading = "" self.querymanagement = {} + self.additional_labels = dict() self.workload = {} self.monitoring_active = True + self.prometheus_interval = "3s" + self.prometheus_timeout = "3s" self.loading_active = False self.num_loading = 0 self.num_loading_pods = 0 @@ -125,10 +130,15 @@ def __init__(self, self.num_maintaining = 0 self.num_maintaining_pods = 0 self.name_format = None + self.script = "" + self.initscript = [] + self.indexing = "" + self.indexscript = [] # k8s: self.namespace = self.cluster.namespace self.configurations = [] self.storage_label = '' + self.evaluator = evaluators.base(code=self.code, path=self.cluster.resultfolder, include_loading=True, include_benchmarking=True) def wait(self, sec): """ Function for waiting some time and inform via output about this @@ -164,6 +174,15 @@ def set_experiments_configfolder(self, experiments_configfolder): :param experiments_configfolder: Relative path to an experiment folder """ self.experiments_configfolder = experiments_configfolder + def set_additional_labels(self, **kwargs): + """ + Sets additional labels, that will be put to K8s objects (and ignored otherwise). + This is for the SUT component. + Can be overwritten by configuration. 
+
+        :param kwargs: Dict of labels, example 'SF' => 100
+        """
+        self.additional_labels = {**self.additional_labels, **kwargs}
     def set_workload(self, **kwargs):
         """
         Sets mata data about the experiment, for example name and description.
@@ -265,6 +284,14 @@ def set_loading_parameters(self, **kwargs):
         :param kwargs: Dict of meta data, example 'PARALLEL' => '64'
         """
         self.loading_parameters = kwargs
+    def patch_loading(self, patch):
+        """
+        Patches the YAML of the loading components.
+        Can be overwritten by configuration.
+
+        :param patch: String in YAML format, overwrites the content of the basic YAML job template
+        """
+        self.loading_patch = patch
     def set_loading(self, parallel, num_pods=None):
         """
         Sets job parameters for loading components: Number of parallel pods and optionally (if different) total number of pods.
@@ -435,7 +462,7 @@ def test_results(self):
         finally:
             return 1
         return 1
-    def set_experiment(self, instance=None, volume=None, docker=None, script=None):
+    def set_experiment(self, instance=None, volume=None, docker=None, script=None, indexing=None):
         """
         Read experiment details from cluster config
@@ -456,6 +483,9 @@ def set_experiment(self, instance=None, volume=None, docker=None, script=None):
         if script is not None:
             self.script = script
             self.initscript = self.cluster.volumes[self.volume]['initscripts'][self.script]
+        if indexing is not None:
+            self.indexing = indexing
+            self.indexscript = self.cluster.volumes[self.volume]['initscripts'][self.indexing]
     def evaluate_results(self, pod_dashboard=''):
         """
         Let the dashboard pod build the evaluations.
@@ -649,6 +679,23 @@ def add_benchmark_list(self, list_clients):
         """
         for config in self.configurations:
             config.add_benchmark_list(list_clients)
+    def get_workflow_list(self):
+        """
+        Returns the benchmarking workflow as a dict of lists of lists.
+        Keys are configuration names.
+        Values are lists of lists.
+        Each inner list is, for example, added by add_benchmark_list().
+        Inner lists are repeated according to self.num_experiment_to_apply.
+ Example: {'PostgreSQL-24-1-16384': [[1, 2]], 'MySQL-24-1-16384': [[1, 2]], 'PostgreSQL-24-1-32768': [[1, 2]], 'MySQL-24-1-32768': [[1, 2]]} + + :return: Dict of benchmarking workflow + """ + workflow = {} + for configuration in self.configurations: + workflow[configuration.configuration] = [configuration.benchmark_list_template for i in range(configuration.num_experiment_to_apply)] + self.cluster.logger.debug('default.get_workflow_list({})'.format(workflow)) + #print(workflow) + return workflow def work_benchmark_list(self, intervals=30, stop=True): """ Run typical workflow: @@ -807,38 +854,38 @@ def work_benchmark_list(self, intervals=30, stop=True): else: print("{} is loading".format(config.configuration)) # all jobs of configuration - benchmarker + #app = self.cluster.appname + #component = 'benchmarker' + #configuration = '' + #jobs = self.cluster.get_jobs(app, component, self.code, configuration) + # success of job app = self.cluster.appname component = 'benchmarker' configuration = '' + #success = self.cluster.get_job_status(app=app, component=component, experiment=self.code, configuration=configuration) jobs = self.cluster.get_jobs(app, component, self.code, configuration) # all pods to these jobs pods = self.cluster.get_job_pods(app, component, self.code, configuration) - # status per pod - for p in pods: - status = self.cluster.get_pod_status(p) - self.cluster.logger.debug('job-pod {} has status {}'.format(p, status)) - #print(p,status) - if status == 'Succeeded': - #if status != 'Running': - self.cluster.store_pod_log(p) - self.cluster.delete_pod(p) - if status == 'Failed': - #if status != 'Running': - self.cluster.store_pod_log(p) - self.cluster.delete_pod(p) - # success of job - app = self.cluster.appname - component = 'benchmarker' - configuration = '' - success = self.cluster.get_job_status(app=app, component=component, experiment=self.code, configuration=configuration) - jobs = self.cluster.get_jobs(app, component, self.code, configuration) # status per job for job in jobs: success = self.cluster.get_job_status(job) self.cluster.logger.debug('job {} has success status {}'.format(job, success)) #print(job, success) if success: - self.end_benchmarking(job) + # status per pod + for p in pods: + status = self.cluster.get_pod_status(p) + self.cluster.logger.debug('job-pod {} has status {}'.format(p, status)) + #print(p,status) + if status == 'Succeeded': + #if status != 'Running': + self.cluster.store_pod_log(p) + self.cluster.delete_pod(p) + if status == 'Failed': + #if status != 'Running': + self.cluster.store_pod_log(p) + self.cluster.delete_pod(p) + self.end_benchmarking(job, config) self.cluster.delete_job(job) if len(pods) == 0 and len(jobs) == 0: do = False @@ -909,14 +956,107 @@ def benchmark_list(self, list_clients): self.cluster.delete_job(job) if len(pods) == 0 and len(jobs) == 0: break - def end_benchmarking(self, jobname): + def get_job_timing_benchmarking(self, jobname): + timing_benchmarker = self.extract_job_timing(jobname, container="dbmsbenchmarker") + return timing_benchmarker + def get_job_timing_loading(self, jobname): + timing_datagenerator = self.extract_job_timing(jobname, container="datagenerator") + timing_sensor = self.extract_job_timing(jobname, container="sensor") + timing_total = timing_datagenerator + timing_sensor + return timing_datagenerator, timing_sensor, timing_total + #return total_time, generator_time, loader_time + def extract_job_timing(self, jobname, container): + def get_job_timing(filename): + """ + Transforms a log file in text 
format into list of pairs of timing information. + This reads BEXHOMA_START and BEXHOMA_END + + :param filename: Name of the log file + :return: List of pairs (start,end) per pod + """ + try: + with open(filename) as f: + lines = f.readlines() + stdout = "".join(lines) + pod_name = filename[filename.rindex("-")+1:-len(".log")] + timing_start = re.findall('BEXHOMA_START:(.+?)\n', stdout)[0] + timing_end = re.findall('BEXHOMA_END:(.+?)\n', stdout)[0] + return (int(timing_start), int(timing_end)) + except Exception as e: + print(e) + return (0,0) + directory = os.fsencode(self.path) + #print(jobname) + timing = [] + for file in os.listdir(directory): + filename = os.fsdecode(file) + #if filename.startswith("bexhoma-loading-"+jobname) and filename.endswith(".{container}.log".format(container=container)): + if filename.startswith(jobname) and filename.endswith(".{container}.log".format(container=container)): + #print(filename) + (timing_start, timing_end) = get_job_timing(self.path+"/"+filename) + #print(df) + if (timing_start, timing_end) == (0,0): + print("Error in "+filename) + else: + timing.append((timing_start, timing_end)) + print(timing) + return timing + def end_benchmarking(self, jobname, config=None): """ Ends a benchmarker job. This is for storing or cleaning measures. :param jobname: Name of the job to clean + :param config: Configuration object """ self.cluster.logger.debug('default.end_benchmarking({})'.format(jobname)) + # mark pod with new end time and duration + job_labels = self.cluster.get_jobs_labels(app=self.cluster.appname, component='benchmarker', experiment=self.code) + if len(job_labels) > 0 and len(job_labels[jobname]) > 0: + # get pairs (start,end) of benchmarking pods + timing_benchmarker = self.get_job_timing_benchmarking(jobname) + if config is not None: + config.benchmarking_timespans = {} + config.benchmarking_timespans['benchmarker'] = timing_benchmarker + start_time = int(job_labels[jobname]['start_time']) + connection = job_labels[jobname]['connection'] + #self.timeLoadingEnd = default_timer() + #self.timeLoading = float(self.timeLoadingEnd) - float(self.timeLoadingStart) + #self.experiment.cluster.logger.debug("LOADING LABELS") + #self.experiment.cluster.logger.debug(self.timeLoading) + #self.experiment.cluster.logger.debug(float(self.timeLoadingEnd)) + #self.experiment.cluster.logger.debug(float(self.timeLoadingStart)) + #self.timeLoading = float(self.timeLoading) + float(timeLoading) + now = datetime.utcnow() + now_string = now.strftime('%Y-%m-%d %H:%M:%S') + time_now = str(datetime.now()) + end_time = int(datetime.timestamp(datetime.strptime(time_now,'%Y-%m-%d %H:%M:%S.%f'))) + self.cluster.logger.debug("BENCHMARKING LABELS") + self.cluster.logger.debug("connection: "+str(connection)) + self.cluster.logger.debug("start_time: "+str(start_time)) + self.cluster.logger.debug("end_time: "+str(end_time)) + self.cluster.logger.debug("duration: "+str(end_time-start_time)) + #fullcommand = 'label pods '+pod_sut+' --overwrite loaded=True timeLoadingEnd="{}" timeLoading={}'.format(time_now_int, self.timeLoading) + #print(fullcommand) + #self.experiment.cluster.kubectl(fullcommand) + # copy config to pod - dashboard + pods = self.cluster.get_pods(component='dashboard') + if len(pods) > 0: + pod_dashboard = pods[0] + # get monitoring for loading + if self.monitoring_active: + cmd = {} + cmd['fetch_benchmarking_metrics'] = 'python metrics.py -r /results/ -db -ct stream -c {} -cf {} -f {} -e {} -ts {} -te {}'.format(connection, connection+'.config', 
'/results/'+self.code, self.code, start_time, end_time) + #cmd['fetch_loading_metrics'] = 'python metrics.py -r /results/ -db -ct loading -c {} -cf {} -f {} -e {} -ts {} -te {}'.format(connection, c['name']+'.config', '/results/'+self.code, self.code, self.timeLoadingStart, self.timeLoadingEnd) + stdin, stdout, stderr = self.cluster.execute_command_in_pod(command=cmd['fetch_benchmarking_metrics'], pod=pod_dashboard, container="dashboard") + self.cluster.logger.debug(stdout) + self.cluster.logger.debug(stderr) + # upload connections infos again, metrics has overwritten it + filename = 'connections.config' + cmd['upload_connection_file'] = 'cp {from_file} {to} -c dashboard'.format(to=pod_dashboard+':/results/'+str(self.code)+'/'+filename, from_file=self.path+"/"+filename) + stdout = self.cluster.kubectl(cmd['upload_connection_file']) + self.cluster.logger.debug(stdout) + self.evaluator.end_benchmarking(jobname) def end_loading(self, jobname): """ Ends a loading job. @@ -925,6 +1065,7 @@ def end_loading(self, jobname): :param jobname: Name of the job to clean """ self.cluster.logger.debug('default.end_loading({})'.format(jobname)) + self.evaluator.end_loading(jobname) @@ -1009,6 +1150,7 @@ def __init__(self, self.set_experiment(script='SF'+str(SF)+'-index') self.cluster.set_experiments_configfolder('experiments/tpch') parameter.defaultParameters = {'SF': str(SF)} + self.set_additional_labels(SF=SF) self.set_queryfile(queryfile) self.set_workload( name = 'TPC-H Queries SF='+str(SF), @@ -1056,73 +1198,8 @@ def __init__(self, info = 'This experiment performs some TPC-C inspired workloads.' ) self.storage_label = 'tpch-'+str(SF) - def end_benchmarking(self,jobname): - """ - Ends a benchmarker job. - This is for storing or cleaning measures. - - :param jobname: Name of the job to clean - """ - #app = self.appname - #code = self.code - #experiment = code - #jobname = self.generate_component_name(app=app, component=component, experiment=experiment, configuration=configuration, client=str(client)) - #jobname = self.benchmarker_jobname - self.cluster.logger.debug('tpcc.end_benchmarking({})'.format(jobname)) - pods = self.cluster.get_pods(component='dashboard') - if len(pods) > 0: - pod_dashboard = pods[0] - status = self.cluster.get_pod_status(pod_dashboard) - print(pod_dashboard, status) - while status != "Running": - self.wait(10) - status = self.cluster.get_pod_status(pod_dashboard) - print(pod_dashboard, status) - filename_logs = self.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+'/{}/{}*'.format(self.code, jobname) - #filename_logs = '/results/{}/{}*'.format(self.code, jobname) - filename_df = self.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code)+'/'+jobname+'.df.pickle' - cmd = {} - # get connection name - cmd['extract_results'] = 'grep -R BEXHOMA_CONNECTION {filename_logs}'.format(filename_logs=filename_logs) - print(cmd['extract_results']) - stdout = os.popen(cmd['extract_results']).read() - #stdin, stdout, stderr = self.experiment.cluster.execute_command_in_pod(command=cmd['extract_results'], pod=pod_dashboard, container="dashboard")#self.yamlfolder+deployment) - print(stdout) - connection_name = re.findall('BEXHOMA_CONNECTION:(.+?)\n', stdout) - # get NOPM and TPM - cmd['extract_results'] = 'grep -R RESULT {filename_logs}'.format(filename_logs=filename_logs) - print(cmd['extract_results']) - stdout = os.popen(cmd['extract_results']).read() - #stdin, stdout, stderr = 
self.experiment.cluster.execute_command_in_pod(command=cmd['extract_results'], pod=pod_dashboard, container="dashboard")#self.yamlfolder+deployment) - print(stdout) - list_nopm = re.findall('achieved (.+?) NOPM', stdout) - list_tpm = re.findall('from (.+?) ', stdout) - # get vuser - cmd['extract_results'] = 'grep -H -R \'Active Virtual Users\' {filename_logs}'.format(filename_logs=filename_logs) - print(cmd['extract_results']) - stdout = os.popen(cmd['extract_results']).read() - #stdin, stdout, stderr = self.experiment.cluster.execute_command_in_pod(command=cmd['extract_results'], pod=pod_dashboard, container="dashboard")#self.yamlfolder+deployment) - print(stdout) - #list_vuser = re.findall('Vuser 1:(.+?) Active', stdout) - list_vuser_pod = re.findall(str(self.code)+'-(.+?).log:Vuser 1:(.+?) Active', stdout) - list_pods = [x for (x,y) in list_vuser_pod] - list_vuser = [y for (x,y) in list_vuser_pod] - # what we have found - print(list_nopm) - print(list_tpm) - print(list_vuser) - print(list_pods) - # build DataFrame - if len(list_nopm) and len(list_tpm) and len(list_vuser): - df = pd.DataFrame(list(zip(list_nopm, list_tpm, list_vuser, list_pods))) - df.columns = ['NOPM','TPM', 'VUSERS', 'pods'] - if len(connection_name) > 0: - df.index.name = str(connection_name[0]) - print(df) - f = open(filename_df, "wb") - pickle.dump(df, f) - f.close() - #self.loading_parameters['HAMMERDB_VUSERS'] + self.jobtemplate_loading = "jobtemplate-loading-hammerdb.yml" + self.evaluator = evaluators.tpcc(code=self.code, path=self.cluster.resultfolder, include_loading=False, include_benchmarking=True) def test_results(self): """ Run test script locally. @@ -1130,57 +1207,62 @@ def test_results(self): :return: exit code of test script """ - try: - path = self.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+'/{}'.format(self.code) - #path = '../benchmarks/1669163583' - directory = os.fsencode(path) - for file in os.listdir(directory): - filename = os.fsdecode(file) - if filename.endswith(".pickle"): - df = pd.read_pickle(path+"/"+filename) - print(df) - print(df.index.name) - print(list(df['VUSERS'])) - print(" ".join(l)) - return 0 - except Exception as e: - return 1 + self.cluster.logger.debug('tpcc.test_results()') + self.evaluator.test_results() + workflow = self.get_workflow_list() + if workflow == self.evaluator.workflow: + print("Result workflow complete") + else: + print("Result workflow not complete") def evaluate_results(self, pod_dashboard=''): """ Build a DataFrame locally that contains all benchmarking results. This is specific to HammerDB. 
""" self.cluster.logger.debug('tpcc.evaluate_results()') - df_collected = None - path = self.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+'/{}'.format(self.code) - #path = '../benchmarks/1669640632' - directory = os.fsencode(path) - for file in os.listdir(directory): - filename = os.fsdecode(file) - if filename.startswith("bexhoma-benchmarker") and filename.endswith(".df.pickle"): - #print(filename) - df = pd.read_pickle(path+"/"+filename) - #df = self.log_to_df(path+"/"+filename) - #filename_df = path+"/"+filename+".df.pickle" - #f = open(filename_df, "wb") - #pickle.dump(df, f) - #f.close() - if not df.empty: - df['configuration'] = df.index.name - if df_collected is not None: - df_collected = pd.concat([df_collected, df]) - else: - df_collected = df.copy() - if not df_collected is None and not df_collected.empty: - df_collected['index'] = df_collected.index.map(str) - df_collected['connection'] = df_collected['configuration']+"-"+df_collected['index'] - df_collected.drop('index', axis=1, inplace=True) - df_collected.set_index('connection', inplace=True) - filename_df = path+"/bexhoma-benchmarker.all.df.pickle" - f = open(filename_df, "wb") - pickle.dump(df_collected, f) - f.close() - self.cluster.logger.debug(df_collected) + self.evaluator.evaluate_results(pod_dashboard) + if len(pod_dashboard) == 0: + pod_dashboard = self.cluster.get_dashboard_pod_name(component='dashboard') + if len(pod_dashboard) > 0: + #pod_dashboard = pods[0] + status = self.cluster.get_pod_status(pod_dashboard) + print(pod_dashboard, status) + while status != "Running": + self.wait(10) + status = self.cluster.get_pod_status(pod_dashboard) + print(pod_dashboard, status) + if self.monitoring_active: + cmd = {} + cmd['transform_benchmarking_metrics'] = 'python metrics.evaluation.py -r /results/ -db -ct loading -e {}'.format(self.code) + stdin, stdout, stderr = self.cluster.execute_command_in_pod(command=cmd['transform_benchmarking_metrics'], pod=pod_dashboard, container="dashboard") + self.cluster.logger.debug(stdout) + cmd['transform_benchmarking_metrics'] = 'python metrics.evaluation.py -r /results/ -db -ct stream -e {}'.format(self.code) + stdin, stdout, stderr = self.cluster.execute_command_in_pod(command=cmd['transform_benchmarking_metrics'], pod=pod_dashboard, container="dashboard") + self.cluster.logger.debug(stdout) + # copy logs and yamls to result folder + #print("Copy configuration and logs", end="", flush=True) + #directory = os.fsencode(self.path) + #for file in os.listdir(directory): + # filename = os.fsdecode(file) + # if filename.endswith(".log") or filename.endswith(".yml") or filename.endswith(".error") or filename.endswith(".pickle"): + # self.cluster.kubectl('cp '+self.path+"/"+filename+' '+pod_dashboard+':/results/'+str(self.code)+'/'+filename+' -c dashboard') + # print(".", end="", flush=True) + #print("done!") + cmd = {} + #cmd['update_dbmsbenchmarker'] = 'git pull'#/'+str(self.code) + #self.cluster.execute_command_in_pod(command=cmd['update_dbmsbenchmarker'], pod=pod_dashboard, container="dashboard") + #print("Join results ", end="", flush=True) + #cmd['merge_results'] = 'python merge.py -r /results/ -c '+str(self.code) + #self.cluster.execute_command_in_pod(command=cmd['merge_results'], pod=pod_dashboard, container="dashboard") + #print("done!") + #print("Build evaluation cube ", end="", flush=True) + #cmd['evaluate_results'] = 'python benchmark.py read -e yes -r /results/'+str(self.code) + 
#self.cluster.execute_command_in_pod(command=cmd['evaluate_results'], pod=pod_dashboard, container="dashboard") + #print("done!") + # download all results from cluster + #filename = 'evaluation.json' + cmd['download_results'] = 'cp {from_file} {to} -c dashboard'.format(from_file=pod_dashboard+':/results/'+str(self.code)+'/', to=self.path+"/") + self.cluster.kubectl(cmd['download_results']) @@ -1294,23 +1376,8 @@ def set_querymanagement_maintaining(self, ) #self.monitoring_active = True self.maintaining_active = True - def end_loading(self, jobname): - """ - Ends a loading job. - This is for storing or cleaning measures. - :param jobname: Name of the job to clean - """ - self.cluster.logger.debug('tsbs.end_loading({})'.format(jobname)) - filename_logs = self.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+'/{}/{}*'.format(self.code, jobname) - cmd = {} - # get connection name - cmd['extract_results'] = 'grep -R loaded {filename_logs}'.format(filename_logs=filename_logs) - print(cmd['extract_results']) - stdout = os.popen(cmd['extract_results']).read() - #stdin, stdout, stderr = self.experiment.cluster.execute_command_in_pod(command=cmd['extract_results'], pod=pod_dashboard, container="dashboard")#self.yamlfolder+deployment) - print(stdout) - return super().end_loading(jobname) + """ ############################################################################ @@ -1347,6 +1414,8 @@ def __init__(self, info = 'This experiment performs some YCSB inspired workloads.' ) self.storage_label = 'tpch-'+str(SF) + self.jobtemplate_loading = "jobtemplate-loading-ycsb.yml" + self.evaluator = evaluators.ycsb(code=self.code, path=self.cluster.resultfolder, include_loading=False, include_benchmarking=True) def log_to_df(self, filename): try: with open(filename) as f: @@ -1369,48 +1438,6 @@ def log_to_df(self, filename): except Exception as e: print(e) return pd.DataFrame() - def end_loading(self, jobname): - """ - Ends a loading job. - This is for storing or cleaning measures. - - :param jobname: Name of the job to clean - """ - self.cluster.logger.debug('ycsb.end_loading({})'.format(jobname)) - path = self.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+'/{}'.format(self.code) - #path = '../benchmarks/1669640632' - directory = os.fsencode(path) - for file in os.listdir(directory): - filename = os.fsdecode(file) - if filename.startswith("bexhoma-") and filename.endswith(".sensor.log"): - print(filename) - df = self.log_to_df(path+"/"+filename) - filename_df = path+"/"+filename+".df.pickle" - f = open(filename_df, "wb") - pickle.dump(df, f) - f.close() - return super().end_loading(jobname) - def end_benchmarking(self, jobname): - """ - Ends a benchmarker job. - This is for storing or cleaning measures. - - :param jobname: Name of the job to clean - """ - self.cluster.logger.debug('ycsb.end_benchmarking({})'.format(jobname)) - path = self.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+'/{}'.format(self.code) - #path = '../benchmarks/1669640632' - directory = os.fsencode(path) - for file in os.listdir(directory): - filename = os.fsdecode(file) - if filename.startswith("bexhoma-benchmarker") and filename.endswith(".log"): - print(filename) - df = self.log_to_df(path+"/"+filename) - filename_df = path+"/"+filename+".df.pickle" - f = open(filename_df, "wb") - pickle.dump(df, f) - f.close() - return super().end_benchmarking(jobname) def test_results(self): """ Run test script locally. 
@@ -1419,19 +1446,12 @@ def test_results(self): :return: exit code of test script """ self.cluster.logger.debug('ycsb.test_results()') - try: - path = self.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+'/{}'.format(self.code) - #path = '../benchmarks/1669163583' - directory = os.fsencode(path) - for file in os.listdir(directory): - filename = os.fsdecode(file) - if filename.endswith(".pickle"): - df = pd.read_pickle(path+"/"+filename) - print(filename) - print(df) - return 0 - except Exception as e: - return 1 + self.evaluator.test_results() + workflow = self.get_workflow_list() + if workflow == self.evaluator.workflow: + print("Result workflow complete") + else: + print("Result workflow not complete") def get_result_sum(self, df, category='[OVERALL]', type='Throughput(ops/sec)'): try: df2=df[df['type'] == type] @@ -1565,21 +1585,32 @@ def get_overview_benchmarking(self, dfs={}): def evaluate_results(self, pod_dashboard=''): """ Build a DataFrame locally that contains all benchmarking results. - This is specific to ycsb. + This is specific to YCSB. """ self.cluster.logger.debug('ycsb.evaluate_results()') - #path = self.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+'/{}'.format(self.code) - path = self.path - df = self.get_overview_loading() - filename_df = path+"/bexhoma-loading.all.df.pickle" - f = open(filename_df, "wb") - pickle.dump(df, f) - f.close() - df = self.get_overview_benchmarking() - filename_df = path+"/bexhoma-benchmarker.all.df.pickle" - f = open(filename_df, "wb") - pickle.dump(df, f) - f.close() + self.evaluator.evaluate_results(pod_dashboard) + # download results + if len(pod_dashboard) == 0: + pod_dashboard = self.cluster.get_dashboard_pod_name(component='dashboard') + if len(pod_dashboard) > 0: + #pod_dashboard = pods[0] + status = self.cluster.get_pod_status(pod_dashboard) + print(pod_dashboard, status) + while status != "Running": + self.wait(10) + status = self.cluster.get_pod_status(pod_dashboard) + print(pod_dashboard, status) + if self.monitoring_active: + cmd = {} + cmd['transform_benchmarking_metrics'] = 'python metrics.evaluation.py -r /results/ -db -ct loading -e {}'.format(self.code) + stdin, stdout, stderr = self.cluster.execute_command_in_pod(command=cmd['transform_benchmarking_metrics'], pod=pod_dashboard, container="dashboard") + self.cluster.logger.debug(stdout) + cmd['transform_benchmarking_metrics'] = 'python metrics.evaluation.py -r /results/ -db -ct stream -e {}'.format(self.code) + stdin, stdout, stderr = self.cluster.execute_command_in_pod(command=cmd['transform_benchmarking_metrics'], pod=pod_dashboard, container="dashboard") + self.cluster.logger.debug(stdout) + cmd = {} + cmd['download_results'] = 'cp {from_file} {to} -c dashboard'.format(from_file=pod_dashboard+':/results/'+str(self.code)+'/', to=self.path+"/") + self.cluster.kubectl(cmd['download_results']) def get_result(self, component='loading'): #path = self.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+'/{}'.format(self.code) path = self.path @@ -1683,6 +1714,8 @@ def __init__(self, info = 'This experiment performs some Benchbase workloads.' 
) self.storage_label = 'tpch-'+str(SF) + self.jobtemplate_loading = "jobtemplate-loading-benchbase.yml" + self.evaluator = evaluators.benchbase(code=self.code, path=self.cluster.resultfolder, include_loading=False, include_benchmarking=True) def log_to_df(self, filename): self.cluster.logger.debug('benchbase.log_to_df({})'.format(filename)) try: @@ -1708,96 +1741,6 @@ def get_parts_of_name(self, name): parts_name = re.findall('{(.+?)}', self.name_format) parts_values = re.findall('-(.+?)-', "-"+name.replace("-","--")+"--") return dict(zip(parts_name, parts_values)) - def end_loading(self, jobname): - """ - Ends a loading job. - This is for storing or cleaning measures. - Currently does nothing, since loading does not generate measures. - - :param jobname: Name of the job to clean - """ - return super().end_loading(jobname) - # legacy code - self.cluster.logger.debug('benchbase.end_loading({})'.format(jobname)) - #df_collected = None - path = self.path - #path = self.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+'/{}'.format(self.code) - directory = os.fsencode(path) - for file in os.listdir(directory): - filename = os.fsdecode(file) - if filename.startswith("bexhoma-") and filename.endswith(".sensor.log"): - print(filename) - df = self.log_to_df(path+"/"+filename) - filename_df = path+"/"+filename+".df.pickle" - f = open(filename_df, "wb") - pickle.dump(df, f) - f.close() - """ - if not df.empty: - if self.name_format is not None: - name_parts = self.get_parts_of_name(df.index.name) - print(name_parts) - for col, value in name_parts.items(): - df[col] = value - df['connection'] = df.index.name - if df_collected is not None: - df_collected = pd.concat([df_collected, df]) - else: - df_collected = df.copy() - """ - """ - if not df_collected is None and not df_collected.empty: - df_collected.set_index('connection', inplace=True) - filename_df = path+"/"+jobname+".all.df.pickle" - f = open(filename_df, "wb") - pickle.dump(df_collected, f) - f.close() - """ - return super().end_loading(jobname) - def end_benchmarking(self, jobname): - """ - Ends a benchmarker job. - This is for storing or cleaning measures. 
- - :param jobname: Name of the job to clean - """ - self.cluster.logger.debug('benchbase.end_benchmarking({})'.format(jobname)) - #df_collected = None - #path = self.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+'/{}'.format(self.code) - path = self.path - directory = os.fsencode(path) - for file in os.listdir(directory): - filename = os.fsdecode(file) - #if filename.startswith("bexhoma-benchmarker") and filename.endswith(".log"): - if filename.startswith(jobname) and filename.endswith(".log"): - #print(filename) - df = self.log_to_df(path+"/"+filename) - filename_df = path+"/"+filename+".df.pickle" - f = open(filename_df, "wb") - pickle.dump(df, f) - f.close() - """ - if not df.empty: - if self.name_format is not None: - name_parts = self.get_parts_of_name(df.index.name) - print(name_parts) - for col, value in name_parts.items(): - df[col] = value - df['connection'] = df.index.name - if df_collected is not None: - df_collected = pd.concat([df_collected, df]) - else: - df_collected = df.copy() - """ - """ - if not df_collected is None and not df_collected.empty: - df_collected.set_index('connection', inplace=True) - filename_df = path+"/"+jobname+".all.df.pickle" - f = open(filename_df, "wb") - pickle.dump(df_collected, f) - f.close() - """ - return super().end_benchmarking(jobname) def test_results(self): """ Run test script locally. @@ -1806,64 +1749,40 @@ def test_results(self): :return: exit code of test script """ self.cluster.logger.debug('benchbase.test_results()') - try: - #path = self.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+'/{}'.format(self.code) - path = self.path - #path = '../benchmarks/1669163583' - directory = os.fsencode(path) - for file in os.listdir(directory): - filename = os.fsdecode(file) - if filename.endswith(".pickle"): - df = pd.read_pickle(path+"/"+filename) - print(filename) - print(df) - return 0 - except Exception as e: - return 1 + self.evaluator.test_results() + workflow = self.get_workflow_list() + if workflow == self.evaluator.workflow: + print("Result workflow complete") + else: + print("Result workflow not complete") def evaluate_results(self, pod_dashboard=''): """ Build a DataFrame locally that contains all benchmarking results. - This is specific to benchbase. + This is specific to Benchbase. 
""" - #self.cluster.logger.debug('benchbase.evaluate_results()') - df_collected = None - #path = self.cluster.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+'/{}'.format(self.code) - path = self.path - #path = '../benchmarks/1669640632' - directory = os.fsencode(path) - for file in os.listdir(directory): - filename = os.fsdecode(file) - if filename.startswith("bexhoma-benchmarker") and filename.endswith(".log.df.pickle"): - #print(filename) - #df = self.log_to_df(path+"/"+filename) - df = pd.read_pickle(path+"/"+filename) - #filename_df = path+"/"+filename+".df.pickle" - #f = open(filename_df, "wb") - #pickle.dump(df, f) - #f.close() - if not df.empty: - if self.name_format is not None: - name_parts = self.get_parts_of_name(df.index.name) - #print(name_parts) - for col, value in name_parts.items(): - df[col] = value - df['configuration'] = df.index.name - if df_collected is not None: - df_collected = pd.concat([df_collected, df]) - else: - df_collected = df.copy() - if not df_collected is None and not df_collected.empty: - df_collected['index'] = (df_collected.groupby('configuration').cumcount() + 1).map(str)#df_collected.index.map(str) - df_collected['connection'] = df_collected['configuration']+"-"+df_collected['index'] - df_collected.drop('index', axis=1, inplace=True) - df_collected.set_index('connection', inplace=True) - filename_df = path+"/bexhoma-benchmarker.all.df.pickle" - f = open(filename_df, "wb") - pickle.dump(df_collected, f) - f.close() - #print(df_collected) - return df_collected - #self.cluster.logger.debug(df_collected) - + self.cluster.logger.debug('benchbase.evaluate_results()') + self.evaluator.evaluate_results(pod_dashboard) + # download results + if len(pod_dashboard) == 0: + pod_dashboard = self.cluster.get_dashboard_pod_name(component='dashboard') + if len(pod_dashboard) > 0: + #pod_dashboard = pods[0] + status = self.cluster.get_pod_status(pod_dashboard) + print(pod_dashboard, status) + while status != "Running": + self.wait(10) + status = self.cluster.get_pod_status(pod_dashboard) + print(pod_dashboard, status) + if self.monitoring_active: + cmd = {} + cmd['transform_benchmarking_metrics'] = 'python metrics.evaluation.py -r /results/ -db -ct loading -e {}'.format(self.code) + stdin, stdout, stderr = self.cluster.execute_command_in_pod(command=cmd['transform_benchmarking_metrics'], pod=pod_dashboard, container="dashboard") + self.cluster.logger.debug(stdout) + cmd['transform_benchmarking_metrics'] = 'python metrics.evaluation.py -r /results/ -db -ct stream -e {}'.format(self.code) + stdin, stdout, stderr = self.cluster.execute_command_in_pod(command=cmd['transform_benchmarking_metrics'], pod=pod_dashboard, container="dashboard") + self.cluster.logger.debug(stdout) + cmd = {} + cmd['download_results'] = 'cp {from_file} {to} -c dashboard'.format(from_file=pod_dashboard+':/results/'+str(self.code)+'/', to=self.path+"/") + self.cluster.kubectl(cmd['download_results']) diff --git a/requirements.txt b/requirements.txt index 38629661..5eb6395f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ psutil>=5.6.1 dbmsbenchmarker>=0.11.20 m2r2 myst_parser +HiYaPyCo==0.5.1