
Commit

V0.6.0 Introduced Cloud-Orchestration and Scalable Loading and Maintaining Components (#102)

* Prepare next release

* Masterscript: maintaining does not change timer settings of benchmarker

* Masterscript: reconnect and try again if the failure is not due to "not found"

* Masterscript: improved output about workflow

* Masterscript: aws example nodegroup scale

* Masterscript: aws example nodegroup get size

* Masterscript: aws example nodegroup wait for size

* Masterscript: aws example nodegroup show size

* Masterscript: aws example nodegroup show and check size

* Masterscript: aws example nodegroup name and type

* Masterscript: aws example dict of nodegroups

* Masterscript: aws example nodegroup name necessary for scaling

* Masterscript: aws example nodegroup name and type

* Masterscript: maintaining duration default 4h

* Masterscript: maintaining parameters and nodeSelector

* Masterscript: nodeSelector for sut, monitoring and benchmarker

* Masterscript: maintaining is accepted as running even when num_maintaining=0

* Masterscript: request resources from command line

* Masterscript: prepare max_sut per cluster and per experiment

* Masterscript: catch json exception in getNode()

* Masterscript: maintaining example TSBS as experiment setup

* Masterscript: jobtemplate_maintaining per experiment

* Masterscript: initContainers in maintaining

* Masterscript: maintaining also watches succeeded pods

* Masterscript: maintaining also respects long-pending pods

* Masterscript: loading pods controlled by redis queue (see the first sketch after this list)

* Masterscript: loading pods controlled by redis queue, include params

* Masterscript: initContainers parameters set correctly

* Masterscript: Stop also loading jobs and pods

* Masterscript: Number of parallel loaders

* Masterscript: Empty schema before loading pods

* Masterscript: Stop also loading jobs and pods when putting sut down

* Masterscript: Loading is only finished when work both outside and inside the cluster is done

* Masterscript: Stop also loading jobs and pods - in all configurations

* Masterscript: Stop also loading jobs and pods - in all configurations (config, experiment, cluster)

* Masterscript: Check status of parallel loading

* Masterscript: Job status explained

* Masterscript: Job status returns true iff all pods are completed

* Masterscript: Job status more output

* Masterscript: Job status returns true iff all pods are completed

* Masterscript: Job status returns true iff all pods are completed, then delete all loading pods

* Masterscript: Job status returns true iff all pods are completed, copy loading pods logs

* Masterscript: Copy logs of all containers of loading pods

* Masterscript: Mark SUT as loaded as soon as all pods have status success - include this as timeLoading (see the second sketch after this list)

* Masterscript: Use maintaining structure for setting loading parameters

* Masterscript: Mark SUT as loaded

* Masterscript: Mark SUT as loaded, read old labels at first

* Masterscript: Mark SUT as loaded, read old labels at first and convert to float

* Masterscript: Mark SUT as loaded, read old labels at first and convert to float, debug output

* Masterscript: Mark SUT as loaded, read old labels at first and convert to int

* Masterscript: Mark SUT as loaded, read old labels at first and convert to int, cleaned
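
The messages above outline the new scalable loading workflow: loading pods pull their work from a shared Redis queue, loading counts as finished only when all loading pods have completed, and the SUT is then labelled as loaded together with the measured timeLoading. The two sketches below illustrate these ideas only in outline; the Redis host, queue name, namespace, label selector and deployment name are assumptions chosen for illustration, not bexhoma's actual identifiers.

# First sketch: the work-queue pattern behind "loading pods controlled by redis queue".
# REDIS_HOST, QUEUE_NAME and load_chunk are hypothetical, not bexhoma's API.
import os
import redis

REDIS_HOST = os.environ.get('REDIS_HOST', 'redis')      # assumed service name
QUEUE_NAME = os.environ.get('QUEUE_NAME', 'loading')    # assumed queue key

def load_chunk(chunk_id):
    # Placeholder for loading one chunk of data into the SUT.
    print("loading chunk {}".format(chunk_id))

def worker():
    # Each parallel loading pod runs this loop: pop one work item from the
    # shared queue and exit once the queue is empty.
    client = redis.StrictRedis(host=REDIS_HOST, port=6379, db=0)
    while True:
        item = client.lpop(QUEUE_NAME)
        if item is None:
            break                                        # queue drained, pod completes
        load_chunk(item.decode('utf-8'))

if __name__ == '__main__':
    worker()

Because each worker exits once the queue is drained, the masterscript can equate "loading finished inside the cluster" with all loading pods reaching status Succeeded, which is what the second sketch checks before labelling the SUT:

# Second sketch: completion check and labelling, assuming the official kubernetes
# Python client; namespace, label selector and deployment name are made up.
from kubernetes import client, config

config.load_kube_config()
core = client.CoreV1Api()
apps = client.AppsV1Api()

namespace = 'default'                                    # assumed
pods = core.list_namespaced_pod(namespace, label_selector='app=bexhoma,component=loading')
if pods.items and all(p.status.phase == 'Succeeded' for p in pods.items):
    # Read the old labels first, convert timeLoading to int, then write back as strings.
    deployment = apps.read_namespaced_deployment('bexhoma-sut', namespace)
    labels = deployment.metadata.labels or {}
    time_loading = int(float(labels.get('timeLoading', 0))) + 42    # 42 = hypothetical measured seconds
    body = {'metadata': {'labels': {'loaded': 'True', 'timeLoading': str(time_loading)}}}
    apps.patch_namespaced_deployment('bexhoma-sut', namespace, body)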
perdelt committed Oct 18, 2022
1 parent 0929756 commit ec6bd6e
Showing 7 changed files with 676 additions and 92 deletions.
75 changes: 75 additions & 0 deletions bexhoma/clusters.py
@@ -65,3 +65,78 @@ def store_pod_log(self, pod_name, container=''):




class aws(kubernetes):
    def __init__(self, clusterconfig='cluster.config', configfolder='experiments/', yamlfolder='k8s/', context=None, code=None, instance=None, volume=None, docker=None, script=None, queryfile=None):
        self.code = code
        kubernetes.__init__(self, clusterconfig=clusterconfig, configfolder=configfolder, context=context, yamlfolder=yamlfolder, code=self.code, instance=instance, volume=volume, docker=docker, script=script, queryfile=queryfile)
        self.cluster = self.contextdata['cluster']
    def eksctl(self, command):
        # Run an eksctl command and return its console output
        #fullcommand = 'eksctl --context {context} {command}'.format(context=self.context, command=command)
        fullcommand = 'eksctl {command}'.format(command=command)
        self.logger.debug('aws.eksctl({})'.format(fullcommand))
        #print(fullcommand)
        return os.popen(fullcommand).read()  # os.system(fullcommand)
    def getNodes(self, app='', nodegroup_type='', nodegroup_name=''):
        # Return the cluster nodes matching the app, nodegroup type and nodegroup name labels
        self.logger.debug('aws.getNodes()')
        label = ''
        if len(app) == 0:
            app = self.appname
        label += 'app='+app
        if len(nodegroup_type) > 0:
            label += ',type='+nodegroup_type
        if len(nodegroup_name) > 0:
            label += ',alpha.eksctl.io/nodegroup-name='+nodegroup_name
        try:
            api_response = self.v1core.list_node(label_selector=label)
            #pprint(api_response)
            if len(api_response.items) > 0:
                return api_response.items
            else:
                return []
        except ApiException as e:
            # Access token may have expired: renew cluster access and retry
            print("Exception when calling CoreV1Api->list_node for getNodes: %s\n" % e)
            print("Create new access token")
            self.cluster_access()
            self.wait(2)
            return self.getNodes(app=app, nodegroup_type=nodegroup_type, nodegroup_name=nodegroup_name)
    def scale_nodegroups(self, nodegroup_names, size=None):
        # Scale several nodegroups; an explicit size overrides the per-nodegroup default
        print("aws.scale_nodegroups({nodegroup_names}, {size})".format(nodegroup_names=nodegroup_names, size=size))
        for nodegroup_name, size_default in nodegroup_names.items():
            if size is not None:
                size_default = size
            self.scale_nodegroup(nodegroup_name, size_default)
    def scale_nodegroup(self, nodegroup_name, size):
        # Scale a single nodegroup via eksctl, but only if it does not already have the planned size
        print("aws.scale_nodegroup({nodegroup_name}, {size})".format(nodegroup_name=nodegroup_name, size=size))
        if not self.check_nodegroup(nodegroup_name=nodegroup_name, num_nodes_aux_planned=size):
            #fullcommand = "eksctl scale nodegroup --cluster=Test-2 --nodes=0 --nodes-min=0 --name=Kleine_Gruppe"
            command = "scale nodegroup --cluster={cluster} --nodes={size} --name={nodegroup_name}".format(cluster=self.cluster, size=size, nodegroup_name=nodegroup_name)
            return self.eksctl(command)
        #if not self.check_nodegroup(nodegroup_type, num_nodes_aux_planned):
        #    command = "scale nodegroup --cluster={cluster} --nodes={size} --name={nodegroup}".format(cluster=self.cluster, size=size, nodegroup=nodegroup)
        #    return self.eksctl(command)
        #else:
        #    return ""
    def get_nodegroup_size(self, nodegroup_type='', nodegroup_name=''):
        # Number of nodes currently present in the nodegroup
        resp = self.getNodes(nodegroup_type=nodegroup_type, nodegroup_name=nodegroup_name)
        num_nodes_aux_actual = len(resp)
        self.logger.debug('aws.get_nodegroup_size({},{}) = {}'.format(nodegroup_type, nodegroup_name, num_nodes_aux_actual))
        return num_nodes_aux_actual
    def check_nodegroup(self, nodegroup_type='', nodegroup_name='', num_nodes_aux_planned=0):
        # True iff the nodegroup has exactly the planned number of nodes
        num_nodes_aux_actual = self.get_nodegroup_size(nodegroup_type=nodegroup_type, nodegroup_name=nodegroup_name)
        self.logger.debug('aws.check_nodegroup({}, {}, {}) = {}'.format(nodegroup_type, nodegroup_name, num_nodes_aux_planned, num_nodes_aux_actual))
        return num_nodes_aux_planned == num_nodes_aux_actual
    def wait_for_nodegroups(self, nodegroup_names, size=None):
        # Wait until all given nodegroups have reached their planned sizes
        print("aws.wait_for_nodegroups({nodegroup_names})".format(nodegroup_names=nodegroup_names))
        for nodegroup_name, size_default in nodegroup_names.items():
            if size is not None:
                size_default = size
            self.wait_for_nodegroup(nodegroup_name=nodegroup_name, num_nodes_aux_planned=size_default)
    def wait_for_nodegroup(self, nodegroup_type='', nodegroup_name='', num_nodes_aux_planned=0):
        # Poll every 30 seconds until the nodegroup has the planned number of nodes
        while not self.check_nodegroup(nodegroup_type=nodegroup_type, nodegroup_name=nodegroup_name, num_nodes_aux_planned=num_nodes_aux_planned):
            self.wait(30)
        print("Nodegroup {},{} ready".format(nodegroup_type, nodegroup_name))
        return True
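
The new aws class wraps eksctl for scaling nodegroups and the kubernetes client for counting their nodes: scale_nodegroup only calls eksctl if the planned size differs from the actual one, and wait_for_nodegroup polls every 30 seconds until they match. A minimal usage sketch follows, assuming the imports at the top of clusters.py (os, the kubernetes client) and making up the context, nodegroup names and sizes:

# Hypothetical usage of the aws class added in this commit; the context,
# nodegroup names and sizes are invented examples, not repository values.
from bexhoma import clusters

cluster = clusters.aws(clusterconfig='cluster.config', context='my-eks-context')

# Scale two (assumed) nodegroups to their per-group default sizes ...
nodegroups = {'auxiliary': 1, 'loading': 4}
cluster.scale_nodegroups(nodegroups)

# ... and block until the cluster actually reports that many nodes per group.
cluster.wait_for_nodegroups(nodegroups)

# Scale everything back down when the experiment is finished.
cluster.scale_nodegroups(nodegroups, size=0)
cluster.wait_for_nodegroups(nodegroups, size=0)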


