
Commit ec6bd6e

V0.6.0 Introduced Cloud-Orchestration and Scalable Loading and Maintaining Components (#102)
* Prepare next release
* Masterscript: maintaining does not change timer settings of benchmarker
* Masterscript: reconnect and try again if the failure was not due to "not found"
* Masterscript: improved output about workflow
* Masterscript: aws example nodegroup scale
* Masterscript: aws example nodegroup get size
* Masterscript: aws example nodegroup wait for size
* Masterscript: aws example nodegroup show size
* Masterscript: aws example nodegroup show and check size
* Masterscript: aws example nodegroup name and type
* Masterscript: aws example dict of nodegroups
* Masterscript: aws example nodegroup name necessary for scaling
* Masterscript: aws example nodegroup name and type
* Masterscript: maintaining duration default 4h
* Masterscript: maintaining parameters and nodeSelector
* Masterscript: nodeSelector for SUT, monitoring and benchmarker
* Masterscript: maintaining is accepted as running even when num_maintaining=0
* Masterscript: request resources from command line
* Masterscript: prepare max_sut per cluster and per experiment
* Masterscript: catch JSON exception in getNode()
* Masterscript: maintaining example TSBS as experiment setup
* Masterscript: jobtemplate_maintaining per experiment
* Masterscript: initContainers in maintaining
* Masterscript: maintaining also watches succeeded pods
* Masterscript: maintaining also respects long-pending pods
* Masterscript: loading pods controlled by redis queue
* Masterscript: loading pods controlled by redis queue, include params
* Masterscript: initContainers parameters set correctly
* Masterscript: also stop loading jobs and pods
* Masterscript: number of parallel loaders
* Masterscript: empty schema before loading pods
* Masterscript: also stop loading jobs and pods when putting SUT down
* Masterscript: loading is only finished when both outside and inside the cluster are done
* Masterscript: also stop loading jobs and pods - in all configurations
* Masterscript: also stop loading jobs and pods - in all configurations (config, experiment, cluster)
* Masterscript: check status of parallel loading
* Masterscript: job status explained
* Masterscript: job status returns true iff all pods are completed
* Masterscript: job status more output
* Masterscript: job status returns true iff all pods are completed
* Masterscript: job status returns true iff all pods are completed, then delete all loading pods
* Masterscript: job status returns true iff all pods are completed, copy loading pod logs
* Masterscript: copy logs of all containers of loading pods
* Masterscript: mark SUT as loaded as soon as all pods have status success - include this as timeLoading
* Masterscript: use maintaining structure for setting loading parameters
* Masterscript: mark SUT as loaded
* Masterscript: mark SUT as loaded, read old labels first
* Masterscript: mark SUT as loaded, read old labels first and convert to float
* Masterscript: mark SUT as loaded, read old labels first and convert to float, debug output
* Masterscript: mark SUT as loaded, read old labels first and convert to int
* Masterscript: mark SUT as loaded, read old labels first and convert to int, cleaned
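Several of the loading items above ("loading pods controlled by redis queue", "number of parallel loaders") describe loader pods that pull their work from a shared Redis list until it is empty. The following is a minimal sketch of that pattern using the standard redis-py client, not the actual bexhoma loader; the host, queue name, and environment variables are illustrative placeholders.

    # Sketch of a queue-driven loader worker (assumption: not the bexhoma implementation).
    # Each parallel loader pod pops one work item at a time from a shared Redis list,
    # so adding pods simply drains the queue faster; a pod completes when the queue is empty.
    import os
    import redis

    REDIS_HOST = os.environ.get('REDIS_HOST', 'redis')               # hypothetical service name
    QUEUE_NAME = os.environ.get('LOADING_QUEUE', 'bexhoma-loading')  # hypothetical queue name

    def run_loader():
        client = redis.Redis(host=REDIS_HOST, port=6379, decode_responses=True)
        while True:
            item = client.lpop(QUEUE_NAME)           # next chunk of data to load
            if item is None:                         # queue drained: loader pod can finish
                break
            print("loading chunk {}".format(item))   # placeholder for the actual import step

    if __name__ == '__main__':
        run_loader()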
1 parent 0929756 commit ec6bd6e

File tree

7 files changed: +676 −92 lines changed


bexhoma/clusters.py

Lines changed: 75 additions & 0 deletions
@@ -65,3 +65,78 @@ def store_pod_log(self, pod_name, container=''):


class aws(kubernetes):
    def __init__(self, clusterconfig='cluster.config', configfolder='experiments/', yamlfolder='k8s/', context=None, code=None, instance=None, volume=None, docker=None, script=None, queryfile=None):
        self.code = code
        kubernetes.__init__(self, clusterconfig=clusterconfig, configfolder=configfolder, context=context, yamlfolder=yamlfolder, code=self.code, instance=instance, volume=volume, docker=docker, script=script, queryfile=queryfile)
        self.cluster = self.contextdata['cluster']
    def eksctl(self, command):
        #fullcommand = 'eksctl --context {context} {command}'.format(context=self.context, command=command)
        fullcommand = 'eksctl {command}'.format(command=command)
        self.logger.debug('aws.eksctl({})'.format(fullcommand))
        #print(fullcommand)
        return os.popen(fullcommand).read()# os.system(fullcommand)
    def getNodes(self, app='', nodegroup_type='', nodegroup_name=''):
        self.logger.debug('aws.getNodes()')
        label = ''
        if len(app)==0:
            app = self.appname
        label += 'app='+app
        if len(nodegroup_type)>0:
            label += ',type='+nodegroup_type
        if len(nodegroup_name)>0:
            label += ',alpha.eksctl.io/nodegroup-name='+nodegroup_name
        try:
            api_response = self.v1core.list_node(label_selector=label)
            #pprint(api_response)
            if len(api_response.items) > 0:
                return api_response.items
            else:
                return []
        except ApiException as e:
            print("Exception when calling CoreV1Api->list_node for getNodes: %s\n" % e)
            print("Create new access token")
            self.cluster_access()
            self.wait(2)
            return self.getNodes(app=app, nodegroup_type=nodegroup_type, nodegroup_name=nodegroup_name)
    def scale_nodegroups(self, nodegroup_names, size=None):
        print("aws.scale_nodegroups({nodegroup_names}, {size})".format(nodegroup_names=nodegroup_names, size=size))
        for nodegroup_name, size_default in nodegroup_names.items():
            if size is not None:
                size_default = size
            self.scale_nodegroup(nodegroup_name, size_default)
    def scale_nodegroup(self, nodegroup_name, size):
        print("aws.scale_nodegroup({nodegroup_name}, {size})".format(nodegroup_name=nodegroup_name, size=size))
        if not self.check_nodegroup(nodegroup_name=nodegroup_name, num_nodes_aux_planned=size):
            #fullcommand = "eksctl scale nodegroup --cluster=Test-2 --nodes=0 --nodes-min=0 --name=Kleine_Gruppe"
            command = "scale nodegroup --cluster={cluster} --nodes={size} --name={nodegroup_name}".format(cluster=self.cluster, size=size, nodegroup_name=nodegroup_name)
            return self.eksctl(command)
        #if not self.check_nodegroup(nodegroup_type, num_nodes_aux_planned):
        #    command = "scale nodegroup --cluster={cluster} --nodes={size} --name={nodegroup}".format(cluster=self.cluster, size=size, nodegroup=nodegroup)
        #    return self.eksctl(command)
        #else:
        #    return ""
    def get_nodegroup_size(self, nodegroup_type='', nodegroup_name=''):
        resp = self.getNodes(nodegroup_type=nodegroup_type, nodegroup_name=nodegroup_name)
        num_nodes_aux_actual = len(resp)
        self.logger.debug('aws.get_nodegroup_size({},{}) = {}'.format(nodegroup_type, nodegroup_name, num_nodes_aux_actual))
        return num_nodes_aux_actual
    def check_nodegroup(self, nodegroup_type='', nodegroup_name='', num_nodes_aux_planned=0):
        num_nodes_aux_actual = self.get_nodegroup_size(nodegroup_type=nodegroup_type, nodegroup_name=nodegroup_name)
        self.logger.debug('aws.check_nodegroup({}, {}, {}) = {}'.format(nodegroup_type, nodegroup_name, num_nodes_aux_planned, num_nodes_aux_actual))
        return num_nodes_aux_planned == num_nodes_aux_actual
    def wait_for_nodegroups(self, nodegroup_names, size=None):
        print("aws.wait_for_nodegroups({nodegroup_names})".format(nodegroup_names=nodegroup_names))
        for nodegroup_name, size_default in nodegroup_names.items():
            if size is not None:
                size_default = size
            self.wait_for_nodegroup(nodegroup_name=nodegroup_name, num_nodes_aux_planned=size_default)
    def wait_for_nodegroup(self, nodegroup_type='', nodegroup_name='', num_nodes_aux_planned=0):
        while (not self.check_nodegroup(nodegroup_type=nodegroup_type, nodegroup_name=nodegroup_name, num_nodes_aux_planned=num_nodes_aux_planned)):
            self.wait(30)
        print("Nodegroup {},{} ready".format(nodegroup_type, nodegroup_name))
        return True
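Together these methods give a scale-then-wait workflow for eksctl-managed nodegroups: scale_nodegroups() issues the eksctl commands for groups whose size differs, and wait_for_nodegroups() polls getNodes() until the requested sizes are reached. A usage sketch; the cluster config path and nodegroup names below are placeholder assumptions, not values from the repository.

    # Usage sketch for the new aws cluster class; config path and nodegroup
    # names are placeholders chosen for illustration.
    from bexhoma import clusters

    cluster = clusters.aws(clusterconfig='cluster.config', configfolder='experiments/')

    # Desired size per eksctl nodegroup (name -> number of nodes).
    nodegroups = {'benchmarker_group': 1, 'loading_group': 4}

    cluster.scale_nodegroups(nodegroups)     # runs "eksctl scale nodegroup ..." for each group whose size differs
    cluster.wait_for_nodegroups(nodegroups)  # polls getNodes() every 30s until each group has the requested size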

0 commit comments
