Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate OS-kubespray cluster before provision #334

Merged
merged 2 commits into from
Jul 6, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion kqueen/auth/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
def generate_auth_options(auth_list):
auth_options = {}

methods = auth_list.strip().split(',')
methods = [item.strip() for item in auth_list.split(',')]
for m in methods:
if m in AUTH_MODULES:
auth_options[m] = AUTH_MODULES[m]
Expand Down
75 changes: 52 additions & 23 deletions kqueen/engines/openstack_kubespray.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from kqueen import kubeapi

import base64
import ipaddress
import json
import logging
import openstack
Expand Down Expand Up @@ -103,6 +104,9 @@ class OpenstackKubesprayEngine(BaseEngine):
"order": 70,
"label": "Comma separated list of nameservers",
"default": config.KS_DEFAULT_NAMESERVERS,
"validators": {
"ip_list": True,
},
},
"availability_zone": {
"type": "text",
Expand Down Expand Up @@ -196,15 +200,17 @@ def __init__(self, cluster, *args, **kwargs):

def provision(self):
try:
self.os.validate_cluster_metadata()

self.cluster.state = config.CLUSTER_PROVISIONING_STATE
self.cluster.save()
app.executor.submit(self._run_provisioning)
except Exception as e:
message = "Failed to submit provisioning task: %s" % e
logger.exception(message)
self.cluster.state = config.CLUSTER_ERROR_STATE
self.cluster.metadata['status_message'] = repr(e)
self.cluster.save()
return False, e
# Reraise error to return correct error code
raise e
return True, None

def _run_provisioning(self):
Expand All @@ -221,6 +227,7 @@ def _run_provisioning(self):
except Exception as e:
self.cluster.state = config.CLUSTER_ERROR_STATE
logger.exception("Failed to provision cluster: %s" % e)
self.cluster.metadata['status_message'] = getattr(e, 'details', repr(e))
finally:
self.cluster.save()

Expand Down Expand Up @@ -561,38 +568,60 @@ def __init__(self, stack_name, *, os_kwargs, cluster, extra_ssh_key):
self.extra_ssh_key = extra_ssh_key
self.stack_name = stack_name
self.os_kwargs = os_kwargs
self.meta = {}

def validate_cluster_metadata(self):
def validate_ip(address):
address = address.strip()
# Raises ValueError if IP address is not valid
ipaddress.ip_address(address)
return address

mc = self.cluster.metadata["master_count"]
if mc % 2 == 0 or mc == 1:
raise ValueError("Master node count must be an odd number at least 3 or greater")
self.meta["master_count"] = mc
self.meta["slave_count"] = self.cluster.metadata["slave_count"]

self.meta["dns"] = [validate_ip(ip) for ip in

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest to place it after all is None checks.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why? I placed it here since it performs faster then checks below, so if error will be here, response will be sent faster

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is it faster, I don't get it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since it doesn't send api call to the openstack

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, okay

self.cluster.metadata.get("dns_nameservers", []).split(",")]

self.meta["ext_net"] = self.c.get_network(self.cluster.metadata["floating_network"])
if self.meta["ext_net"] is None:
raise ValueError("External network '%s' is not found" % self.cluster.metadata["floating_network"])
self.meta["image"] = self.c.get_image(self.cluster.metadata["image_name"])
if self.meta["image"] is None:
raise ValueError("Image '%s' is not found" % self.cluster.metadata["image_name"])
self.meta["flavor"] = self.c.get_flavor(self.cluster.metadata["flavor"])
if self.meta["flavor"] is None:
raise ValueError("Flavor '%s' is not found" % self.cluster.metadata["flavor"])

azone = self.cluster.metadata.get("availability_zone")
if azone and azone not in self.c.list_availability_zone_names():
raise ValueError("Availability zone '{}' is not found".format(self.cluster.metadata["availability_zone"]))
else:
self.meta["azone"] = azone

def provision(self):
master_count = self.cluster.metadata["master_count"]
slave_count = self.cluster.metadata["slave_count"]
dns = self.cluster.metadata["dns_nameservers"].split(",")
ext_net = self.c.get_network(self.cluster.metadata["floating_network"])
if ext_net is None:
raise Exception("External network %s not found" % self.cluster.metadata["floating_network"])
image = self.c.get_image(self.cluster.metadata["image_name"])
if image is None:
raise Exception("Image %s not found" % self.cluster.metadata["image_name"])
flavor = self.c.get_flavor(self.cluster.metadata["flavor"])
if flavor is None:
raise Exception("Flavor %s not found" % self.cluster.metadata["flavor"])

resources = {
"masters": [],
"slaves": [],
}
network = self.c.create_network(self.stack_name)
subnet = self.c.create_subnet(network, cidr="10.1.0.0/16",
subnet_name=self.stack_name,
dns_nameservers=dns)
dns_nameservers=self.meta['dns'])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe get() ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some fields are required and some are not, let me also send 400 if required field is not provided

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okok

router = self.c.create_router(name=self.stack_name,
ext_gateway_net_id=ext_net.id)
ext_gateway_net_id=self.meta['ext_net'].id)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe get() ?

self.c.add_router_interface(router, subnet["id"])
resources["router_id"] = router["id"]
resources["network_id"] = network["id"]
resources["subnet_id"] = subnet["id"]
for master in self._boot_servers(name=self.stack_name,
servers_range=range(master_count),
image=image,
flavor=flavor,
servers_range=range(self.meta["master_count"]),
image=self.meta['image'],
flavor=self.meta['flavor'],
network=network):
fip = self.c.create_floating_ip("public", server=master)
resources["masters"].append({
Expand All @@ -602,9 +631,9 @@ def provision(self):
"hostname": master.name,
})
for slave in self._boot_servers(name=self.stack_name,
servers_range=range(slave_count),
image=image,
flavor=flavor,
servers_range=range(self.meta["slave_count"]),
image=self.meta['image'],
flavor=self.meta['flavor'],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe get() ?

network=network,
add_random_suffix=True):
resources["slaves"].append({
Expand Down
2 changes: 1 addition & 1 deletion kqueen/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def update_state(self):
return self.state
self.state = remote_cluster['state']
else:
self.state = config.get('CLUSTER_UNKNOWN_STATE')
return config.get('CLUSTER_UNKNOWN_STATE')

# Check for stale clusters
max_age = timedelta(seconds=config.get('PROVISIONER_TIMEOUT'))
Expand Down