Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

K8S: RFC-1123 compliant job names #712

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
51 changes: 42 additions & 9 deletions metaflow/plugins/aws/eks/kubernetes.py
Expand Up @@ -5,6 +5,7 @@
import shlex
import time
import re
import hashlib

from metaflow import util
from metaflow.datastore.util.s3tail import S3Tail
Expand Down Expand Up @@ -44,6 +45,40 @@ class KubernetesKilledException(MetaflowException):
headline = "Kubernetes Batch job killed"


def generate_rfc1123_name(flow_name,
run_id,
step_name,
task_id,
attempt
):
"""
Generate RFC 1123 compatible name. Specifically, the format is:
<let-or-digit>[*[<let-or-digit-or-hyphen>]<let-or-digit>]

The generated name consists from a human-readable prefix, derived from
flow/step/task/attempt, and a hash suffux.
"""
long_name = "-".join(
[
flow_name,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically flow_name can start with an underscore, which will make the output of this function non-compliant.

Also, we would need to sanitize the strings for labels as well - we can (at a later stage) make this a utility method.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed. Also I realized that the previous version could generate identical names for HelloFlow and Helloflow, fixed that too

run_id,
step_name,
task_id,
attempt,
]
)
hash = hashlib.sha256(long_name.encode('utf-8')).hexdigest()

if long_name.startswith('_'):
# RFC 1123 names can't start with hyphen so slap an extra prefix on it
sanitized_long_name = 'u' + long_name.replace('_', '-').lower()
else:
sanitized_long_name = long_name.replace('_', '-').lower()

# the name has to be under 63 chars total
return sanitized_long_name[:57] + '-' + hash[:5]


class Kubernetes(object):
def __init__(
self,
Expand Down Expand Up @@ -150,15 +185,13 @@ def create_job(
# attempt_id while submitting the job to the Kubernetes cluster. If
# that is indeed the case, we can rely on Kubernetes to generate a name
# for us.
job_name = "-".join(
[
self._flow_name,
self._run_id,
self._step_name,
self._task_id,
self._attempt,
]
).lower()
job_name = generate_rfc1123_name(
self._flow_name,
self._run_id,
self._step_name,
self._task_id,
self._attempt,
)

job = (
KubernetesClient()
Expand Down
26 changes: 26 additions & 0 deletions test/unit/test_k8s_job_name_sanitizer.py
@@ -0,0 +1,26 @@
import re
from metaflow.plugins.aws.eks.kubernetes import generate_rfc1123_name

rfc1123 = re.compile(r'^[a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?$')

def test_job_name_santitizer():
# Basic name
assert rfc1123.match(generate_rfc1123_name('HelloFlow', '1', 'end', '321', '1'))

# Step name ends with _
assert rfc1123.match(generate_rfc1123_name('HelloFlow', '1', '_end', '321', '1'))

# Step name starts and ends with _
assert rfc1123.match(generate_rfc1123_name('HelloFlow', '1', '_end_', '321', '1'))

# Flow name ends with _
assert rfc1123.match(generate_rfc1123_name('HelloFlow_', '1', 'end', '321', '1'))

# Same flow name, different case must produce different job names
assert generate_rfc1123_name('Helloflow', '1', 'end', '321', '1') != generate_rfc1123_name('HelloFlow', '1', 'end', '321', '1')

# Very long step name should be fine
assert rfc1123.match(generate_rfc1123_name('Helloflow', '1', 'end'*50, '321', '1'))

# Very long run id should be fine too
assert rfc1123.match(generate_rfc1123_name('Helloflow', '1'*100, 'end', '321', '1'))