diff --git a/README.md b/README.md index f9f361c46..c8f0438f3 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ This repo also includes related tools for [running tasks](#kubernetes-run) and [ * [Installation](#installation) * [Usage](#usage) * [Using templates and variables](#using-templates-and-variables) + * [Customizing behaviour with annotations](#customizing-behaviour-with-annotations) * [Running tasks at the beginning of a deploy](#running-tasks-at-the-beginning-of-a-deploy) * [Deploying Kubernetes secrets (from EJSON)](#deploying-kubernetes-secrets-from-ejson) @@ -120,6 +121,11 @@ All templates must be YAML formatted. You can also use ERB. The following local You can add additional variables using the `--bindings=BINDINGS` option. For example, `kubernetes-deploy my-app cluster1 --bindings=color=blue,size=large` will expose `color` and `size` in your templates. +### Customizing behaviour with annotations + +- `kubernetes-deploy.shopify.io/timeout-override`: Override the tool's hard timeout for one specific resource. Both full ISO8601 durations and the time portion of ISO8601 durations are valid. Value must be between 1 second and 24 hours. + - _Example values_: 45s / 3m / 1h / PT0.25H + - _Compatibility_: all resource types (Note: `Deployment` timeouts are based on `spec.progressDeadlineSeconds` if present, and that field has a default value as of the `apps/v1beta1` group version. Using this annotation will have no effect on `Deployment`s that time out with "Timeout reason: ProgressDeadlineExceeded".) ### Running tasks at the beginning of a deploy diff --git a/lib/kubernetes-deploy.rb b/lib/kubernetes-deploy.rb index 390ca087b..31918d791 100644 --- a/lib/kubernetes-deploy.rb +++ b/lib/kubernetes-deploy.rb @@ -8,6 +8,7 @@ require 'active_support/core_ext/string/strip' require 'active_support/core_ext/hash/keys' require 'active_support/core_ext/array/conversions' +require 'active_support/duration' require 'colorized_string' require 'kubernetes-deploy/version' @@ -17,6 +18,7 @@ require 'kubernetes-deploy/statsd' require 'kubernetes-deploy/concurrency' require 'kubernetes-deploy/bindings_parser' +require 'kubernetes-deploy/duration_parser' module KubernetesDeploy KubernetesDeploy::StatsD.build diff --git a/lib/kubernetes-deploy/duration_parser.rb b/lib/kubernetes-deploy/duration_parser.rb new file mode 100644 index 000000000..662e8144d --- /dev/null +++ b/lib/kubernetes-deploy/duration_parser.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +module KubernetesDeploy + ## + # This class is a less strict extension of ActiveSupport::Duration::ISO8601Parser. + # In addition to full ISO8601 durations, it can parse unprefixed ISO8601 time components (e.g. '1H'). + # It is also case-insensitive. + # For example, this class considers the values "1H", "1h" and "PT1H" to be valid and equivalent. + + class DurationParser + class ParsingError < ArgumentError; end + + def initialize(value) + @iso8601_str = value.to_s.strip.upcase + end + + def parse! + ActiveSupport::Duration.parse("PT#{@iso8601_str}") # By default assume it is just a time component + rescue ActiveSupport::Duration::ISO8601Parser::ParsingError + begin + ActiveSupport::Duration.parse(@iso8601_str) + rescue ActiveSupport::Duration::ISO8601Parser::ParsingError => e + raise ParsingError, e.message + end + end + end +end diff --git a/lib/kubernetes-deploy/kubernetes_resource.rb b/lib/kubernetes-deploy/kubernetes_resource.rb index efbb17d5b..d5f64564d 100644 --- a/lib/kubernetes-deploy/kubernetes_resource.rb +++ b/lib/kubernetes-deploy/kubernetes_resource.rb @@ -6,7 +6,7 @@ module KubernetesDeploy class KubernetesResource - attr_reader :name, :namespace, :context, :validation_error_msg + attr_reader :name, :namespace, :context attr_writer :type, :deploy_started_at TIMEOUT = 5.minutes @@ -22,6 +22,8 @@ class KubernetesResource If you have reason to believe it will succeed, retry the deploy to continue to monitor the rollout. MSG + TIMEOUT_OVERRIDE_ANNOTATION = "kubernetes-deploy.shopify.io/timeout-override" + def self.build(namespace:, context:, definition:, logger:) opts = { namespace: namespace, context: context, definition: definition, logger: logger } if KubernetesDeploy.const_defined?(definition["kind"]) @@ -39,9 +41,17 @@ def self.timeout end def timeout + return timeout_override if timeout_override.present? self.class.timeout end + def timeout_override + return @timeout_override if defined?(@timeout_override) + @timeout_override = DurationParser.new(timeout_annotation).parse!.to_i + rescue DurationParser::ParsingError + @timeout_override = nil + end + def pretty_timeout_type "timeout: #{timeout}s" end @@ -59,20 +69,26 @@ def initialize(namespace:, context:, definition:, logger:) @logger = logger @definition = definition @statsd_report_done = false - @validation_error_msg = nil + @validation_errors = [] end def validate_definition - @validation_error_msg = nil + @validation_errors = [] + validate_timeout_annotation + command = ["create", "-f", file_path, "--dry-run", "--output=name"] _, err, st = kubectl.run(*command, log_failure: false) return true if st.success? - @validation_error_msg = err + @validation_errors << err false end + def validation_error_msg + @validation_errors.join("\n") + end + def validation_failed? - @validation_error_msg.present? + @validation_errors.present? end def id @@ -277,6 +293,23 @@ def to_s private + def validate_timeout_annotation + return if timeout_annotation.nil? + + override = DurationParser.new(timeout_annotation).parse! + if override <= 0 + @validation_errors << "#{TIMEOUT_OVERRIDE_ANNOTATION} annotation is invalid: Value must be greater than 0" + elsif override > 24.hours + @validation_errors << "#{TIMEOUT_OVERRIDE_ANNOTATION} annotation is invalid: Value must be less than 24h" + end + rescue DurationParser::ParsingError => e + @validation_errors << "#{TIMEOUT_OVERRIDE_ANNOTATION} annotation is invalid: #{e}" + end + + def timeout_annotation + @definition.dig("metadata", "annotations", TIMEOUT_OVERRIDE_ANNOTATION) + end + def file @file ||= create_definition_tempfile end diff --git a/test/fixtures/hello-cloud/unmanaged-pod.yml.erb b/test/fixtures/hello-cloud/unmanaged-pod.yml.erb index fdc75bdb9..83b3cd8bb 100644 --- a/test/fixtures/hello-cloud/unmanaged-pod.yml.erb +++ b/test/fixtures/hello-cloud/unmanaged-pod.yml.erb @@ -2,6 +2,8 @@ apiVersion: v1 kind: Pod metadata: name: unmanaged-pod-<%= deployment_id %> + annotations: + kubernetes-deploy.shopify.io/timeout-override: "42s" labels: type: unmanaged-pod name: unmanaged-pod-<%= deployment_id %> diff --git a/test/integration/kubernetes_deploy_test.rb b/test/integration/kubernetes_deploy_test.rb index ca9243b92..e81056970 100644 --- a/test/integration/kubernetes_deploy_test.rb +++ b/test/integration/kubernetes_deploy_test.rb @@ -9,6 +9,7 @@ def test_full_hello_cloud_set_deploy_succeeds assert_logs_match_all([ "Deploying ConfigMap/hello-cloud-configmap-data (timeout: 30s)", + %r{Deploying Pod/unmanaged-pod-[-\w]+ \(timeout: 42s\)}, # annotation timeout override "Hello from the command runner!", # unmanaged pod logs "Result: SUCCESS", "Successfully deployed 17 resources" diff --git a/test/unit/kubernetes-deploy/duration_parser_test.rb b/test/unit/kubernetes-deploy/duration_parser_test.rb new file mode 100644 index 000000000..429112d98 --- /dev/null +++ b/test/unit/kubernetes-deploy/duration_parser_test.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true +require 'test_helper' + +class DurationParserTest < KubernetesDeploy::TestCase + def test_parses_correct_iso_durations_with_prefixes + assert_equal 300, new_parser("PT300S").parse! + assert_equal 300, new_parser("PT5M").parse! + assert_equal 900, new_parser("PT0.25H").parse! + assert_equal 110839937, new_parser("P3Y6M4DT12H30M5S").parse! + end + + def test_parses_correct_iso_durations_without_prefixes + assert_equal 300, new_parser("300S").parse! + assert_equal 300, new_parser("5M").parse! + assert_equal 900, new_parser("0.25H").parse! + assert_equal(-60, new_parser("-1M").parse!) + end + + def test_parse_is_case_insensitive + assert_equal 30, new_parser("30S").parse! + assert_equal 30, new_parser("30s").parse! + assert_equal 30, new_parser("pt30s").parse! + assert_equal 110839937, new_parser("p3y6M4dT12H30M5s").parse! + end + + def test_parse_raises_expected_error_for_blank_values + ["", " ", nil].each do |blank_value| + expected_msg = 'Invalid ISO 8601 duration: "" is empty duration' + assert_raises_message(KubernetesDeploy::DurationParser::ParsingError, expected_msg) do + new_parser(blank_value).parse! + end + end + end + + def test_extra_whitespace_is_stripped_from_values + assert_equal 30, new_parser(" 30S ").parse! + end + + def test_parse_raises_expected_error_when_value_is_invalid + assert_raises_message(KubernetesDeploy::DurationParser::ParsingError, 'Invalid ISO 8601 duration: "FOO"') do + new_parser("foo").parse! + end + end + + private + + def new_parser(value) + KubernetesDeploy::DurationParser.new(value) + end +end diff --git a/test/unit/kubernetes-deploy/kubernetes_resource_test.rb b/test/unit/kubernetes-deploy/kubernetes_resource_test.rb index 110d907e5..d28467aa6 100644 --- a/test/unit/kubernetes-deploy/kubernetes_resource_test.rb +++ b/test/unit/kubernetes-deploy/kubernetes_resource_test.rb @@ -3,14 +3,35 @@ class KubernetesResourceTest < KubernetesDeploy::TestCase class DummyResource < KubernetesDeploy::KubernetesResource - def initialize(*) - definition = { "kind" => "DummyResource", "metadata" => { "name" => "test" } } - super(namespace: 'test', context: 'test', definition: definition, logger: @logger) + attr_writer :succeeded + + def initialize(definition_extras: {}) + definition = { "kind" => "DummyResource", "metadata" => { "name" => "test" } }.merge(definition_extras) + super(namespace: 'test', context: 'test', definition: definition, logger: ::Logger.new($stderr)) + @succeeded = false end def exists? true end + + def deploy_succeeded? + @succeeded + end + + def file_path + "/tmp/foo/bar" + end + + def kubectl + @kubectl_stub ||= begin + kubectl_stub = super + def kubectl_stub.run(*) + ["", "", SystemExit.new(0)] + end + kubectl_stub + end + end end def test_service_and_deployment_timeouts_are_equal @@ -27,7 +48,7 @@ def test_fetch_events_parses_tricky_events_correctly tricky_events = dummy_events(start_time) assert tricky_events.first[:message].count("\n") > 1, "Sanity check failed: inadequate newlines in test events" - stub_kubectl_response("get", "events", anything, resp: build_event_jsonpath(tricky_events), json: false) + dummy.kubectl.expects(:run).returns([build_event_jsonpath(tricky_events), "", SystemExit.new(0)]) events = dummy.fetch_events assert_includes_dummy_events(events, first: true, second: true) end @@ -39,7 +60,7 @@ def test_fetch_events_excludes_events_from_previous_deploys mixed_time_events = dummy_events(start_time) mixed_time_events.first[:last_seen] = 1.hour.ago - stub_kubectl_response("get", "events", anything, resp: build_event_jsonpath(mixed_time_events), json: false) + dummy.kubectl.expects(:run).returns([build_event_jsonpath(mixed_time_events), "", SystemExit.new(0)]) events = dummy.fetch_events assert_includes_dummy_events(events, first: false, second: true) end @@ -48,13 +69,137 @@ def test_fetch_events_returns_empty_hash_when_kubectl_results_empty dummy = DummyResource.new dummy.deploy_started_at = Time.now.utc - 10.seconds - stub_kubectl_response("get", "events", anything, resp: "", json: false) + dummy.kubectl.expects(:run).returns(["", "", SystemExit.new(0)]) events = dummy.fetch_events assert_operator events, :empty? end + def test_can_override_hardcoded_timeout_via_an_annotation + basic_resource = DummyResource.new + assert_equal 300, basic_resource.timeout + + customized_resource = DummyResource.new(definition_extras: build_timeout_metadata("60S")) + assert_equal 60, customized_resource.timeout + + customized_resource = DummyResource.new(definition_extras: build_timeout_metadata("60M")) + assert_equal 3600, customized_resource.timeout + + customized_resource = DummyResource.new(definition_extras: build_timeout_metadata("1H")) + assert_equal 3600, customized_resource.timeout + end + + def test_blank_timeout_annotation_is_invalid + customized_resource = DummyResource.new(definition_extras: build_timeout_metadata("")) + customized_resource.validate_definition + assert customized_resource.validation_failed?, "Blank annotation was valid" + assert_equal "#{timeout_override_err_prefix}: Invalid ISO 8601 duration: \"\" is empty duration", + customized_resource.validation_error_msg + end + + def test_lack_of_timeout_annotation_does_not_fail_validation + basic_resource = DummyResource.new + assert_equal 300, basic_resource.timeout + basic_resource.validate_definition + refute basic_resource.validation_failed? + end + + def test_timeout_override_lower_bound_validation + customized_resource = DummyResource.new(definition_extras: build_timeout_metadata("-1S")) + customized_resource.validate_definition + assert customized_resource.validation_failed?, "Annotation with '-1' was valid" + assert_equal "#{timeout_override_err_prefix}: Value must be greater than 0", + customized_resource.validation_error_msg + + customized_resource = DummyResource.new(definition_extras: build_timeout_metadata("0S")) + customized_resource.validate_definition + assert customized_resource.validation_failed?, "Annotation with '0' was valid" + assert_equal "#{timeout_override_err_prefix}: Value must be greater than 0", + customized_resource.validation_error_msg + + customized_resource = DummyResource.new(definition_extras: build_timeout_metadata("1S")) + customized_resource.validate_definition + refute customized_resource.validation_failed?, "Annotation with '1' was invalid" + end + + def test_timeout_override_upper_bound_validation + customized_resource = DummyResource.new(definition_extras: build_timeout_metadata("24H1S")) + customized_resource.validate_definition + assert customized_resource.validation_failed?, "Annotation with '24H1S' was valid" + assert_equal "#{timeout_override_err_prefix}: Value must be less than 24h", customized_resource.validation_error_msg + + customized_resource = DummyResource.new(definition_extras: build_timeout_metadata("24H")) + customized_resource.validate_definition + refute customized_resource.validation_failed?, "Annotation with '24H' was invalid" + end + + def test_annotation_and_kubectl_error_messages_are_combined + customized_resource = DummyResource.new(definition_extras: build_timeout_metadata("bad")) + customized_resource.kubectl.expects(:run).returns([ + "{}", + "Error from kubectl: Something else in this template was not valid", + stub(success?: false) + ]) + + customized_resource.validate_definition + assert customized_resource.validation_failed?, "Expected resource to be invalid" + expected = <<~STRING.strip + #{timeout_override_err_prefix}: Invalid ISO 8601 duration: "BAD" + Error from kubectl: Something else in this template was not valid + STRING + assert_equal expected, customized_resource.validation_error_msg + end + + def test_calling_timeout_before_validation_with_invalid_annotation_does_not_raise + customized_resource = DummyResource.new(definition_extras: build_timeout_metadata("bad")) + assert_equal 300, customized_resource.timeout + assert_nil customized_resource.timeout_override + end + + def test_deploy_timed_out_respects_hardcoded_timeouts + Timecop.freeze do + dummy = DummyResource.new + refute dummy.deploy_timed_out? + assert_equal 300, dummy.timeout + + dummy.deploy_started_at = Time.now.utc - 300 + refute dummy.deploy_timed_out? + + dummy.deploy_started_at = Time.now.utc - 301 + assert dummy.deploy_timed_out? + end + end + + def test_deploy_timed_out_respects_annotation_based_timeouts + Timecop.freeze do + custom_dummy = DummyResource.new(definition_extras: build_timeout_metadata("3s")) + refute custom_dummy.deploy_timed_out? + assert_equal 3, custom_dummy.timeout + + custom_dummy.deploy_started_at = Time.now.utc - 3 + refute custom_dummy.deploy_timed_out? + + custom_dummy.deploy_started_at = Time.now.utc - 4 + assert custom_dummy.deploy_timed_out? + end + end + private + def timeout_override_err_prefix + "kubernetes-deploy.shopify.io/timeout-override annotation is invalid" + end + + def build_timeout_metadata(value) + { + "metadata" => { + "name" => "customized", + "annotations" => { + KubernetesDeploy::KubernetesResource::TIMEOUT_OVERRIDE_ANNOTATION => value + } + } + } + end + def assert_includes_dummy_events(events, first:, second:) unless first || second assert_operator events, :empty?