From 5a0b6e5edb76f2c2a8ce98350fa13e4da7bd6b79 Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Wed, 1 Sep 2021 15:51:37 -0400 Subject: [PATCH 01/19] Start of supporting restart task for statefulsets and daemonsets --- lib/krane/cli/restart_command.rb | 8 ++- lib/krane/restart_task.rb | 102 ++++++++++++++++++++++--------- 2 files changed, 81 insertions(+), 29 deletions(-) diff --git a/lib/krane/cli/restart_command.rb b/lib/krane/cli/restart_command.rb index 89a39b790..18fef2d6b 100644 --- a/lib/krane/cli/restart_command.rb +++ b/lib/krane/cli/restart_command.rb @@ -6,7 +6,11 @@ class RestartCommand DEFAULT_RESTART_TIMEOUT = '300s' OPTIONS = { "deployments" => { type: :array, banner: "list of deployments", - desc: "List of workload names to restart" }, + desc: "List of workload names to restart", default: [] }, + "stateful-sets" => { type: :array, banner: "list of deployments", + desc: "List of workload names to restart", default: [] }, + "daemonsets" => { type: :array, banner: "list of deployments", + desc: "List of workload names to restart", default: [] }, "global-timeout" => { type: :string, banner: "duration", default: DEFAULT_RESTART_TIMEOUT, desc: "Max duration to monitor workloads correctly restarted" }, "selector" => { type: :string, banner: "'label=value'", @@ -25,6 +29,8 @@ def self.from_options(namespace, context, options) ) restart.run!( deployments: options[:deployments], + stateful_sets: options[:stateful_sets], + daemonsets: options[:daemonsets], selector: selector, verify_result: options["verify-result"] ) diff --git a/lib/krane/restart_task.rb b/lib/krane/restart_task.rb index bafe57b45..0735e9c06 100644 --- a/lib/krane/restart_task.rb +++ b/lib/krane/restart_task.rb @@ -58,13 +58,14 @@ def run(**args) # @param verify_result [Boolean] Wait for completion and verify success # # @return [nil] - def run!(deployments: nil, selector: nil, verify_result: true) + def run!(deployments: [], statefulsets: [], daemonsets: [], selector: nil, verify_result: true) start = Time.now.utc @logger.reset @logger.phase_heading("Initializing restart") verify_config! - deployments = identify_target_deployments(deployments, selector: selector) + # TODO: change deployments varname to workloads + deployments = identify_target_workloads(deployments, statefulsets, daemonsets, selector: selector) @logger.phase_heading("Triggering restart by touching ENV[RESTARTED_AT]") patch_kubeclient_deployments(deployments) @@ -97,38 +98,83 @@ def tags(status, deployments) %W(namespace:#{@namespace} context:#{@context} status:#{status} deployments:#{deployments.to_a.length}}) end - def identify_target_deployments(deployment_names, selector: nil) - if deployment_names.nil? - deployments = if selector.nil? - @logger.info("Configured to restart all deployments with the `#{ANNOTATION}` annotation") - apps_v1_kubeclient.get_deployments(namespace: @namespace) - else - selector_string = selector.to_s - @logger.info( - "Configured to restart all deployments with the `#{ANNOTATION}` annotation and #{selector_string} selector" - ) - apps_v1_kubeclient.get_deployments(namespace: @namespace, label_selector: selector_string) - end - deployments.select! { |d| d.metadata.annotations[ANNOTATION] } + def identify_target_workloads(deployment_names, statefulset_names, daemonset_names, selector: nil) + if deployment_names.blank? && statefulset_names.blank? && daemonset_names.blank? + @logger.info("Configured to restart all workloads with the `#{ANNOTATION}` annotation") if selector.nil? + deployments = identify_target_deployments(deployment_names, selector: selector) + statefulsets = identify_target_statefulsets(statefulset_names, selector: selector) + daemonsets = identify_target_daemonsets(daemonset_names, selector: selector) - if deployments.none? - raise FatalRestartError, "no deployments with the `#{ANNOTATION}` annotation found in namespace #{@namespace}" + if deployments.none? && statefulsets.none? && daemonsets.none? + raise FatalRestartError, "no deployments, statefulsets, or daemonsets, with the `#{ANNOTATION}`" \ + "annotation found in namespace #{@namespace}" end - elsif deployment_names.empty? - raise FatalRestartError, "Configured to restart deployments by name, but list of names was blank" + elsif deployment_names.empty?**statefulset_names.empty? && daemonset_names.empty? + raise FatalRestartError, "Configured to restart workloads by name, but list of names was blank" elsif !selector.nil? - raise FatalRestartError, "Can't specify deployment names and selector at the same time" + raise FatalRestartError, "Can't specify workload names and selector at the same time" else - deployment_names = deployment_names.uniq - list = deployment_names.join(', ') - @logger.info("Configured to restart deployments by name: #{list}") - - deployments = fetch_deployments(deployment_names) - if deployments.none? - raise FatalRestartError, "no deployments with names #{list} found in namespace #{@namespace}" + deployments, statefulsets, daemonsets = identify_target_workloads_by_name(deployment_names, + statefulset_names, daemonset_names) + if deployments.none? && statefulsets.none? && daemonsets.none? + error_msgs = [] + error_msgs << "no deployments with names #{list} found in namespace #{@namespace}" if deployment_names + error_msgs << "no statefulsets with names #{list} found in namespace #{@namespace}" if statefulset_names + error_msgs << "no daemonsets with names #{list} found in namespace #{@namespace}" if daemonset_names + raise FatalRestartError, error_msgs.join(', ') end end - deployments + [deployments, statefulsets, daemonsets] + end + + def identify_target_workloads_by_name(deployment_names, statefulset_names, daemonset_names) + deployment_list = deployment_names.uniq.join(', ') + statefulset_list = statefulset_names.uniq.join(', ') + daemonset_list = daemonset_names.uniq.join(', ') + @logger.info("Configured to restart deployments by name: #{deployment_list}") if deployment_list.present? + @logger.info("Configured to restart statefulsets by name: #{statefulset_list}") if statefulset_list.present? + @logger.info("Configured to restart daemonsets by name: #{daemonset_list}") if daemonset_list.present? + + [fetch_deployments(deployment_names), fetch_statefulsets(statefulset_names), fetch_daemonsets(daemonset_names)] + end + + def identify_target_deployments(selector: nil) + if selector.nil? + apps_v1_kubeclient.get_deployments(namespace: @namespace) + else + selector_string = selector.to_s + @logger.info( + "Configured to restart all deployments with the `#{ANNOTATION}` annotation and #{selector_string} selector" + ) + apps_v1_kubeclient.get_deployments(namespace: @namespace, label_selector: selector_string) + end + deployments.select! { |d| d.metadata.annotations[ANNOTATION] } + end + + def identify_target_statefulsets(selector: nil) + if selector.nil? + apps_v1_kubeclient.get_statefulsets(namespace: @namespace) + else + selector_string = selector.to_s + @logger.info( + "Configured to restart all deployments with the `#{ANNOTATION}` annotation and #{selector_string} selector" + ) + apps_v1_kubeclient.get_statefulsets(namespace: @namespace, label_selector: selector_string) + end + deployments.select! { |d| d.metadata.annotations[ANNOTATION] } + end + + def identify_target_daemonsets(selector: nil) + if selector.nil? + apps_v1_kubeclient.get_daemonsets(namespace: @namespace) + else + selector_string = selector.to_s + @logger.info( + "Configured to restart all deployments with the `#{ANNOTATION}` annotation and #{selector_string} selector" + ) + apps_v1_kubeclient.get_daemonsets(namespace: @namespace, label_selector: selector_string) + end + deployments.select! { |d| d.metadata.annotations[ANNOTATION] } end def build_watchables(kubeclient_resources, started) From 485463a6ac5b74ae8db3b8bcdb6be2c0d825c447 Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Wed, 1 Sep 2021 15:54:45 -0400 Subject: [PATCH 02/19] s/stateful_set/statefulset --- lib/krane/cli/restart_command.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/krane/cli/restart_command.rb b/lib/krane/cli/restart_command.rb index 18fef2d6b..4fdc081b2 100644 --- a/lib/krane/cli/restart_command.rb +++ b/lib/krane/cli/restart_command.rb @@ -6,11 +6,11 @@ class RestartCommand DEFAULT_RESTART_TIMEOUT = '300s' OPTIONS = { "deployments" => { type: :array, banner: "list of deployments", - desc: "List of workload names to restart", default: [] }, - "stateful-sets" => { type: :array, banner: "list of deployments", - desc: "List of workload names to restart", default: [] }, - "daemonsets" => { type: :array, banner: "list of deployments", - desc: "List of workload names to restart", default: [] }, + desc: "List of deployment names to restart", default: [] }, + "statefulsets" => { type: :array, banner: "list of statefulsets", + desc: "List of statefulset names to restart", default: [] }, + "daemonsets" => { type: :array, banner: "list of daemonsets", + desc: "List of daemonset names to restart", default: [] }, "global-timeout" => { type: :string, banner: "duration", default: DEFAULT_RESTART_TIMEOUT, desc: "Max duration to monitor workloads correctly restarted" }, "selector" => { type: :string, banner: "'label=value'", @@ -29,7 +29,7 @@ def self.from_options(namespace, context, options) ) restart.run!( deployments: options[:deployments], - stateful_sets: options[:stateful_sets], + statefulsets: options[:statefulsets], daemonsets: options[:daemonsets], selector: selector, verify_result: options["verify-result"] From 4a94c8f07134823345f2fb4f28719f6d3676b145 Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Thu, 2 Sep 2021 11:27:42 -0400 Subject: [PATCH 03/19] CLI tests for new restart command flags --- lib/krane/cli/restart_command.rb | 2 +- test/exe/restart_test.rb | 25 ++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/lib/krane/cli/restart_command.rb b/lib/krane/cli/restart_command.rb index 4fdc081b2..997357a3c 100644 --- a/lib/krane/cli/restart_command.rb +++ b/lib/krane/cli/restart_command.rb @@ -8,7 +8,7 @@ class RestartCommand "deployments" => { type: :array, banner: "list of deployments", desc: "List of deployment names to restart", default: [] }, "statefulsets" => { type: :array, banner: "list of statefulsets", - desc: "List of statefulset names to restart", default: [] }, + desc: "List of statefulset names to restart", default: [] }, "daemonsets" => { type: :array, banner: "list of daemonsets", desc: "List of daemonset names to restart", default: [] }, "global-timeout" => { type: :string, banner: "duration", default: DEFAULT_RESTART_TIMEOUT, diff --git a/test/exe/restart_test.rb b/test/exe/restart_test.rb index deeb98437..44bd06e02 100644 --- a/test/exe/restart_test.rb +++ b/test/exe/restart_test.rb @@ -22,6 +22,27 @@ def test_restart_passes_deployments_transparently krane_restart!(flags: '--deployments web jobs') end + def test_restart_passes_statefulsets_transparently + set_krane_restart_expectations(run_args: { statefulsets: ['ss'] }) + krane_restart!(flags: '--statefulsets ss') + set_krane_restart_expectations(run_args: { statefulsets: ['ss', 'ss-2'] }) + krane_restart!(flags: '--statefulsets ss ss-2') + end + + def test_restart_passes_daemonsets_transparently + set_krane_restart_expectations(run_args: { daemonsets: ['ds'] }) + krane_restart!(flags: '--daemonsets ds') + set_krane_restart_expectations(run_args: { daemonsets: ['ds', 'ds-2'] }) + krane_restart!(flags: '--daemonsets ds ds-2') + end + + def test_restart_passes_multiple_workload_types_transparently + set_krane_restart_expectations( + run_args: { deployments: ['web', 'jobs'], statefulsets: ['ss', 'ss-2'], daemonsets: ['ds', 'ds-2'] } + ) + krane_restart!(flags: '--deployments web jobs --statefulsets ss ss-2 --daemonsets ds ds-2') + end + def test_restart_parses_selector options = default_options response = mock('RestartTask') @@ -74,7 +95,9 @@ def default_options(new_args = {}, run_args = {}) global_timeout: 300, }.merge(new_args), run_args: { - deployments: nil, + deployments: [], + statefulsets: [], + daemonsets: [], selector: nil, verify_result: true, }.merge(run_args), From f312bb2a54eb9c6e8783be16898fef035547371d Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Thu, 2 Sep 2021 14:30:30 -0400 Subject: [PATCH 04/19] Fix serial_test regressions --- Rakefile | 7 ++ lib/krane/restart_task.rb | 92 +++++++++++++++++---------- test/integration/restart_task_test.rb | 49 +++++++------- 3 files changed, 90 insertions(+), 58 deletions(-) diff --git a/Rakefile b/Rakefile index ff1e9de3e..e466a0b10 100644 --- a/Rakefile +++ b/Rakefile @@ -30,6 +30,13 @@ Rake::TestTask.new(:cli_test) do |t| t.test_files = FileList['test/exe/**/*_test.rb'] end +desc("Run restart_task tests") +Rake::TestTask.new(:restart_test) do |t| + t.libs << "test" + t.libs << "lib" + t.test_files = FileList['test/integration/restart_task_test.rb'] +end + desc("Run all tests") task(test: %w(unit_test serial_integration_test integration_test cli_test)) diff --git a/lib/krane/restart_task.rb b/lib/krane/restart_task.rb index 0735e9c06..e175bc41b 100644 --- a/lib/krane/restart_task.rb +++ b/lib/krane/restart_task.rb @@ -64,8 +64,8 @@ def run!(deployments: [], statefulsets: [], daemonsets: [], selector: nil, verif @logger.phase_heading("Initializing restart") verify_config! - # TODO: change deployments varname to workloads - deployments = identify_target_workloads(deployments, statefulsets, daemonsets, selector: selector) + deployments, statefulsets, daemonsets = identify_target_workloads(deployments, statefulsets, + daemonsets, selector: selector) @logger.phase_heading("Triggering restart by touching ENV[RESTARTED_AT]") patch_kubeclient_deployments(deployments) @@ -100,16 +100,22 @@ def tags(status, deployments) def identify_target_workloads(deployment_names, statefulset_names, daemonset_names, selector: nil) if deployment_names.blank? && statefulset_names.blank? && daemonset_names.blank? - @logger.info("Configured to restart all workloads with the `#{ANNOTATION}` annotation") if selector.nil? - deployments = identify_target_deployments(deployment_names, selector: selector) - statefulsets = identify_target_statefulsets(statefulset_names, selector: selector) - daemonsets = identify_target_daemonsets(daemonset_names, selector: selector) + if selector.nil? + @logger.info("Configured to restart all workloads with the `#{ANNOTATION}` annotation") + else + @logger.info( + "Configured to restart all workloads with the `#{ANNOTATION}` annotation and #{selector} selector" + ) + end + deployments = identify_target_deployments(selector: selector) + statefulsets = identify_target_statefulsets(selector: selector) + daemonsets = identify_target_daemonsets(selector: selector) if deployments.none? && statefulsets.none? && daemonsets.none? - raise FatalRestartError, "no deployments, statefulsets, or daemonsets, with the `#{ANNOTATION}`" \ + raise FatalRestartError, "no deployments, statefulsets, or daemonsets, with the `#{ANNOTATION}` " \ "annotation found in namespace #{@namespace}" end - elsif deployment_names.empty?**statefulset_names.empty? && daemonset_names.empty? + elsif deployment_names.empty? && statefulset_names.empty? && daemonset_names.empty? raise FatalRestartError, "Configured to restart workloads by name, but list of names was blank" elsif !selector.nil? raise FatalRestartError, "Can't specify workload names and selector at the same time" @@ -128,53 +134,47 @@ def identify_target_workloads(deployment_names, statefulset_names, daemonset_nam end def identify_target_workloads_by_name(deployment_names, statefulset_names, daemonset_names) - deployment_list = deployment_names.uniq.join(', ') - statefulset_list = statefulset_names.uniq.join(', ') - daemonset_list = daemonset_names.uniq.join(', ') - @logger.info("Configured to restart deployments by name: #{deployment_list}") if deployment_list.present? - @logger.info("Configured to restart statefulsets by name: #{statefulset_list}") if statefulset_list.present? - @logger.info("Configured to restart daemonsets by name: #{daemonset_list}") if daemonset_list.present? + deployment_names = deployment_names.uniq + statefulset_names = statefulset_names.uniq + daemonset_names = daemonset_names.uniq + @logger.info("Configured to restart deployments by name: #{deployment_names.join(', ')}") if deployment_names.present? + @logger.info("Configured to restart statefulsets by name: #{statefulset_names.join(', ')}") if statefulset_names.present? + @logger.info("Configured to restart daemonsets by name: #{daemonset_names.join(', ')}") if daemonset_names.present? [fetch_deployments(deployment_names), fetch_statefulsets(statefulset_names), fetch_daemonsets(daemonset_names)] end def identify_target_deployments(selector: nil) - if selector.nil? + deployments = if selector.nil? apps_v1_kubeclient.get_deployments(namespace: @namespace) else selector_string = selector.to_s - @logger.info( - "Configured to restart all deployments with the `#{ANNOTATION}` annotation and #{selector_string} selector" - ) apps_v1_kubeclient.get_deployments(namespace: @namespace, label_selector: selector_string) end - deployments.select! { |d| d.metadata.annotations[ANNOTATION] } + deployments.each { |d| d.kind = "Deployment" } + deployments.select { |d| d.metadata.annotations[ANNOTATION] } end def identify_target_statefulsets(selector: nil) - if selector.nil? - apps_v1_kubeclient.get_statefulsets(namespace: @namespace) + statefulsets = if selector.nil? + apps_v1_kubeclient.get_stateful_sets(namespace: @namespace) else selector_string = selector.to_s - @logger.info( - "Configured to restart all deployments with the `#{ANNOTATION}` annotation and #{selector_string} selector" - ) - apps_v1_kubeclient.get_statefulsets(namespace: @namespace, label_selector: selector_string) + apps_v1_kubeclient.get_stateful_sets(namespace: @namespace, label_selector: selector_string) end - deployments.select! { |d| d.metadata.annotations[ANNOTATION] } + statefulsets.each { |d| d.kind = "StatefulSet" } + statefulsets.select { |d| d.metadata.annotations[ANNOTATION] } end def identify_target_daemonsets(selector: nil) - if selector.nil? - apps_v1_kubeclient.get_daemonsets(namespace: @namespace) + daemonsets = if selector.nil? + apps_v1_kubeclient.get_daemon_sets(namespace: @namespace) else selector_string = selector.to_s - @logger.info( - "Configured to restart all deployments with the `#{ANNOTATION}` annotation and #{selector_string} selector" - ) - apps_v1_kubeclient.get_daemonsets(namespace: @namespace, label_selector: selector_string) + apps_v1_kubeclient.get_daemon_sets(namespace: @namespace, label_selector: selector_string) end - deployments.select! { |d| d.metadata.annotations[ANNOTATION] } + daemonsets.each { |d| d.kind = "DaemonSet" } + daemonsets.select { |d| d.metadata.annotations[ANNOTATION] } end def build_watchables(kubeclient_resources, started) @@ -198,7 +198,7 @@ def patch_kubeclient_deployments(deployments) deployments.each do |record| begin patch_deployment_with_restart(record) - @logger.info("Triggered `#{record.metadata.name}` restart") + @logger.info("Triggered `#{record.kind}/#{record.metadata.name}` restart") rescue Kubeclient::HttpError => e raise RestartAPIError.new(record.metadata.name, e.message) end @@ -217,6 +217,30 @@ def fetch_deployments(list) end end + def fetch_statefulsets(list) + list.map do |name| + record = nil + begin + record = apps_v1_kubeclient.get_stateful_sets(name, @namespace) + rescue Kubeclient::ResourceNotFoundError + raise FatalRestartError, "StatefulSet `#{name}` not found in namespace `#{@namespace}`" + end + record + end + end + + def fetch_daemonsets(list) + list.map do |name| + record = nil + begin + record = apps_v1_kubeclient.get_daemon_sets(name, @namespace) + rescue Kubeclient::ResourceNotFoundError + raise FatalRestartError, "DaemonSet `#{name}` not found in namespace `#{@namespace}`" + end + record + end + end + def build_patch_payload(deployment) containers = deployment.spec.template.spec.containers { diff --git a/test/integration/restart_task_test.rb b/test/integration/restart_task_test.rb index 543b2195e..5427647e7 100644 --- a/test/integration/restart_task_test.rb +++ b/test/integration/restart_task_test.rb @@ -14,8 +14,8 @@ def test_restart_by_annotation assert_restart_success(restart.perform) assert_logs_match_all([ - "Configured to restart all deployments with the `shipit.shopify.io/restart` annotation", - "Triggered `web` restart", + "Configured to restart all workloads with the `shipit.shopify.io/restart` annotation", + "Triggered `Deployment/web` restart", "Waiting for rollout", %r{Successfully restarted in \d+\.\d+s: Deployment/web}, "Result: SUCCESS", @@ -45,9 +45,9 @@ def test_restart_by_selector assert_restart_success(restart.perform(selector: Krane::LabelSelector.parse("name=web,branch=staging"))) assert_logs_match_all([ - "Configured to restart all deployments with the `shipit.shopify.io/restart` annotation " \ + "Configured to restart all workloads with the `shipit.shopify.io/restart` annotation " \ "and name=web,branch=staging selector", - "Triggered `staging-web` restart", + "Triggered `Deployment/staging-web` restart", "Waiting for rollout", %r{Successfully restarted in \d+\.\ds: Deployment/staging-web}, "Result: SUCCESS", @@ -64,9 +64,9 @@ def test_restart_by_annotation_none_found restart = build_restart_task assert_restart_failure(restart.perform) assert_logs_match_all([ - "Configured to restart all deployments with the `shipit.shopify.io/restart` annotation", + "Configured to restart all workloads with the `shipit.shopify.io/restart` annotation", "Result: FAILURE", - %r{No deployments with the `shipit\.shopify\.io/restart` annotation found in namespace}, + %r{No deployments, statefulsets, or daemonsets, with the `shipit\.shopify\.io/restart` annotation found in namespace}, ], in_order: true) end @@ -82,7 +82,7 @@ def test_restart_named_deployments_twice assert_logs_match_all([ "Configured to restart deployments by name: web", - "Triggered `web` restart", + "Triggered `Deployment/web` restart", "Waiting for rollout", %r{Successfully restarted in \d+\.\d+s: Deployment/web}, "Result: SUCCESS", @@ -114,7 +114,7 @@ def test_restart_with_same_resource_twice assert_logs_match_all([ "Configured to restart deployments by name: web", - "Triggered `web` restart", + "Triggered `Deployment/web` restart", "Result: SUCCESS", "Successfully restarted 1 resource", %r{Deployment/web.*1 availableReplica}, @@ -150,22 +150,23 @@ def test_restart_one_not_existing_deployment in_order: true) end - def test_restart_none - restart = build_restart_task - assert_restart_failure(restart.perform(deployments: [])) - assert_logs_match_all([ - "Result: FAILURE", - "Configured to restart deployments by name, but list of names was blank", - ], - in_order: true) - end + # TODO: [] is now the default empty value, does this break anything??? + # def test_restart_none + # restart = build_restart_task + # assert_restart_failure(restart.perform(deployments: [])) + # assert_logs_match_all([ + # "Result: FAILURE", + # "Configured to restart deployments by name, but list of names was blank", + # ], + # in_order: true) + # end def test_restart_deployments_and_selector restart = build_restart_task assert_restart_failure(restart.perform(deployments: %w(web), selector: Krane::LabelSelector.parse("app=web"))) assert_logs_match_all([ "Result: FAILURE", - "Can't specify deployment names and selector at the same time", + "Can't specify workload names and selector at the same time", ], in_order: true) end @@ -223,7 +224,7 @@ def test_restart_failure assert_raises(Krane::DeploymentTimeoutError) { restart.perform!(deployments: %w(web)) } assert_logs_match_all([ - "Triggered `web` restart", + "Triggered `Deployment/web` restart", "Deployment/web rollout timed out", "Result: TIMED OUT", "Timed out waiting for 1 resource to restart", @@ -277,8 +278,8 @@ def test_verify_result_false_succeeds assert_restart_success(restart.perform(verify_result: false)) assert_logs_match_all([ - "Configured to restart all deployments with the `shipit.shopify.io/restart` annotation", - "Triggered `web` restart", + "Configured to restart all workloads with the `shipit.shopify.io/restart` annotation", + "Triggered `Deployment/web` restart", "Result: SUCCESS", "Result verification is disabled for this task", ], @@ -292,9 +293,9 @@ def test_verify_result_false_fails_on_config_checks restart = build_restart_task assert_restart_failure(restart.perform(verify_result: false)) assert_logs_match_all([ - "Configured to restart all deployments with the `shipit.shopify.io/restart` annotation", + "Configured to restart all workloads with the `shipit.shopify.io/restart` annotation", "Result: FAILURE", - %r{No deployments with the `shipit\.shopify\.io/restart` annotation found in namespace}, + %r{No deployments, statefulsets, or daemonsets, with the `shipit\.shopify\.io/restart` annotation found in namespace}, ], in_order: true) end @@ -324,7 +325,7 @@ def test_verify_result_false_succeeds_quickly_when_verification_would_timeout restart.perform!(deployments: %w(web), verify_result: false) assert_logs_match_all([ - "Triggered `web` restart", + "Triggered `Deployment/web` restart", "Result: SUCCESS", "Result verification is disabled for this task", ], From 1a5e457ab84fe0787d1a20870bc86d33e5d50b86 Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Thu, 2 Sep 2021 15:47:56 -0400 Subject: [PATCH 05/19] Better restart implementation; watch all resources --- Rakefile | 7 --- lib/krane/restart_task.rb | 90 ++++++++++++++++++++------- test/integration/restart_task_test.rb | 6 +- 3 files changed, 71 insertions(+), 32 deletions(-) diff --git a/Rakefile b/Rakefile index e466a0b10..ff1e9de3e 100644 --- a/Rakefile +++ b/Rakefile @@ -30,13 +30,6 @@ Rake::TestTask.new(:cli_test) do |t| t.test_files = FileList['test/exe/**/*_test.rb'] end -desc("Run restart_task tests") -Rake::TestTask.new(:restart_test) do |t| - t.libs << "test" - t.libs << "lib" - t.test_files = FileList['test/integration/restart_task_test.rb'] -end - desc("Run all tests") task(test: %w(unit_test serial_integration_test integration_test cli_test)) diff --git a/lib/krane/restart_task.rb b/lib/krane/restart_task.rb index e175bc41b..2b0714bf7 100644 --- a/lib/krane/restart_task.rb +++ b/lib/krane/restart_task.rb @@ -22,6 +22,8 @@ def initialize(deployment_name, response) HTTP_OK_RANGE = 200..299 ANNOTATION = "shipit.shopify.io/restart" + RESTART_TRIGGER_ANNOTATION = "kubectl.kubernetes.io/restartedAt" + attr_reader :task_config delegate :kubeclient_builder, to: :task_config @@ -69,10 +71,14 @@ def run!(deployments: [], statefulsets: [], daemonsets: [], selector: nil, verif @logger.phase_heading("Triggering restart by touching ENV[RESTARTED_AT]") patch_kubeclient_deployments(deployments) + patch_kubeclient_statefulsets(statefulsets) + patch_kubeclient_daemonsets(daemonsets) if verify_result @logger.phase_heading("Waiting for rollout") - resources = build_watchables(deployments, start) + resources = build_watchables(deployments, start, Deployment) + resources += build_watchables(statefulsets, start, StatefulSet) + resources += build_watchables(daemonsets, start, DaemonSet) verify_restart(resources) else warning = "Result verification is disabled for this task" @@ -137,9 +143,16 @@ def identify_target_workloads_by_name(deployment_names, statefulset_names, daemo deployment_names = deployment_names.uniq statefulset_names = statefulset_names.uniq daemonset_names = daemonset_names.uniq - @logger.info("Configured to restart deployments by name: #{deployment_names.join(', ')}") if deployment_names.present? - @logger.info("Configured to restart statefulsets by name: #{statefulset_names.join(', ')}") if statefulset_names.present? - @logger.info("Configured to restart daemonsets by name: #{daemonset_names.join(', ')}") if daemonset_names.present? + + if deployment_names.present? + @logger.info("Configured to restart deployments by name: #{deployment_names.join(', ')}") + end + if statefulset_names.present? + @logger.info("Configured to restart statefulsets by name: #{statefulset_names.join(', ')}") + end + if daemonset_names.present? + @logger.info("Configured to restart daemonsets by name: #{daemonset_names.join(', ')}") + end [fetch_deployments(deployment_names), fetch_statefulsets(statefulset_names), fetch_daemonsets(daemonset_names)] end @@ -151,7 +164,6 @@ def identify_target_deployments(selector: nil) selector_string = selector.to_s apps_v1_kubeclient.get_deployments(namespace: @namespace, label_selector: selector_string) end - deployments.each { |d| d.kind = "Deployment" } deployments.select { |d| d.metadata.annotations[ANNOTATION] } end @@ -162,7 +174,6 @@ def identify_target_statefulsets(selector: nil) selector_string = selector.to_s apps_v1_kubeclient.get_stateful_sets(namespace: @namespace, label_selector: selector_string) end - statefulsets.each { |d| d.kind = "StatefulSet" } statefulsets.select { |d| d.metadata.annotations[ANNOTATION] } end @@ -173,14 +184,13 @@ def identify_target_daemonsets(selector: nil) selector_string = selector.to_s apps_v1_kubeclient.get_daemon_sets(namespace: @namespace, label_selector: selector_string) end - daemonsets.each { |d| d.kind = "DaemonSet" } daemonsets.select { |d| d.metadata.annotations[ANNOTATION] } end - def build_watchables(kubeclient_resources, started) + def build_watchables(kubeclient_resources, started, klass) kubeclient_resources.map do |d| definition = d.to_h.deep_stringify_keys - r = Deployment.new(namespace: @namespace, context: @context, definition: definition, logger: @logger) + r = klass.new(namespace: @namespace, context: @context, definition: definition, logger: @logger) r.deploy_started_at = started # we don't care what happened to the resource before the restart cmd ran r end @@ -194,11 +204,49 @@ def patch_deployment_with_restart(record) ) end + def patch_statefulset_with_restart(record) + apps_v1_kubeclient.patch_stateful_set( + record.metadata.name, + build_patch_payload(record), + @namespace + ) + end + + def patch_daemonset_with_restart(record) + apps_v1_kubeclient.patch_daemon_set( + record.metadata.name, + build_patch_payload(record), + @namespace + ) + end + def patch_kubeclient_deployments(deployments) deployments.each do |record| begin patch_deployment_with_restart(record) - @logger.info("Triggered `#{record.kind}/#{record.metadata.name}` restart") + @logger.info("Triggered `Deployment/#{record.metadata.name}` restart") + rescue Kubeclient::HttpError => e + raise RestartAPIError.new(record.metadata.name, e.message) + end + end + end + + def patch_kubeclient_statefulsets(statefulsets) + statefulsets.each do |record| + begin + patch_statefulset_with_restart(record) + @logger.info("Triggered `StatefulSet/#{record.metadata.name}` restart") + rescue Kubeclient::HttpError => e + raise RestartAPIError.new(record.metadata.name, e.message) + end + end + end + + def patch_kubeclient_daemonsets(daemonsets) + daemonsets.each do |record| + begin + patch_daemonset_with_restart(record) + @logger.info("Triggered `DaemonSet/#{record.metadata.name}` restart") rescue Kubeclient::HttpError => e raise RestartAPIError.new(record.metadata.name, e.message) end @@ -221,7 +269,7 @@ def fetch_statefulsets(list) list.map do |name| record = nil begin - record = apps_v1_kubeclient.get_stateful_sets(name, @namespace) + record = apps_v1_kubeclient.get_stateful_set(name, @namespace) rescue Kubeclient::ResourceNotFoundError raise FatalRestartError, "StatefulSet `#{name}` not found in namespace `#{@namespace}`" end @@ -233,7 +281,7 @@ def fetch_daemonsets(list) list.map do |name| record = nil begin - record = apps_v1_kubeclient.get_daemon_sets(name, @namespace) + record = apps_v1_kubeclient.get_daemon_set(name, @namespace) rescue Kubeclient::ResourceNotFoundError raise FatalRestartError, "DaemonSet `#{name}` not found in namespace `#{@namespace}`" end @@ -242,20 +290,16 @@ def fetch_daemonsets(list) end def build_patch_payload(deployment) - containers = deployment.spec.template.spec.containers { spec: { template: { - spec: { - containers: containers.map do |container| - { - name: container.name, - env: [{ name: "RESTARTED_AT", value: Time.now.to_i.to_s }], - } - end, - }, - }, - }, + metadata: { + annotations: { + RESTART_TRIGGER_ANNOTATION => Time.now.utc.to_datetime.rfc3339 + } + } + } + } } end diff --git a/test/integration/restart_task_test.rb b/test/integration/restart_task_test.rb index 5427647e7..6666d34ab 100644 --- a/test/integration/restart_task_test.rb +++ b/test/integration/restart_task_test.rb @@ -66,7 +66,8 @@ def test_restart_by_annotation_none_found assert_logs_match_all([ "Configured to restart all workloads with the `shipit.shopify.io/restart` annotation", "Result: FAILURE", - %r{No deployments, statefulsets, or daemonsets, with the `shipit\.shopify\.io/restart` annotation found in namespace}, + %r{No deployments, statefulsets, or daemonsets, with the `shipit\.shopify\.io/restart` + annotation found in namespace}, ], in_order: true) end @@ -295,7 +296,8 @@ def test_verify_result_false_fails_on_config_checks assert_logs_match_all([ "Configured to restart all workloads with the `shipit.shopify.io/restart` annotation", "Result: FAILURE", - %r{No deployments, statefulsets, or daemonsets, with the `shipit\.shopify\.io/restart` annotation found in namespace}, + %r{No deployments, statefulsets, or daemonsets, with the `shipit\.shopify\.io/restart` + annotation found in namespace}, ], in_order: true) end From a2fd669b9c3d2db9f609ea970eefd7618b2a8bc1 Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Thu, 2 Sep 2021 16:11:25 -0400 Subject: [PATCH 06/19] Better patch method, cop to ignore DateTime usage --- .rubocop.yml | 3 +++ lib/krane/restart_task.rb | 12 ++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index 557840994..31c3c0f12 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -1,5 +1,8 @@ inherit_gem: rubocop-shopify: rubocop.yml +Style/DateTime: + Enabled: false + AllCops: TargetRubyVersion: 2.4 diff --git a/lib/krane/restart_task.rb b/lib/krane/restart_task.rb index 2b0714bf7..7226fdb19 100644 --- a/lib/krane/restart_task.rb +++ b/lib/krane/restart_task.rb @@ -289,17 +289,17 @@ def fetch_daemonsets(list) end end - def build_patch_payload(deployment) + def build_patch_payload(_deployment) { spec: { template: { metadata: { annotations: { - RESTART_TRIGGER_ANNOTATION => Time.now.utc.to_datetime.rfc3339 - } - } - } - } + RESTART_TRIGGER_ANNOTATION => Time.now.utc.to_datetime.rfc3339, + }, + }, + }, + }, } end From 20b8125f6e5338c8c3de1f4e80d7a68b9d9bc2da Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Fri, 3 Sep 2021 14:15:50 -0400 Subject: [PATCH 07/19] more test fix --- test/integration/restart_task_test.rb | 63 ++++++--------------------- 1 file changed, 14 insertions(+), 49 deletions(-) diff --git a/test/integration/restart_task_test.rb b/test/integration/restart_task_test.rb index 6666d34ab..6ccfbde33 100644 --- a/test/integration/restart_task_test.rb +++ b/test/integration/restart_task_test.rb @@ -66,8 +66,7 @@ def test_restart_by_annotation_none_found assert_logs_match_all([ "Configured to restart all workloads with the `shipit.shopify.io/restart` annotation", "Result: FAILURE", - %r{No deployments, statefulsets, or daemonsets, with the `shipit\.shopify\.io/restart` - annotation found in namespace}, + %r{No deployments, statefulsets, or daemonsets, with the `shipit\.shopify\.io/restart` annotation found}, ], in_order: true) end @@ -101,7 +100,7 @@ def test_restart_named_deployments_twice second_restarted_at = fetch_restarted_at("web") assert(second_restarted_at, "RESTARTED_AT is present after second restart") - refute_equal(first_restarted_at.value, second_restarted_at.value) + refute_equal(first_restarted_at, second_restarted_at) end def test_restart_with_same_resource_twice @@ -200,44 +199,6 @@ def test_restart_not_existing_namespace in_order: true) end - def test_restart_failure - success = deploy_fixtures("hello-cloud", subset: ["configmap-data.yml", "web.yml.erb"], - render_erb: true) do |fixtures| - deployment = fixtures["web.yml.erb"]["Deployment"].first - deployment["spec"]["progressDeadlineSeconds"] = 30 - container = deployment["spec"]["template"]["spec"]["containers"].first - container["readinessProbe"] = { - "failureThreshold" => 1, - "periodSeconds" => 1, - "initialDelaySeconds" => 0, - "exec" => { - "command" => [ - "/bin/sh", - "-c", - "test $(env | grep -s RESTARTED_AT -c) -eq 0", - ], - }, - } - end - assert_deploy_success(success) - - restart = build_restart_task - assert_raises(Krane::DeploymentTimeoutError) { restart.perform!(deployments: %w(web)) } - - assert_logs_match_all([ - "Triggered `Deployment/web` restart", - "Deployment/web rollout timed out", - "Result: TIMED OUT", - "Timed out waiting for 1 resource to restart", - "Deployment/web: TIMED OUT", - "The following containers have not passed their readiness probes", - "app must exit 0 from the following command", - "Final status: 2 replicas, 1 updatedReplica, 1 availableReplica, 1 unavailableReplica", - "Unhealthy: Readiness probe failed", - ], - in_order: true) - end - def test_restart_successful_with_partial_availability result = deploy_fixtures("slow-cloud", subset: %w(web-deploy-1.yml)) do |fixtures| web = fixtures["web-deploy-1.yml"]["Deployment"].first @@ -255,7 +216,7 @@ def test_restart_successful_with_partial_availability pods = kubeclient.get_pods(namespace: @namespace, label_selector: 'name=web,app=slow-cloud') new_pods = pods.select do |pod| - pod.spec.containers.any? { |c| c["name"] == "app" && c.env&.find { |n| n.name == "RESTARTED_AT" } } + pod.metadata.annotations&.dig(Krane::RestartTask::RESTART_TRIGGER_ANNOTATION) end assert(new_pods.length >= 1, "Expected at least one new pod, saw #{new_pods.length}") @@ -296,8 +257,7 @@ def test_verify_result_false_fails_on_config_checks assert_logs_match_all([ "Configured to restart all workloads with the `shipit.shopify.io/restart` annotation", "Result: FAILURE", - %r{No deployments, statefulsets, or daemonsets, with the `shipit\.shopify\.io/restart` - annotation found in namespace}, + %r{No deployments, statefulsets, or daemonsets, with the `shipit\.shopify\.io/restart` annotation found}, ], in_order: true) end @@ -344,10 +304,15 @@ def build_restart_task ) end - def fetch_restarted_at(deployment_name) - deployment = apps_v1_kubeclient.get_deployment(deployment_name, @namespace) - containers = deployment.spec.template.spec.containers - app_container = containers.find { |c| c["name"] == "app" } - app_container&.env&.find { |n| n.name == "RESTARTED_AT" } + def fetch_restarted_at(name, kind: :deployment) + resource = case kind + when :deployment + apps_v1_kubeclient.get_deployment(name, @namespace) + when :statefulset + apps_v1_kubeclient.get_stateful_set(name, @namespace) + when :daemonset + apps_v1_kubeclient.get_daemonset_set(name, @namespace) + end + resource.spec.template.metadata.annotations&.dig(Krane::RestartTask::RESTART_TRIGGER_ANNOTATION) end end From b40fe532939baa5d8e61654ce9ead2d91d3dca2a Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Fri, 3 Sep 2021 14:28:49 -0400 Subject: [PATCH 08/19] tidyup + test --- lib/krane/restart_task.rb | 22 ++++------------------ test/integration/restart_task_test.rb | 19 ++++++------------- 2 files changed, 10 insertions(+), 31 deletions(-) diff --git a/lib/krane/restart_task.rb b/lib/krane/restart_task.rb index 7226fdb19..b48a77c45 100644 --- a/lib/krane/restart_task.rb +++ b/lib/krane/restart_task.rb @@ -69,7 +69,7 @@ def run!(deployments: [], statefulsets: [], daemonsets: [], selector: nil, verif deployments, statefulsets, daemonsets = identify_target_workloads(deployments, statefulsets, daemonsets, selector: selector) - @logger.phase_heading("Triggering restart by touching ENV[RESTARTED_AT]") + @logger.phase_heading("Triggering restart by annotating pod template #{RESTART_TRIGGER_ANNOTATION} annotation") patch_kubeclient_deployments(deployments) patch_kubeclient_statefulsets(statefulsets) patch_kubeclient_daemonsets(daemonsets) @@ -121,8 +121,6 @@ def identify_target_workloads(deployment_names, statefulset_names, daemonset_nam raise FatalRestartError, "no deployments, statefulsets, or daemonsets, with the `#{ANNOTATION}` " \ "annotation found in namespace #{@namespace}" end - elsif deployment_names.empty? && statefulset_names.empty? && daemonset_names.empty? - raise FatalRestartError, "Configured to restart workloads by name, but list of names was blank" elsif !selector.nil? raise FatalRestartError, "Can't specify workload names and selector at the same time" else @@ -197,27 +195,15 @@ def build_watchables(kubeclient_resources, started, klass) end def patch_deployment_with_restart(record) - apps_v1_kubeclient.patch_deployment( - record.metadata.name, - build_patch_payload(record), - @namespace - ) + apps_v1_kubeclient.patch_deployment(record.metadata.name, build_patch_payload(record), @namespace) end def patch_statefulset_with_restart(record) - apps_v1_kubeclient.patch_stateful_set( - record.metadata.name, - build_patch_payload(record), - @namespace - ) + apps_v1_kubeclient.patch_stateful_set(record.metadata.name, build_patch_payload(record), @namespace) end def patch_daemonset_with_restart(record) - apps_v1_kubeclient.patch_daemon_set( - record.metadata.name, - build_patch_payload(record), - @namespace - ) + apps_v1_kubeclient.patch_daemon_set(record.metadata.name, build_patch_payload(record), @namespace) end def patch_kubeclient_deployments(deployments) diff --git a/test/integration/restart_task_test.rb b/test/integration/restart_task_test.rb index 6ccfbde33..59cd98c9e 100644 --- a/test/integration/restart_task_test.rb +++ b/test/integration/restart_task_test.rb @@ -150,17 +150,6 @@ def test_restart_one_not_existing_deployment in_order: true) end - # TODO: [] is now the default empty value, does this break anything??? - # def test_restart_none - # restart = build_restart_task - # assert_restart_failure(restart.perform(deployments: [])) - # assert_logs_match_all([ - # "Result: FAILURE", - # "Configured to restart deployments by name, but list of names was blank", - # ], - # in_order: true) - # end - def test_restart_deployments_and_selector restart = build_restart_task assert_restart_failure(restart.perform(deployments: %w(web), selector: Krane::LabelSelector.parse("app=web"))) @@ -263,7 +252,8 @@ def test_verify_result_false_fails_on_config_checks end def test_verify_result_false_succeeds_quickly_when_verification_would_timeout - success = deploy_fixtures("hello-cloud", subset: ["configmap-data.yml", "web.yml.erb"], + success = deploy_fixtures("hello-cloud", + subset: ["configmap-data.yml", "web.yml.erb", "daemon_set.yml", "stateful_set.yml"], render_erb: true) do |fixtures| deployment = fixtures["web.yml.erb"]["Deployment"].first deployment["spec"]["progressDeadlineSeconds"] = 30 @@ -284,10 +274,13 @@ def test_verify_result_false_succeeds_quickly_when_verification_would_timeout assert_deploy_success(success) restart = build_restart_task - restart.perform!(deployments: %w(web), verify_result: false) + restart.perform!(deployments: %w(web), statefulsets: %w(stateful-busybox), daemonsets: %w(ds-app), + verify_result: false) assert_logs_match_all([ "Triggered `Deployment/web` restart", + "Triggered `StatefulSet/stateful-busybox` restart", + "Triggered `DaemonSet/ds-app` restart", "Result: SUCCESS", "Result verification is disabled for this task", ], From a56ac7227ffedb4ccf3fedcbf704e334dfd48e3f Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Tue, 7 Sep 2021 11:41:01 -0400 Subject: [PATCH 09/19] Fix README wording --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 760394479..f7cdd1822 100644 --- a/README.md +++ b/README.md @@ -451,7 +451,7 @@ resource to deploy. # krane restart -`krane restart` is a tool for restarting all of the pods in one or more deployments. It triggers the restart by touching the `RESTARTED_AT` environment variable in the deployment's podSpec. The rollout strategy defined for each deployment will be respected by the restart. +`krane restart` is a tool for restarting all of the pods in one or more deployments, statefuls sets, and/or daemon sets. It triggers the restart by patching template metadata with the `kubectl.kubernetes.io/restartedAt` annotation (with the value being an RFC 3339 representation of the current time). Note this is the manner in which `kubectl rollout restart` itself triggers restarts. ## Usage From 180f699f0298254bc02be9f223e1340a63e1f39a Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Tue, 7 Sep 2021 11:41:25 -0400 Subject: [PATCH 10/19] Add dedicated fail-restart fixture, fix krane_test fetch_restarted_at method --- test/fixtures/downward_api/configmap-data.yml | 10 +++ test/fixtures/downward_api/web.yml.erb | 85 +++++++++++++++++++ test/integration/krane_test.rb | 12 ++- test/integration/restart_task_test.rb | 38 +++++++++ 4 files changed, 138 insertions(+), 7 deletions(-) create mode 100644 test/fixtures/downward_api/configmap-data.yml create mode 100644 test/fixtures/downward_api/web.yml.erb diff --git a/test/fixtures/downward_api/configmap-data.yml b/test/fixtures/downward_api/configmap-data.yml new file mode 100644 index 000000000..c7f84cb47 --- /dev/null +++ b/test/fixtures/downward_api/configmap-data.yml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: hello-cloud-configmap-data + labels: + name: hello-cloud-configmap-data + app: hello-cloud +data: + datapoint1: value1 + datapoint2: value2 diff --git a/test/fixtures/downward_api/web.yml.erb b/test/fixtures/downward_api/web.yml.erb new file mode 100644 index 000000000..67ebb45a0 --- /dev/null +++ b/test/fixtures/downward_api/web.yml.erb @@ -0,0 +1,85 @@ +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: + name: web + labels: + name: web + app: hello-cloud +spec: + rules: + - host: hello-cloud.example.com + http: + paths: + - backend: + serviceName: web + servicePort: 80 +--- +apiVersion: v1 +kind: Service +metadata: + name: web + labels: + name: web + app: hello-cloud +spec: + ports: + - port: 80 + name: http + targetPort: http + selector: + name: web + app: hello-cloud +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: web + annotations: + shipit.shopify.io/restart: "true" + labels: + name: web + app: hello-cloud +spec: + replicas: 1 + selector: + matchLabels: + name: web + app: hello-cloud + progressDeadlineSeconds: 60 + template: + metadata: + labels: + name: web + app: hello-cloud + spec: + containers: + - name: app + image: busybox + imagePullPolicy: IfNotPresent + command: ["tail", "-f", "/dev/null"] + ports: + - containerPort: 80 + name: http + env: + - name: GITHUB_REV + value: "<%= current_sha %>" + - name: CONFIG + valueFrom: + configMapKeyRef: + name: hello-cloud-configmap-data + key: datapoint1 + volumeMounts: + - name: podinfo + mountPath: /etc/podinfo + - name: sidecar + image: busybox + imagePullPolicy: IfNotPresent + command: ["tail", "-f", "/dev/null"] + volumes: + - name: podinfo + downwardAPI: + items: + - path: "annotations" + fieldRef: + fieldPath: metadata.annotations + diff --git a/test/integration/krane_test.rb b/test/integration/krane_test.rb index 376de0937..2df9b7ccb 100644 --- a/test/integration/krane_test.rb +++ b/test/integration/krane_test.rb @@ -6,16 +6,16 @@ def test_restart_black_box assert_deploy_success( deploy_fixtures("hello-cloud", subset: ["configmap-data.yml", "web.yml.erb", "redis.yml"], render_erb: true) ) - refute(fetch_restarted_at("web"), "RESTARTED_AT env on fresh deployment") - refute(fetch_restarted_at("redis"), "RESTARTED_AT env on fresh deployment") + refute(fetch_restarted_at("web"), "restart annotation on fresh deployment") + refute(fetch_restarted_at("redis"), "restart annotation on fresh deployment") out, err, status = krane_black_box("restart", "#{@namespace} #{KubeclientHelper::TEST_CONTEXT} --deployments web") assert_empty(out) assert_match("Success", err) assert_predicate(status, :success?) - assert(fetch_restarted_at("web"), "no RESTARTED_AT env is present after the restart") - refute(fetch_restarted_at("redis"), "RESTARTED_AT env is present") + assert(fetch_restarted_at("web"), "no restart annotation is present after the restart") + refute(fetch_restarted_at("redis"), "restart annotation is present") end def test_run_success_black_box @@ -142,8 +142,6 @@ def task_runner_pods def fetch_restarted_at(deployment_name) deployment = apps_v1_kubeclient.get_deployment(deployment_name, @namespace) - containers = deployment.spec.template.spec.containers - app_container = containers.find { |c| c["name"] == "app" } - app_container&.env&.find { |n| n.name == "RESTARTED_AT" } + deployment.spec.template.metadata.annotations&.dig(Krane::RestartTask::RESTART_TRIGGER_ANNOTATION) end end diff --git a/test/integration/restart_task_test.rb b/test/integration/restart_task_test.rb index 59cd98c9e..b9b8e069a 100644 --- a/test/integration/restart_task_test.rb +++ b/test/integration/restart_task_test.rb @@ -188,6 +188,44 @@ def test_restart_not_existing_namespace in_order: true) end + def test_restart_failure + success = deploy_fixtures("downward_api", subset: ["configmap-data.yml", "web.yml.erb"], + render_erb: true) do |fixtures| + deployment = fixtures["web.yml.erb"]["Deployment"].first + deployment["spec"]["progressDeadlineSeconds"] = 30 + container = deployment["spec"]["template"]["spec"]["containers"].first + container["readinessProbe"] = { + "failureThreshold" => 1, + "periodSeconds" => 1, + "initialDelaySeconds" => 0, + "exec" => { + "command" => [ + "/bin/sh", + "-c", + "test $(cat /etc/podinfo/annotations | grep -s kubectl.kubernetes.io/restartedAt -c) -eq 0", + ], + }, + } + end + assert_deploy_success(success) + + restart = build_restart_task + assert_raises(Krane::DeploymentTimeoutError) { restart.perform!(deployments: %w(web)) } + + assert_logs_match_all([ + "Triggered `Deployment/web` restart", + "Deployment/web rollout timed out", + "Result: TIMED OUT", + "Timed out waiting for 1 resource to restart", + "Deployment/web: TIMED OUT", + "The following containers have not passed their readiness probes", + "app must exit 0 from the following command", + "Final status: 2 replicas, 1 updatedReplica, 1 availableReplica, 1 unavailableReplica", + "Unhealthy: Readiness probe failed", + ], + in_order: true) + end + def test_restart_successful_with_partial_availability result = deploy_fixtures("slow-cloud", subset: %w(web-deploy-1.yml)) do |fixtures| web = fixtures["web-deploy-1.yml"]["Deployment"].first From 497a86b83cdb2c4666c7ffc6d8200840d6bfd1c6 Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Tue, 7 Sep 2021 11:43:40 -0400 Subject: [PATCH 11/19] fix test wording --- test/integration/restart_task_test.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/integration/restart_task_test.rb b/test/integration/restart_task_test.rb index b9b8e069a..7c99dcbc5 100644 --- a/test/integration/restart_task_test.rb +++ b/test/integration/restart_task_test.rb @@ -7,8 +7,8 @@ def test_restart_by_annotation assert_deploy_success(deploy_fixtures("hello-cloud", subset: ["configmap-data.yml", "web.yml.erb", "redis.yml"], render_erb: true)) - refute(fetch_restarted_at("web"), "no RESTARTED_AT env on fresh deployment") - refute(fetch_restarted_at("redis"), "no RESTARTED_AT env on fresh deployment") + refute(fetch_restarted_at("web"), "no restart annotation on fresh deployment") + refute(fetch_restarted_at("redis"), "no restart annotation on fresh deployment") restart = build_restart_task assert_restart_success(restart.perform) From 9dd553581f7a43333164a7ebe2b64efae40931f3 Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Tue, 7 Sep 2021 15:01:43 -0400 Subject: [PATCH 12/19] Test restart on SS and DS as well as Deployment --- test/fixtures/branched/daemon_set.yml.erb | 30 ++++++ test/fixtures/branched/stateful_set.yml.erb | 33 ++++++ test/fixtures/hello-cloud/daemon_set.yml | 2 + test/fixtures/hello-cloud/stateful_set.yml | 2 + test/integration/krane_deploy_test.rb | 4 +- test/integration/restart_task_test.rb | 108 ++++++++++++++------ 6 files changed, 144 insertions(+), 35 deletions(-) create mode 100644 test/fixtures/branched/daemon_set.yml.erb create mode 100644 test/fixtures/branched/stateful_set.yml.erb diff --git a/test/fixtures/branched/daemon_set.yml.erb b/test/fixtures/branched/daemon_set.yml.erb new file mode 100644 index 000000000..abdaab77c --- /dev/null +++ b/test/fixtures/branched/daemon_set.yml.erb @@ -0,0 +1,30 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: <%= branch %>-ds-app + labels: + app: branched + branch: <%= branch %> + name: ds-app + annotations: + shipit.shopify.io/restart: "true" +spec: + selector: + matchLabels: + app: branched + branch: <%= branch %> + name: ds-app + template: + metadata: + labels: + app: branched + branch: <%= branch %> + name: ds-app + spec: + containers: + - name: app + image: busybox + imagePullPolicy: IfNotPresent + command: ["tail", "-f", "/dev/null"] + ports: + - containerPort: 80 diff --git a/test/fixtures/branched/stateful_set.yml.erb b/test/fixtures/branched/stateful_set.yml.erb new file mode 100644 index 000000000..0e32e3b8d --- /dev/null +++ b/test/fixtures/branched/stateful_set.yml.erb @@ -0,0 +1,33 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: <%= branch %>-stateful-busybox + labels: + name: stateful-busybox + app: branched + branch: <%= branch %> + annotations: + shipit.shopify.io/restart: "true" +spec: + serviceName: "stateful-busybox" + replicas: 2 + selector: + matchLabels: + app: branched + branch: <%= branch %> + name: stateful-busybox + template: + metadata: + labels: + app: branched + branch: <%= branch %> + name: stateful-busybox + spec: + terminationGracePeriodSeconds: 0 + containers: + - name: app + image: busybox + imagePullPolicy: IfNotPresent + command: ["tail", "-f", "/dev/null"] + ports: + - containerPort: 80 diff --git a/test/fixtures/hello-cloud/daemon_set.yml b/test/fixtures/hello-cloud/daemon_set.yml index c296be0ea..578ec72e5 100644 --- a/test/fixtures/hello-cloud/daemon_set.yml +++ b/test/fixtures/hello-cloud/daemon_set.yml @@ -5,6 +5,8 @@ metadata: labels: app: hello-cloud name: ds-app + annotations: + shipit.shopify.io/restart: "true" spec: selector: matchLabels: diff --git a/test/fixtures/hello-cloud/stateful_set.yml b/test/fixtures/hello-cloud/stateful_set.yml index 2e26d8a92..318b7fc7b 100644 --- a/test/fixtures/hello-cloud/stateful_set.yml +++ b/test/fixtures/hello-cloud/stateful_set.yml @@ -22,6 +22,8 @@ metadata: labels: name: stateful-busybox app: hello-cloud + annotations: + shipit.shopify.io/restart: "true" spec: serviceName: "stateful-busybox" replicas: 2 diff --git a/test/integration/krane_deploy_test.rb b/test/integration/krane_deploy_test.rb index 00305b5aa..ffbcc230e 100644 --- a/test/integration/krane_deploy_test.rb +++ b/test/integration/krane_deploy_test.rb @@ -141,12 +141,12 @@ def test_pruning_disabled def test_selector # Deploy the same thing twice with a different selector - assert_deploy_success(deploy_fixtures("branched", + assert_deploy_success(deploy_fixtures("branched", subset: ["web.yml.erb"] bindings: { "branch" => "master" }, selector: Krane::LabelSelector.parse("branch=master"), render_erb: true)) assert_logs_match("Using resource selector branch=master") - assert_deploy_success(deploy_fixtures("branched", + assert_deploy_success(deploy_fixtures("branched", subset: ["web.yml.erb"] bindings: { "branch" => "staging" }, selector: Krane::LabelSelector.parse("branch=staging"), render_erb: true)) diff --git a/test/integration/restart_task_test.rb b/test/integration/restart_task_test.rb index 7c99dcbc5..6de0791be 100644 --- a/test/integration/restart_task_test.rb +++ b/test/integration/restart_task_test.rb @@ -4,10 +4,13 @@ class RestartTaskTest < Krane::IntegrationTest def test_restart_by_annotation - assert_deploy_success(deploy_fixtures("hello-cloud", subset: ["configmap-data.yml", "web.yml.erb", "redis.yml"], + assert_deploy_success(deploy_fixtures("hello-cloud", + subset: ["configmap-data.yml", "web.yml.erb", "redis.yml", "stateful_set.yml", "daemon_set.yml"], render_erb: true)) refute(fetch_restarted_at("web"), "no restart annotation on fresh deployment") + refute(fetch_restarted_at("stateful-busybox", kind: :statefulset), "no restart annotation on fresh stateful set") + refute(fetch_restarted_at("ds-app", kind: :daemonset), "no restart annotation on fresh daemon set") refute(fetch_restarted_at("redis"), "no restart annotation on fresh deployment") restart = build_restart_task @@ -19,13 +22,15 @@ def test_restart_by_annotation "Waiting for rollout", %r{Successfully restarted in \d+\.\d+s: Deployment/web}, "Result: SUCCESS", - "Successfully restarted 1 resource", + "Successfully restarted 3 resources", %r{Deployment/web.*1 availableReplica}, ], in_order: true) - assert(fetch_restarted_at("web"), "RESTARTED_AT is present after the restart") - refute(fetch_restarted_at("redis"), "no RESTARTED_AT env on fresh deployment") + assert(fetch_restarted_at("web"), "restart annotation is present after the restart") + assert(fetch_restarted_at("stateful-busybox", kind: :statefulset), "no restart annotation on fresh stateful set") + assert(fetch_restarted_at("ds-app", kind: :daemonset), "no restart annotation on fresh daemon set") + refute(fetch_restarted_at("redis"), "no restart annotation env on fresh deployment") end def test_restart_by_selector @@ -38,26 +43,40 @@ def test_restart_by_selector selector: Krane::LabelSelector.parse("branch=staging"), render_erb: true)) - refute(fetch_restarted_at("master-web"), "no RESTARTED_AT env on fresh deployment") - refute(fetch_restarted_at("staging-web"), "no RESTARTED_AT env on fresh deployment") + refute(fetch_restarted_at("master-web"), "no restart annotation on fresh deployment") + refute(fetch_restarted_at("staging-web"), "no restart annotation on fresh deployment") + refute(fetch_restarted_at("master-stateful-busybox", kind: :statefulset), + "no restart annotation on fresh stateful set") + refute(fetch_restarted_at("staging-stateful-busybox", kind: :statefulset), + "no restart annotation on fresh stateful set") + refute(fetch_restarted_at("master-ds-app", kind: :daemonset), "no restart annotation on fresh daemon set") + refute(fetch_restarted_at("staging-ds-app", kind: :daemonset), "no restart annotation on fresh daemon set") restart = build_restart_task - assert_restart_success(restart.perform(selector: Krane::LabelSelector.parse("name=web,branch=staging"))) + assert_restart_success(restart.perform(selector: Krane::LabelSelector.parse("branch=staging"))) assert_logs_match_all([ "Configured to restart all workloads with the `shipit.shopify.io/restart` annotation " \ - "and name=web,branch=staging selector", + "and branch=staging selector", "Triggered `Deployment/staging-web` restart", + "Triggered `StatefulSet/staging-stateful-busybox` restart", + "Triggered `DaemonSet/staging-ds-app` restart", "Waiting for rollout", %r{Successfully restarted in \d+\.\ds: Deployment/staging-web}, "Result: SUCCESS", - "Successfully restarted 1 resource", + "Successfully restarted 3 resources", %r{Deployment/staging-web.*1 availableReplica}, ], in_order: true) - assert(fetch_restarted_at("staging-web"), "RESTARTED_AT is present after the restart") - refute(fetch_restarted_at("master-web"), "no RESTARTED_AT env on fresh deployment") + assert(fetch_restarted_at("staging-web"), "restart annotation is present after the restart") + refute(fetch_restarted_at("master-web"), "no restart annotation on fresh deployment") + assert(fetch_restarted_at("staging-stateful-busybox", kind: :statefulset), + "restart annotation is present after the restart") + refute(fetch_restarted_at("master-stateful-busybox", kind: :statefulset), + "no restart annotation on fresh stateful set") + assert(fetch_restarted_at("staging-ds-app", kind: :daemonset), "restart annotation is present after the restart") + refute(fetch_restarted_at("master-ds-app", kind: :daemonset), "no restart annotation on fresh daemon set") end def test_restart_by_annotation_none_found @@ -71,57 +90,80 @@ def test_restart_by_annotation_none_found in_order: true) end - def test_restart_named_deployments_twice - assert_deploy_success(deploy_fixtures("hello-cloud", subset: ["configmap-data.yml", "web.yml.erb"], + def test_restart_named_workloads_twice + assert_deploy_success(deploy_fixtures("hello-cloud", + subset: ["configmap-data.yml", "web.yml.erb", "stateful_set.yml", "daemon_set.yml"], render_erb: true)) - refute(fetch_restarted_at("web"), "no RESTARTED_AT env on fresh deployment") + refute(fetch_restarted_at("web"), "no restart annotation on fresh deployment") restart = build_restart_task - assert_restart_success(restart.perform(deployments: %w(web))) + assert_restart_success( + restart.perform(deployments: %w(web), statefulsets: %w(stateful-busybox), daemonsets: %w(ds-app)) + ) assert_logs_match_all([ "Configured to restart deployments by name: web", + "Configured to restart statefulsets by name: stateful-busybox", + "Configured to restart daemonsets by name: ds-app", "Triggered `Deployment/web` restart", + "Triggered `StatefulSet/stateful-busybox` restart", + "Triggered `DaemonSet/ds-app` restart", "Waiting for rollout", %r{Successfully restarted in \d+\.\d+s: Deployment/web}, "Result: SUCCESS", - "Successfully restarted 1 resource", + "Successfully restarted 3 resources", %r{Deployment/web.*1 availableReplica}, ], in_order: true) - first_restarted_at = fetch_restarted_at("web") - assert(first_restarted_at, "RESTARTED_AT is present after first restart") + first_restarted_at_deploy = fetch_restarted_at("web") + first_restarted_at_statefulset = fetch_restarted_at("stateful-busybox", kind: :statefulset) + first_restarted_at_daemonset = fetch_restarted_at("ds-app", kind: :daemonset) + assert(first_restarted_at_deploy, "restart annotation is present after first restart") + assert(first_restarted_at_statefulset, "restart annotation is present after first restart") + assert(first_restarted_at_daemonset, "restart annotation is present after first restart") Timecop.freeze(1.second.from_now) do - assert_restart_success(restart.perform(deployments: %w(web))) + assert_restart_success( + restart.perform(deployments: %w(web), statefulsets: %w(stateful-busybox), daemonsets: %w(ds-app)) + ) end - second_restarted_at = fetch_restarted_at("web") - assert(second_restarted_at, "RESTARTED_AT is present after second restart") - refute_equal(first_restarted_at, second_restarted_at) + second_restarted_at_deploy = fetch_restarted_at("web") + second_restarted_at_statefulset = fetch_restarted_at("stateful-busybox", kind: :statefulset) + second_restarted_at_daemonset = fetch_restarted_at("ds-app", kind: :daemonset) + assert(second_restarted_at_deploy, "restart annotation is present after second restart") + assert(second_restarted_at_statefulset, "restart annotation is present after second restart") + assert(second_restarted_at_daemonset, "restart annotation is present after second restart") + refute_equal(first_restarted_at_deploy, second_restarted_at_deploy) + refute_equal(first_restarted_at_statefulset, second_restarted_at_statefulset) + refute_equal(first_restarted_at_daemonset, second_restarted_at_daemonset) end def test_restart_with_same_resource_twice assert_deploy_success(deploy_fixtures("hello-cloud", subset: ["configmap-data.yml", "web.yml.erb"], render_erb: true)) - refute(fetch_restarted_at("web"), "no RESTARTED_AT env on fresh deployment") + refute(fetch_restarted_at("web"), "no restart annotation on fresh deployment") restart = build_restart_task assert_restart_success(restart.perform(deployments: %w(web web))) assert_logs_match_all([ "Configured to restart deployments by name: web", + "Configured to restart statefulsets by name: stateful-busybox", + "Configured to restart daemonsets by name: ds-app", "Triggered `Deployment/web` restart", + "Triggered `StatefulSet/stateful-busybox` restart", + "Triggered `DaemonSet/ds-app` restart", "Result: SUCCESS", - "Successfully restarted 1 resource", + "Successfully restarted 3 resources", %r{Deployment/web.*1 availableReplica}, ], in_order: true) - assert(fetch_restarted_at("web"), "RESTARTED_AT is present after the restart") + assert(fetch_restarted_at("web"), "restart annotation is present after the restart") end def test_restart_not_existing_deployment @@ -141,7 +183,7 @@ def test_restart_one_not_existing_deployment restart = build_restart_task assert_restart_failure(restart.perform(deployments: %w(walrus web))) - refute(fetch_restarted_at("web"), "no RESTARTED_AT env after failed restart task") + refute(fetch_restarted_at("web"), "no restart annotation after failed restart task") assert_logs_match_all([ "Configured to restart deployments by name: walrus, web", "Result: FAILURE", @@ -253,15 +295,15 @@ def test_restart_successful_with_partial_availability end assert_equal(1, new_ready_pods.length, "Expected exactly one new pod to be ready, saw #{new_ready_pods.length}") - assert(fetch_restarted_at("web"), "RESTARTED_AT is present after the restart") + assert(fetch_restarted_at("web"), "restart annotation is present after the restart") end def test_verify_result_false_succeeds assert_deploy_success(deploy_fixtures("hello-cloud", subset: ["configmap-data.yml", "web.yml.erb", "redis.yml"], render_erb: true)) - refute(fetch_restarted_at("web"), "no RESTARTED_AT env on fresh deployment") - refute(fetch_restarted_at("redis"), "no RESTARTED_AT env on fresh deployment") + refute(fetch_restarted_at("web"), "no restart annotation on fresh deployment") + refute(fetch_restarted_at("redis"), "no restart annotation on fresh deployment") restart = build_restart_task assert_restart_success(restart.perform(verify_result: false)) @@ -274,8 +316,8 @@ def test_verify_result_false_succeeds ], in_order: true) - assert(fetch_restarted_at("web"), "RESTARTED_AT is present after the restart") - refute(fetch_restarted_at("redis"), "no RESTARTED_AT env on fresh deployment") + assert(fetch_restarted_at("web"), "restart annotation is present after the restart") + refute(fetch_restarted_at("redis"), "no restart annotation on fresh deployment") end def test_verify_result_false_fails_on_config_checks @@ -304,7 +346,7 @@ def test_verify_result_false_succeeds_quickly_when_verification_would_timeout "command" => [ "/bin/sh", "-c", - "test $(env | grep -s RESTARTED_AT -c) -eq 0", + "test $(env | grep -s restart annotation -c) -eq 0", ], }, } @@ -342,7 +384,7 @@ def fetch_restarted_at(name, kind: :deployment) when :statefulset apps_v1_kubeclient.get_stateful_set(name, @namespace) when :daemonset - apps_v1_kubeclient.get_daemonset_set(name, @namespace) + apps_v1_kubeclient.get_daemon_set(name, @namespace) end resource.spec.template.metadata.annotations&.dig(Krane::RestartTask::RESTART_TRIGGER_ANNOTATION) end From 31a67b3515801c9ff70ae0714ae1d12b7ae06e6b Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Fri, 10 Sep 2021 11:14:51 -0400 Subject: [PATCH 13/19] flesh out test assumptions --- test/integration/restart_task_test.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/integration/restart_task_test.rb b/test/integration/restart_task_test.rb index 6de0791be..ddbce54cb 100644 --- a/test/integration/restart_task_test.rb +++ b/test/integration/restart_task_test.rb @@ -24,12 +24,14 @@ def test_restart_by_annotation "Result: SUCCESS", "Successfully restarted 3 resources", %r{Deployment/web.*1 availableReplica}, + %r{StatefulSet/stateful-busybox.* 2 replicas}, + %r{DaemonSet/ds-app.* 1 updatedNumberScheduled}, ], in_order: true) assert(fetch_restarted_at("web"), "restart annotation is present after the restart") - assert(fetch_restarted_at("stateful-busybox", kind: :statefulset), "no restart annotation on fresh stateful set") - assert(fetch_restarted_at("ds-app", kind: :daemonset), "no restart annotation on fresh daemon set") + assert(fetch_restarted_at("stateful-busybox", kind: :statefulset), "restart annotation on fresh stateful set") + assert(fetch_restarted_at("ds-app", kind: :daemonset), "restart annotation on fresh daemon set") refute(fetch_restarted_at("redis"), "no restart annotation env on fresh deployment") end From af02b40df866ef4f021e229481b753517033e6e4 Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Fri, 10 Sep 2021 12:07:00 -0400 Subject: [PATCH 14/19] fix typo --- test/integration/krane_deploy_test.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/integration/krane_deploy_test.rb b/test/integration/krane_deploy_test.rb index ffbcc230e..d06b5582b 100644 --- a/test/integration/krane_deploy_test.rb +++ b/test/integration/krane_deploy_test.rb @@ -141,12 +141,12 @@ def test_pruning_disabled def test_selector # Deploy the same thing twice with a different selector - assert_deploy_success(deploy_fixtures("branched", subset: ["web.yml.erb"] + assert_deploy_success(deploy_fixtures("branched", subset: ["web.yml.erb"], bindings: { "branch" => "master" }, selector: Krane::LabelSelector.parse("branch=master"), render_erb: true)) assert_logs_match("Using resource selector branch=master") - assert_deploy_success(deploy_fixtures("branched", subset: ["web.yml.erb"] + assert_deploy_success(deploy_fixtures("branched", subset: ["web.yml.erb"], bindings: { "branch" => "staging" }, selector: Krane::LabelSelector.parse("branch=staging"), render_erb: true)) From 85c2a7a7a466ee84e9f1c79dc187ffc717aa027e Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Fri, 10 Sep 2021 12:38:19 -0400 Subject: [PATCH 15/19] fix test --- test/fixtures/downward_api/daemon_set.yml | 34 +++++++++++++ test/fixtures/downward_api/stateful_set.yml | 54 +++++++++++++++++++++ test/integration/restart_task_test.rb | 6 +-- 3 files changed, 90 insertions(+), 4 deletions(-) create mode 100644 test/fixtures/downward_api/daemon_set.yml create mode 100644 test/fixtures/downward_api/stateful_set.yml diff --git a/test/fixtures/downward_api/daemon_set.yml b/test/fixtures/downward_api/daemon_set.yml new file mode 100644 index 000000000..ae7a874f1 --- /dev/null +++ b/test/fixtures/downward_api/daemon_set.yml @@ -0,0 +1,34 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: ds-app + labels: + app: hello-cloud + name: ds-app + annotations: + shipit.shopify.io/restart: "true" +spec: + selector: + matchLabels: + app: hello-cloud + name: ds-app + template: + metadata: + labels: + app: hello-cloud + name: ds-app + spec: + containers: + - name: app + image: busybox + imagePullPolicy: IfNotPresent + command: ["tail", "-f", "/dev/null"] + ports: + - containerPort: 80 + volumes: + - name: podinfo + downwardAPI: + items: + - path: "annotations" + fieldRef: + fieldPath: metadata.annotations diff --git a/test/fixtures/downward_api/stateful_set.yml b/test/fixtures/downward_api/stateful_set.yml new file mode 100644 index 000000000..0843f12a6 --- /dev/null +++ b/test/fixtures/downward_api/stateful_set.yml @@ -0,0 +1,54 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: stateful-busybox + labels: + name: stateful-busybox + app: hello-cloud +spec: + ports: + - port: 80 + name: http + targetPort: http + selector: + name: stateful-busybox + app: hello-cloud +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: stateful-busybox + labels: + name: stateful-busybox + app: hello-cloud + annotations: + shipit.shopify.io/restart: "true" +spec: + serviceName: "stateful-busybox" + replicas: 2 + selector: + matchLabels: + app: hello-cloud + name: stateful-busybox + template: + metadata: + labels: + app: hello-cloud + name: stateful-busybox + spec: + terminationGracePeriodSeconds: 0 + containers: + - name: app + image: busybox + imagePullPolicy: IfNotPresent + command: ["tail", "-f", "/dev/null"] + ports: + - containerPort: 80 + volumes: + - name: podinfo + downwardAPI: + items: + - path: "annotations" + fieldRef: + fieldPath: metadata.annotations \ No newline at end of file diff --git a/test/integration/restart_task_test.rb b/test/integration/restart_task_test.rb index ddbce54cb..8f0797544 100644 --- a/test/integration/restart_task_test.rb +++ b/test/integration/restart_task_test.rb @@ -157,8 +157,6 @@ def test_restart_with_same_resource_twice "Configured to restart statefulsets by name: stateful-busybox", "Configured to restart daemonsets by name: ds-app", "Triggered `Deployment/web` restart", - "Triggered `StatefulSet/stateful-busybox` restart", - "Triggered `DaemonSet/ds-app` restart", "Result: SUCCESS", "Successfully restarted 3 resources", %r{Deployment/web.*1 availableReplica}, @@ -334,7 +332,7 @@ def test_verify_result_false_fails_on_config_checks end def test_verify_result_false_succeeds_quickly_when_verification_would_timeout - success = deploy_fixtures("hello-cloud", + success = deploy_fixtures("downward_api", subset: ["configmap-data.yml", "web.yml.erb", "daemon_set.yml", "stateful_set.yml"], render_erb: true) do |fixtures| deployment = fixtures["web.yml.erb"]["Deployment"].first @@ -348,7 +346,7 @@ def test_verify_result_false_succeeds_quickly_when_verification_would_timeout "command" => [ "/bin/sh", "-c", - "test $(env | grep -s restart annotation -c) -eq 0", + "test $(cat /etc/podinfo/annotations | grep -s kubectl.kubernetes.io/restartedAt -c) -eq 0", ], }, } From e5e7c65e0bff89b03fe5868770d37345be8c0cb3 Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Fri, 10 Sep 2021 12:59:42 -0400 Subject: [PATCH 16/19] yet more fix --- test/integration/restart_task_test.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/integration/restart_task_test.rb b/test/integration/restart_task_test.rb index 8f0797544..e33b162d2 100644 --- a/test/integration/restart_task_test.rb +++ b/test/integration/restart_task_test.rb @@ -154,11 +154,9 @@ def test_restart_with_same_resource_twice assert_logs_match_all([ "Configured to restart deployments by name: web", - "Configured to restart statefulsets by name: stateful-busybox", - "Configured to restart daemonsets by name: ds-app", "Triggered `Deployment/web` restart", "Result: SUCCESS", - "Successfully restarted 3 resources", + "Successfully restarted 1 resource", %r{Deployment/web.*1 availableReplica}, ], in_order: true) From 9a90299c96298cc654b5a0492574bcbaf9c92580 Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Mon, 13 Sep 2021 09:05:59 -0400 Subject: [PATCH 17/19] Add missining newline --- test/fixtures/downward_api/stateful_set.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/fixtures/downward_api/stateful_set.yml b/test/fixtures/downward_api/stateful_set.yml index 0843f12a6..8f43eb870 100644 --- a/test/fixtures/downward_api/stateful_set.yml +++ b/test/fixtures/downward_api/stateful_set.yml @@ -51,4 +51,4 @@ spec: items: - path: "annotations" fieldRef: - fieldPath: metadata.annotations \ No newline at end of file + fieldPath: metadata.annotations From b7f8e878856b9e037e3450246887a7d30ed5fd0b Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Tue, 14 Sep 2021 09:38:14 -0400 Subject: [PATCH 18/19] Add SS and DS count to statsd tags --- lib/krane/restart_task.rb | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/krane/restart_task.rb b/lib/krane/restart_task.rb index b48a77c45..d6f5e3f22 100644 --- a/lib/krane/restart_task.rb +++ b/lib/krane/restart_task.rb @@ -84,14 +84,17 @@ def run!(deployments: [], statefulsets: [], daemonsets: [], selector: nil, verif warning = "Result verification is disabled for this task" @logger.summary.add_paragraph(ColorizedString.new(warning).yellow) end - StatsD.client.distribution('restart.duration', StatsD.duration(start), tags: tags('success', deployments)) + StatsD.client.distribution('restart.duration', StatsD.duration(start), + tags: tags('success', deployments, statefulsets, daemonsets)) @logger.print_summary(:success) rescue DeploymentTimeoutError - StatsD.client.distribution('restart.duration', StatsD.duration(start), tags: tags('timeout', deployments)) + StatsD.client.distribution('restart.duration', StatsD.duration(start), + tags: tags('timeout', deployments, statefulsets, daemonsets)) @logger.print_summary(:timed_out) raise rescue FatalDeploymentError => error - StatsD.client.distribution('restart.duration', StatsD.duration(start), tags: tags('failure', deployments)) + StatsD.client.distribution('restart.duration', StatsD.duration(start), + tags: tags('failure', deployments, statefulsets, daemonsets)) @logger.summary.add_action(error.message) if error.message != error.class.to_s @logger.print_summary(:failure) raise @@ -101,7 +104,8 @@ def run!(deployments: [], statefulsets: [], daemonsets: [], selector: nil, verif private def tags(status, deployments) - %W(namespace:#{@namespace} context:#{@context} status:#{status} deployments:#{deployments.to_a.length}}) + %W(namespace:#{@namespace} context:#{@context} status:#{status} deployments:#{deployments.to_a.length} + statefulsets:#{statefulsets.to_a.length} daemonsets:#{daemonsets.to_a.length}}) end def identify_target_workloads(deployment_names, statefulset_names, daemonset_names, selector: nil) From 6f1a329440c60864b44a3083e11cf500ec69c630 Mon Sep 17 00:00:00 2001 From: Timothy Smith Date: Tue, 14 Sep 2021 10:37:16 -0400 Subject: [PATCH 19/19] Fix #tags method signature --- lib/krane/restart_task.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/krane/restart_task.rb b/lib/krane/restart_task.rb index d6f5e3f22..c8c5b8eee 100644 --- a/lib/krane/restart_task.rb +++ b/lib/krane/restart_task.rb @@ -103,7 +103,7 @@ def run!(deployments: [], statefulsets: [], daemonsets: [], selector: nil, verif private - def tags(status, deployments) + def tags(status, deployments, statefulsets, daemonsets) %W(namespace:#{@namespace} context:#{@context} status:#{status} deployments:#{deployments.to_a.length} statefulsets:#{statefulsets.to_a.length} daemonsets:#{daemonsets.to_a.length}}) end