From f8ba74506cf50c4fce1d04a7ed838de89b61ec54 Mon Sep 17 00:00:00 2001 From: Daniel Turner Date: Thu, 16 Nov 2017 14:50:59 -0600 Subject: [PATCH] partial-rollout-successby using an annotation add a restart test --- .../kubernetes_resource/deployment.rb | 65 ++++++++++++++++--- .../kubernetes_resource/replica_set.rb | 6 +- test/fixtures/slow-cloud/web.yml.erb | 37 +++++++++++ test/integration/kubernetes_deploy_test.rb | 27 ++++++++ test/integration/restart_task_test.rb | 21 ++++++ 5 files changed, 146 insertions(+), 10 deletions(-) create mode 100644 test/fixtures/slow-cloud/web.yml.erb diff --git a/lib/kubernetes-deploy/kubernetes_resource/deployment.rb b/lib/kubernetes-deploy/kubernetes_resource/deployment.rb index 54be13008..1402a3927 100644 --- a/lib/kubernetes-deploy/kubernetes_resource/deployment.rb +++ b/lib/kubernetes-deploy/kubernetes_resource/deployment.rb @@ -2,6 +2,9 @@ module KubernetesDeploy class Deployment < KubernetesResource TIMEOUT = 7.minutes + REQUIRED_ROLLOUT_ANNOTATION = 'kubernetes-deploy.shopify.io/required-rollout' + REQUIRED_ROLLOUT_TYPES = %w(maxUnavailable full none).freeze + DEFAULT_REQUIRED_ROLLOUT = 'full' def sync raw_json, _err, st = kubectl.run("get", type, @name, "--output=json") @@ -43,10 +46,24 @@ def fetch_logs def deploy_succeeded? return false unless @latest_rs.present? - @latest_rs.deploy_succeeded? && - @latest_rs.desired_replicas == @desired_replicas && # latest RS fully scaled up - @rollout_data["updatedReplicas"].to_i == @desired_replicas && - @rollout_data["updatedReplicas"].to_i == @rollout_data["availableReplicas"].to_i + case required_rollout + when 'full' + @latest_rs.deploy_succeeded? && + @latest_rs.desired_replicas == @desired_replicas && # latest RS fully scaled up + @rollout_data["updatedReplicas"].to_i == @desired_replicas && + @rollout_data["updatedReplicas"].to_i == @rollout_data["availableReplicas"].to_i + when 'none' + true + when 'maxUnavailable' + minimum_needed = min_available_replicas + + @latest_rs.desired_replicas >= minimum_needed && + @latest_rs.ready_replicas >= minimum_needed && + @latest_rs.available_replicas >= minimum_needed + else + raise "#{REQUIRED_ROLLOUT_ANNOTATION}:#{required_rollout} is invalid "\ + " Acceptable options: #{REQUIRED_ROLLOUT_TYPES.join(',')}" + end end def deploy_failed? @@ -81,6 +98,18 @@ def exists? @found end + def validate_definition + valid = super + + unless REQUIRED_ROLLOUT_TYPES.include?(required_rollout) + @validation_error_msg ||= '' + @validation_error_msg += "#{required_rollout} is not valid for required-rollout."\ + " Acceptable options: #{REQUIRED_ROLLOUT_TYPES.join(',')}" + return false + end + valid + end + private def deploy_failing_to_progress? @@ -98,18 +127,22 @@ def deploy_failing_to_progress? Time.parse(@progress_condition["lastUpdateTime"]).to_i >= (@deploy_started_at - 5.seconds).to_i end - def find_latest_rs(deployment_data) - label_string = deployment_data["spec"]["selector"]["matchLabels"].map { |k, v| "#{k}=#{v}" }.join(",") + def all_rs_data(match_labels) + label_string = match_labels.map { |k, v| "#{k}=#{v}" }.join(",") raw_json, _err, st = kubectl.run("get", "replicasets", "--output=json", "--selector=#{label_string}") - return unless st.success? + return {} unless st.success? + + JSON.parse(raw_json)["items"] + end - all_rs_data = JSON.parse(raw_json)["items"] + def find_latest_rs(deployment_data) current_revision = deployment_data["metadata"]["annotations"]["deployment.kubernetes.io/revision"] - latest_rs_data = all_rs_data.find do |rs| + latest_rs_data = all_rs_data(deployment_data["spec"]["selector"]["matchLabels"]).find do |rs| rs["metadata"]["ownerReferences"].any? { |ref| ref["uid"] == deployment_data["metadata"]["uid"] } && rs["metadata"]["annotations"]["deployment.kubernetes.io/revision"] == current_revision end + return unless latest_rs_data.present? rs = ReplicaSet.new( @@ -123,5 +156,19 @@ def find_latest_rs(deployment_data) rs.sync(latest_rs_data) rs end + + def min_available_replicas + max_unavailable = @definition.dig('spec', 'strategy', 'rollingUpdate', 'maxUnavailable') + if max_unavailable =~ /%/ + (@desired_replicas * (100 - max_unavailable.to_i) / 100.0).ceil + else + @desired_replicas - max_unavailable.to_i + end + end + + def required_rollout + @definition.dig('metadata', 'annotations', REQUIRED_ROLLOUT_ANNOTATION).presence || + DEFAULT_REQUIRED_ROLLOUT + end end end diff --git a/lib/kubernetes-deploy/kubernetes_resource/replica_set.rb b/lib/kubernetes-deploy/kubernetes_resource/replica_set.rb index 25a091ce7..c83c9d804 100644 --- a/lib/kubernetes-deploy/kubernetes_resource/replica_set.rb +++ b/lib/kubernetes-deploy/kubernetes_resource/replica_set.rb @@ -3,13 +3,15 @@ module KubernetesDeploy class ReplicaSet < PodSetBase TIMEOUT = 5.minutes - attr_reader :desired_replicas, :pods + attr_reader :desired_replicas, :ready_replicas, :available_replicas, :pods def initialize(namespace:, context:, definition:, logger:, parent: nil, deploy_started_at: nil) @parent = parent @deploy_started_at = deploy_started_at @rollout_data = { "replicas" => 0 } @desired_replicas = -1 + @ready_replicas = -1 + @available_replicas = -1 @pods = [] super(namespace: namespace, context: context, definition: definition, logger: logger) end @@ -26,6 +28,8 @@ def sync(rs_data = nil) @rollout_data = { "replicas" => 0 }.merge( rs_data["status"].slice("replicas", "availableReplicas", "readyReplicas") ) + @ready_replicas = @rollout_data['readyReplicas'].to_i + @available_replicas = @rollout_data["availableReplicas"].to_i @status = @rollout_data.map { |state_replicas, num| "#{num} #{state_replicas.chop.pluralize(num)}" }.join(", ") @pods = find_pods(rs_data) else # reset diff --git a/test/fixtures/slow-cloud/web.yml.erb b/test/fixtures/slow-cloud/web.yml.erb new file mode 100644 index 000000000..12c233599 --- /dev/null +++ b/test/fixtures/slow-cloud/web.yml.erb @@ -0,0 +1,37 @@ +apiVersion: apps/v1beta1 +kind: Deployment +metadata: + name: web + annotations: + shipit.shopify.io/restart: "true" + kubernetes-deploy.shopify.io/required-rollout: maxUnavailable +spec: + replicas: 3 + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 1 + template: + metadata: + labels: + name: web + app: slow-cloud + spec: + containers: + - name: app + readinessProbe: + exec: + command: + - sleep + - '7' + timeoutSeconds: 10 + image: busybox + imagePullPolicy: IfNotPresent + command: ["tail", "-f", "/dev/null"] + ports: + - containerPort: 80 + name: http + env: + - name: GITHUB_REV + value: <%= current_sha %> diff --git a/test/integration/kubernetes_deploy_test.rb b/test/integration/kubernetes_deploy_test.rb index 7224c5d58..703a24bb4 100644 --- a/test/integration/kubernetes_deploy_test.rb +++ b/test/integration/kubernetes_deploy_test.rb @@ -632,6 +632,33 @@ def test_can_deploy_deployment_with_zero_replicas ]) end + def test_deploy_successful_with_partial_availability + result = deploy_fixtures("slow-cloud") + assert_deploy_success(result) + + result = deploy_fixtures("slow-cloud") + assert_deploy_success(result) + + assert_logs_match_all( + [%r{Deployment\/web\s+[34] replicas, 3 updatedReplicas, 2 availableReplicas, [12] unavailableReplica}] + ) + end + + def test_deploy_successful_with_partial_availability_by_percent + result = deploy_fixtures("slow-cloud") + assert_deploy_success(result) + + result = deploy_fixtures("slow-cloud") do |fixtures| + web = fixtures["web.yml.erb"]["Deployment"].first + web["spec"]["strategy"]['rollingUpdate']['maxUnavailable'] = '34%' + end + assert_deploy_success(result) + + assert_logs_match_all( + [%r{Deployment\/web\s+[34] replicas, 3 updatedReplicas, 2 availableReplicas, [12] unavailableReplica}] + ) + end + def test_deploy_aborts_immediately_if_metadata_name_missing result = deploy_fixtures("hello-cloud", subset: ["configmap-data.yml"]) do |fixtures| definition = fixtures["configmap-data.yml"]["ConfigMap"].first diff --git a/test/integration/restart_task_test.rb b/test/integration/restart_task_test.rb index c787a3d03..8eab2354e 100644 --- a/test/integration/restart_task_test.rb +++ b/test/integration/restart_task_test.rb @@ -206,6 +206,27 @@ def test_restart_failure in_order: true) end + def test_restart_successful_with_partial_availability + result = deploy_fixtures("slow-cloud") + assert_deploy_success(result) + + restart = build_restart_task + assert_restart_success(restart.perform(["web"])) + + assert_logs_match_all([ + "Configured to restart deployments by name: web", + "Triggered `web` restart", + "Waiting for rollout", + %r{Successfully restarted in \d+\.\d+s: Deployment/web}, + "Result: SUCCESS", + "Successfully restarted 1 resource", + %r{Deployment\/web\s+[34] replicas, 3 updatedReplicas, 2 availableReplicas, [12] unavailableReplica} + ], + in_order: true) + + assert fetch_restarted_at("web"), "RESTARTED_AT is present after the restart" + end + private def build_restart_task