diff --git a/lib/krane.rb b/lib/krane.rb index 7ceb29288..3f06a5d61 100644 --- a/lib/krane.rb +++ b/lib/krane.rb @@ -4,3 +4,4 @@ require 'krane/render_task' require 'krane/restart_task' require 'krane/runner_task' +require 'krane/global_deploy_task' diff --git a/lib/krane/cli/global_deploy_command.rb b/lib/krane/cli/global_deploy_command.rb new file mode 100644 index 000000000..10a2d2cc5 --- /dev/null +++ b/lib/krane/cli/global_deploy_command.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +module Krane + module CLI + class GlobalDeployCommand + DEFAULT_DEPLOY_TIMEOUT = '300s' + OPTIONS = { + "filenames" => { type: :array, banner: 'config/deploy/production config/deploy/my-extra-resource.yml', + aliases: :f, required: true, + desc: "Directories and files that contains the configuration to apply" }, + "global-timeout" => { type: :string, banner: "duration", default: DEFAULT_DEPLOY_TIMEOUT, + desc: "Max duration to monitor workloads correctly deployed" }, + "verify-result" => { type: :boolean, default: true, + desc: "Verify workloads correctly deployed" }, + "selector" => { type: :string, banner: "'label=value'", required: true, + desc: "Select workloads owned by selector(s)" }, + } + + def self.from_options(context, options) + require 'krane/global_deploy_task' + require 'krane/options_helper' + require 'krane/label_selector' + require 'krane/duration_parser' + + selector = ::Krane::LabelSelector.parse(options[:selector]) + + ::Krane::OptionsHelper.with_processed_template_paths(options[:filenames], + require_explicit_path: true) do |paths| + deploy = ::Krane::GlobalDeployTask.new( + context: context, + filenames: paths, + global_timeout: ::Krane::DurationParser.new(options["global-timeout"]).parse!.to_i, + selector: selector, + ) + + deploy.run!( + verify_result: options["verify-result"], + prune: false, + ) + end + end + end + end +end diff --git a/lib/krane/cli/krane.rb b/lib/krane/cli/krane.rb index 200730358..5d0763448 100644 --- a/lib/krane/cli/krane.rb +++ b/lib/krane/cli/krane.rb @@ -7,6 +7,7 @@ require 'krane/cli/run_command' require 'krane/cli/render_command' require 'krane/cli/deploy_command' +require 'krane/cli/global_deploy_command' module Krane module CLI @@ -58,6 +59,14 @@ def deploy(namespace, context) end end + desc("global-deploy CONTEXT", "Ship non-namespaced resources to a cluster") + expand_options(GlobalDeployCommand::OPTIONS) + def global_deploy(context) + rescue_and_exit do + GlobalDeployCommand.from_options(context, options) + end + end + def self.exit_on_failure? true end diff --git a/lib/krane/cluster_resource_discovery.rb b/lib/krane/cluster_resource_discovery.rb index 9cc8c23db..677376e17 100644 --- a/lib/krane/cluster_resource_discovery.rb +++ b/lib/krane/cluster_resource_discovery.rb @@ -23,7 +23,8 @@ def global_resource_kinds private def fetch_globals - raw, _, st = kubectl.run("api-resources", "--namespaced=false", output: "wide", attempts: 5) + raw, _, st = kubectl.run("api-resources", "--namespaced=false", output: "wide", attempts: 5, + use_namespace: false) if st.success? rows = raw.split("\n") header = rows[0] @@ -42,7 +43,8 @@ def fetch_globals end def fetch_crds - raw_json, _, st = kubectl.run("get", "CustomResourceDefinition", output: "json", attempts: 5) + raw_json, _, st = kubectl.run("get", "CustomResourceDefinition", output: "json", attempts: 5, + use_namespace: false) if st.success? JSON.parse(raw_json)["items"] else diff --git a/lib/krane/concerns/template_reporting.rb b/lib/krane/concerns/template_reporting.rb new file mode 100644 index 000000000..76b40fda7 --- /dev/null +++ b/lib/krane/concerns/template_reporting.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +module Krane + module TemplateReporting + def record_invalid_template(logger:, err:, filename:, content: nil) + debug_msg = ColorizedString.new("Invalid template: #{filename}\n").red + debug_msg += "> Error message:\n#{Krane::FormattedLogger.indent_four(err)}" + if content + debug_msg += if content =~ /kind:\s*Secret/ + "\n> Template content: Suppressed because it may contain a Secret" + else + "\n> Template content:\n#{Krane::FormattedLogger.indent_four(content)}" + end + end + logger.summary.add_paragraph(debug_msg) + end + + def record_warnings(logger:, warning:, filename:) + warn_msg = "Template warning: #{filename}\n" + warn_msg += "> Warning message:\n#{Krane::FormattedLogger.indent_four(warning)}" + logger.summary.add_paragraph(ColorizedString.new(warn_msg).yellow) + end + + def add_para_from_list(logger:, action:, enum:) + logger.summary.add_action(action) + logger.summary.add_paragraph(enum.map { |e| "- #{e}" }.join("\n")) + end + end +end diff --git a/lib/krane/deprecated_deploy_task.rb b/lib/krane/deprecated_deploy_task.rb index 25211b278..d1f2a9e82 100644 --- a/lib/krane/deprecated_deploy_task.rb +++ b/lib/krane/deprecated_deploy_task.rb @@ -41,11 +41,14 @@ require 'krane/cluster_resource_discovery' require 'krane/template_sets' require 'krane/deploy_task_config_validator' +require 'krane/resource_deployer' +require 'krane/concerns/template_reporting' module Krane # Ship resources to a namespace class DeprecatedDeployTask extend Krane::StatsD::MeasureMethods + include Krane::TemplateReporting PROTECTED_NAMESPACES = %w( default @@ -175,7 +178,7 @@ def run!(verify_result: true, allow_protected_ns: false, prune: true) if deploy_has_priority_resources?(resources) @logger.phase_heading("Predeploying priority resources") - predeploy_priority_resources(resources) + resource_deployer.predeploy_priority_resources(resources, predeploy_sequence) end @logger.phase_heading("Deploying all resources") @@ -183,23 +186,8 @@ def run!(verify_result: true, allow_protected_ns: false, prune: true) raise FatalDeploymentError, "Refusing to deploy to protected namespace '#{@namespace}' with pruning enabled" end - if verify_result - deploy_all_resources(resources, prune: prune, verify: true) - failed_resources = resources.reject(&:deploy_succeeded?) - success = failed_resources.empty? - if !success && failed_resources.all?(&:deploy_timed_out?) - raise DeploymentTimeoutError - end - raise FatalDeploymentError unless success - else - deploy_all_resources(resources, prune: prune, verify: false) - @logger.summary.add_action("deployed #{resources.length} #{'resource'.pluralize(resources.length)}") - warning = <<~MSG - Deploy result verification is disabled for this deploy. - This means the desired changes were communicated to Kubernetes, but the deploy did not make sure they actually succeeded. - MSG - @logger.summary.add_paragraph(ColorizedString.new(warning).yellow) - end + resource_deployer.deploy!(resources, verify_result, prune) + StatsD.client.event("Deployment of #{@namespace} succeeded", "Successfully deployed all #{@namespace} resources to #{@context}", alert_type: "success", tags: statsd_tags + %w(status:success)) @@ -227,8 +215,10 @@ def run!(verify_result: true, allow_protected_ns: false, prune: true) private - def global_resource_names - cluster_resource_discoverer.global_resource_kinds + def resource_deployer + @resource_deployer ||= Krane::ResourceDeployer.new(task_config: @task_config, + prune_whitelist: prune_whitelist, max_watch_seconds: @max_watch_seconds, + selector: @selector, statsd_tags: statsd_tags, current_sha: @current_sha) end def kubeclient_builder @@ -258,71 +248,6 @@ def deploy_has_priority_resources?(resources) resources.any? { |r| predeploy_sequence.include?(r.type) } end - def predeploy_priority_resources(resource_list) - bare_pods = resource_list.select { |resource| resource.is_a?(Pod) } - if bare_pods.count == 1 - bare_pods.first.stream_logs = true - end - - predeploy_sequence.each do |resource_type| - matching_resources = resource_list.select { |r| r.type == resource_type } - next if matching_resources.empty? - deploy_resources(matching_resources, verify: true, record_summary: false) - - failed_resources = matching_resources.reject(&:deploy_succeeded?) - fail_count = failed_resources.length - if fail_count > 0 - Krane::Concurrency.split_across_threads(failed_resources) do |r| - r.sync_debug_info(kubectl) - end - failed_resources.each { |r| @logger.summary.add_paragraph(r.debug_message) } - raise FatalDeploymentError, "Failed to deploy #{fail_count} priority #{'resource'.pluralize(fail_count)}" - end - @logger.blank_line - end - end - measure_method(:predeploy_priority_resources, 'priority_resources.duration') - - def validate_resources(resources) - Krane::Concurrency.split_across_threads(resources) do |r| - r.validate_definition(kubectl, selector: @selector) - end - - resources.select(&:has_warnings?).each do |resource| - record_warnings(warning: resource.validation_warning_msg, filename: File.basename(resource.file_path)) - end - - failed_resources = resources.select(&:validation_failed?) - if failed_resources.present? - - failed_resources.each do |r| - content = File.read(r.file_path) if File.file?(r.file_path) && !r.sensitive_template_content? - record_invalid_template(err: r.validation_error_msg, filename: File.basename(r.file_path), content: content) - end - raise FatalDeploymentError, "Template validation failed" - end - validate_globals(resources) - end - measure_method(:validate_resources) - - def validate_globals(resources) - return unless (global = resources.select(&:global?).presence) - global_names = global.map do |resource| - "#{resource.name} (#{resource.type}) in #{File.basename(resource.file_path)}" - end - global_names = FormattedLogger.indent_four(global_names.join("\n")) - - if @allow_globals - msg = "The ability for this task to deploy global resources will be removed in the next version,"\ - " which will affect the following resources:" - msg += "\n#{global_names}" - @logger.summary.add_paragraph(ColorizedString.new(msg).yellow) - else - @logger.summary.add_paragraph(ColorizedString.new("Global resources:\n#{global_names}").yellow) - raise FatalDeploymentError, "This command is namespaced and cannot be used to deploy global resources." - end - end - def check_initial_status(resources) cache = ResourceCache.new(@task_config) Krane::Concurrency.split_across_threads(resources) { |r| r.sync(cache) } @@ -342,7 +267,7 @@ def discover_resources current_sha: @current_sha, bindings: @bindings) do |r_def| crd = crds_by_kind[r_def["kind"]]&.first r = KubernetesResource.build(namespace: @namespace, context: @context, logger: @logger, definition: r_def, - statsd_tags: @namespace_tags, crd: crd, global_names: global_resource_names) + statsd_tags: @namespace_tags, crd: crd, global_names: @task_config.global_kinds) resources << r @logger.info(" - #{r.id}") end @@ -354,30 +279,12 @@ def discover_resources resources.sort rescue InvalidTemplateError => e - record_invalid_template(err: e.message, filename: e.filename, content: e.content) + record_invalid_template(logger: @logger, err: e.message, filename: e.filename, + content: e.content) raise FatalDeploymentError, "Failed to render and parse template" end measure_method(:discover_resources) - def record_invalid_template(err:, filename:, content: nil) - debug_msg = ColorizedString.new("Invalid template: #{filename}\n").red - debug_msg += "> Error message:\n#{FormattedLogger.indent_four(err)}" - if content - debug_msg += if content =~ /kind:\s*Secret/ - "\n> Template content: Suppressed because it may contain a Secret" - else - "\n> Template content:\n#{FormattedLogger.indent_four(content)}" - end - end - @logger.summary.add_paragraph(debug_msg) - end - - def record_warnings(warning:, filename:) - warn_msg = "Template warning: #{filename}\n" - warn_msg += "> Warning message:\n#{FormattedLogger.indent_four(warning)}" - @logger.summary.add_paragraph(ColorizedString.new(warn_msg).yellow) - end - def validate_configuration(allow_protected_ns:, prune:) task_config_validator = DeployTaskConfigValidator.new(@protected_namespaces, allow_protected_ns, prune, @task_config, kubectl, kubeclient_builder) @@ -385,8 +292,7 @@ def validate_configuration(allow_protected_ns:, prune:) errors += task_config_validator.errors errors += @template_sets.validate unless errors.empty? - @logger.summary.add_action("Configuration invalid") - @logger.summary.add_paragraph(errors.map { |err| "- #{err}" }.join("\n")) + add_para_from_list(logger: @logger, action: "Configuration invalid", enum: errors) raise Krane::TaskConfigurationError end @@ -397,191 +303,45 @@ def validate_configuration(allow_protected_ns:, prune:) end measure_method(:validate_configuration) - def deploy_resources(resources, prune: false, verify:, record_summary: true) - return if resources.empty? - deploy_started_at = Time.now.utc - - if resources.length > 1 - @logger.info("Deploying resources:") - resources.each do |r| - @logger.info("- #{r.id} (#{r.pretty_timeout_type})") - end - else - resource = resources.first - @logger.info("Deploying #{resource.id} (#{resource.pretty_timeout_type})") - end - - # Apply can be done in one large batch, the rest have to be done individually - applyables, individuals = resources.partition { |r| r.deploy_method == :apply } - # Prunable resources should also applied so that they can be pruned - pruneable_types = prune_whitelist.map { |t| t.split("/").last } - applyables += individuals.select { |r| pruneable_types.include?(r.type) } - - individuals.each do |individual_resource| - individual_resource.deploy_started_at = Time.now.utc - - case individual_resource.deploy_method - when :create - err, status = create_resource(individual_resource) - when :replace - err, status = replace_or_create_resource(individual_resource) - when :replace_force - err, status = replace_or_create_resource(individual_resource, force: true) - else - # Fail Fast! This is a programmer mistake. - raise ArgumentError, "Unexpected deploy method! (#{individual_resource.deploy_method.inspect})" - end - - next if status.success? - - raise FatalDeploymentError, <<~MSG - Failed to replace or create resource: #{individual_resource.id} - #{individual_resource.sensitive_template_content? ? '' : err} - MSG - end - - apply_all(applyables, prune) - - if verify - watcher = ResourceWatcher.new(resources: resources, deploy_started_at: deploy_started_at, - timeout: @max_watch_seconds, task_config: @task_config, sha: @current_sha) - watcher.run(record_summary: record_summary) - end - end - - def replace_or_create_resource(resource, force: false) - args = if force - ["replace", "--force", "--cascade", "-f", resource.file_path] - else - ["replace", "-f", resource.file_path] + def validate_resources(resources) + validate_globals(resources) + Krane::Concurrency.split_across_threads(resources) do |r| + r.validate_definition(kubectl, selector: @selector) end - _, err, status = kubectl.run(*args, log_failure: false, output_is_sensitive: resource.sensitive_template_content?, - raise_if_not_found: true) - - [err, status] - rescue Krane::Kubectl::ResourceNotFoundError - # it doesn't exist so we can't replace it, we try to create it - create_resource(resource) - end - - def create_resource(resource) - out, err, status = kubectl.run("create", "-f", resource.file_path, log_failure: false, output: 'json', - output_is_sensitive: resource.sensitive_template_content?) - - # For resources that rely on a generateName attribute, we get the `name` from the result of the call to `create` - # We must explicitly set this name value so that the `apply` step for pruning can run successfully - if status.success? && resource.uses_generate_name? - resource.use_generated_name(JSON.parse(out)) + resources.select(&:has_warnings?).each do |resource| + record_warnings(logger: @logger, warning: resource.validation_warning_msg, + filename: File.basename(resource.file_path)) end - [err, status] - end - - def deploy_all_resources(resources, prune: false, verify:, record_summary: true) - deploy_resources(resources, prune: prune, verify: verify, record_summary: record_summary) - end - measure_method(:deploy_all_resources, 'normal_resources.duration') - - def apply_all(resources, prune) - return unless resources.present? - command = %w(apply) - - Dir.mktmpdir do |tmp_dir| - resources.each do |r| - FileUtils.symlink(r.file_path, tmp_dir) - r.deploy_started_at = Time.now.utc - end - command.push("-f", tmp_dir) - - if prune - command.push("--prune") - if @selector - command.push("--selector", @selector.to_s) - else - command.push("--all") - end - prune_whitelist.each { |type| command.push("--prune-whitelist=#{type}") } - end - - output_is_sensitive = resources.any?(&:sensitive_template_content?) - out, err, st = kubectl.run(*command, log_failure: false, output_is_sensitive: output_is_sensitive) + failed_resources = resources.select(&:validation_failed?) + if failed_resources.present? - if st.success? - log_pruning(out) if prune - else - record_apply_failure(err, resources: resources) - raise FatalDeploymentError, "Command failed: #{Shellwords.join(command)}" + failed_resources.each do |r| + content = File.read(r.file_path) if File.file?(r.file_path) && !r.sensitive_template_content? + record_invalid_template(logger: @logger, err: r.validation_error_msg, + filename: File.basename(r.file_path), content: content) end + raise FatalDeploymentError, "Template validation failed" end end - measure_method(:apply_all) - - def log_pruning(kubectl_output) - pruned = kubectl_output.scan(/^(.*) pruned$/) - return unless pruned.present? - - @logger.info("The following resources were pruned: #{pruned.join(', ')}") - @logger.summary.add_action("pruned #{pruned.length} #{'resource'.pluralize(pruned.length)}") - end - - def record_apply_failure(err, resources: []) - warn_msg = "WARNING: Any resources not mentioned in the error(s) below were likely created/updated. " \ - "You may wish to roll back this deploy." - @logger.summary.add_paragraph(ColorizedString.new(warn_msg).yellow) - - unidentified_errors = [] - filenames_with_sensitive_content = resources - .select(&:sensitive_template_content?) - .map { |r| File.basename(r.file_path) } - - server_dry_run_validated_resource = resources - .select(&:server_dry_run_validated?) - .map { |r| File.basename(r.file_path) } - - err.each_line do |line| - bad_files = find_bad_files_from_kubectl_output(line) - unless bad_files.present? - unidentified_errors << line - next - end + measure_method(:validate_resources) - bad_files.each do |f| - err_msg = f[:err] - if filenames_with_sensitive_content.include?(f[:filename]) - # Hide the error and template contents in case it has sensitive information - # we display full error messages as we assume there's no sensitive info leak after server-dry-run - err_msg = "SUPPRESSED FOR SECURITY" unless server_dry_run_validated_resource.include?(f[:filename]) - record_invalid_template(err: err_msg, filename: f[:filename], content: nil) - else - record_invalid_template(err: err_msg, filename: f[:filename], content: f[:content]) - end - end + def validate_globals(resources) + return unless (global = resources.select(&:global?).presence) + global_names = global.map do |resource| + "#{resource.name} (#{resource.type}) in #{File.basename(resource.file_path)}" end - return unless unidentified_errors.any? + global_names = FormattedLogger.indent_four(global_names.join("\n")) - if (filenames_with_sensitive_content - server_dry_run_validated_resource).present? - warn_msg = "WARNING: There was an error applying some or all resources. The raw output may be sensitive and " \ - "so cannot be displayed." - @logger.summary.add_paragraph(ColorizedString.new(warn_msg).yellow) + if @allow_globals + msg = "The ability for this task to deploy global resources will be removed in the next version,"\ + " which will affect the following resources:" + msg += "\n#{global_names}" + @logger.summary.add_paragraph(ColorizedString.new(msg).yellow) else - heading = ColorizedString.new('Unidentified error(s):').red - msg = FormattedLogger.indent_four(unidentified_errors.join) - @logger.summary.add_paragraph("#{heading}\n#{msg}") - end - end - - # Inspect the file referenced in the kubectl stderr - # to make it easier for developer to understand what's going on - def find_bad_files_from_kubectl_output(line) - # stderr often contains one or more lines like the following, from which we can extract the file path(s): - # Error from server (TypeOfError): error when creating "/path/to/service-gqq5oh.yml": Service "web" is invalid: - - line.scan(%r{"(/\S+\.ya?ml\S*)"}).each_with_object([]) do |matches, bad_files| - matches.each do |path| - content = File.read(path) if File.file?(path) - bad_files << { filename: File.basename(path), err: line, content: content } - end + @logger.summary.add_paragraph(ColorizedString.new("Global resources:\n#{global_names}").yellow) + raise FatalDeploymentError, "This command is namespaced and cannot be used to deploy global resources." end end diff --git a/lib/krane/global_deploy_task.rb b/lib/krane/global_deploy_task.rb new file mode 100644 index 000000000..eaa17deee --- /dev/null +++ b/lib/krane/global_deploy_task.rb @@ -0,0 +1,207 @@ +# frozen_string_literal: true +require 'tempfile' + +require 'krane/common' +require 'krane/concurrency' +require 'krane/resource_cache' +require 'krane/kubectl' +require 'krane/kubeclient_builder' +require 'krane/cluster_resource_discovery' +require 'krane/template_sets' +require 'krane/resource_deployer' +require 'krane/kubernetes_resource' +require 'krane/global_deploy_task_config_validator' +require 'krane/concerns/template_reporting' + +%w( + custom_resource + custom_resource_definition +).each do |subresource| + require "krane/kubernetes_resource/#{subresource}" +end + +module Krane + # Ship global resources to a context + class GlobalDeployTask + extend Krane::StatsD::MeasureMethods + include TemplateReporting + delegate :context, :logger, :global_kinds, to: :@task_config + + # Initializes the deploy task + # + # @param context [String] Kubernetes context + # @param global_timeout [Integer] Timeout in seconds + # @param selector [Hash] Selector(s) parsed by Krane::LabelSelector + # @param template_paths [Array] An array of template paths + def initialize(context:, global_timeout: nil, selector: nil, filenames: [], logger: nil) + template_paths = filenames.map { |path| File.expand_path(path) } + + @task_config = TaskConfig.new(context, nil, logger) + @template_sets = TemplateSets.from_dirs_and_files(paths: template_paths, + logger: @task_config.logger) + @global_timeout = global_timeout + @selector = selector + end + + # Runs the task, returning a boolean representing success or failure + # + # @return [Boolean] + def run(*args) + run!(*args) + true + rescue FatalDeploymentError + false + end + + # Runs the task, raising exceptions in case of issues + # + # @param verify_result [Boolean] Wait for completion and verify success + # @param prune [Boolean] Enable deletion of resources that match the provided + # selector and do not appear in the template dir + # + # @return [nil] + def run!(verify_result: true, prune: true) + start = Time.now.utc + logger.reset + + logger.phase_heading("Initializing deploy") + validate_configuration + resources = discover_resources + validate_resources(resources) + + logger.phase_heading("Checking initial resource statuses") + check_initial_status(resources) + + logger.phase_heading("Deploying all resources") + deploy!(resources, verify_result, prune) + + StatsD.client.event("Deployment succeeded", + "Successfully deployed all resources to #{context}", + alert_type: "success", tags: statsd_tags + %w(status:success)) + StatsD.client.distribution('all_resources.duration', StatsD.duration(start), + tags: statsd_tags << "status:success") + logger.print_summary(:success) + rescue Krane::DeploymentTimeoutError + logger.print_summary(:timed_out) + StatsD.client.event("Deployment timed out", + "One or more resources failed to deploy to #{context} in time", + alert_type: "error", tags: statsd_tags + %w(status:timeout)) + StatsD.client.distribution('all_resources.duration', StatsD.duration(start), + tags: statsd_tags << "status:timeout") + raise + rescue Krane::FatalDeploymentError => error + logger.summary.add_action(error.message) if error.message != error.class.to_s + logger.print_summary(:failure) + StatsD.client.event("Deployment failed", + "One or more resources failed to deploy to #{context}", + alert_type: "error", tags: statsd_tags + %w(status:failed)) + StatsD.client.distribution('all_resources.duration', StatsD.duration(start), + tags: statsd_tags << "status:failed") + raise + end + + private + + def deploy!(resources, verify_result, prune) + prune_whitelist = [] + resource_deployer = ResourceDeployer.new(task_config: @task_config, + prune_whitelist: prune_whitelist, max_watch_seconds: @global_timeout, + selector: @selector, statsd_tags: statsd_tags) + resource_deployer.deploy!(resources, verify_result, prune) + end + + def validate_configuration + task_config_validator = GlobalDeployTaskConfigValidator.new(@task_config, + kubectl, kubeclient_builder) + errors = [] + errors += task_config_validator.errors + errors += @template_sets.validate + errors << "Selector is required" unless @selector.to_h.present? + unless errors.empty? + add_para_from_list(logger: logger, action: "Configuration invalid", enum: errors) + raise TaskConfigurationError + end + + logger.info("Using resource selector #{@selector}") + logger.info("All required parameters and files are present") + end + measure_method(:validate_configuration) + + def validate_resources(resources) + validate_globals(resources) + + Concurrency.split_across_threads(resources) do |r| + r.validate_definition(@kubectl, selector: @selector) + end + + resources.select(&:has_warnings?).each do |resource| + record_warnings(logger: logger, warning: resource.validation_warning_msg, + filename: File.basename(resource.file_path)) + end + + failed_resources = resources.select(&:validation_failed?) + if failed_resources.present? + failed_resources.each do |r| + content = File.read(r.file_path) if File.file?(r.file_path) && !r.sensitive_template_content? + record_invalid_template(logger: logger, err: r.validation_error_msg, + filename: File.basename(r.file_path), content: content) + end + raise FatalDeploymentError, "Template validation failed" + end + end + measure_method(:validate_resources) + + def validate_globals(resources) + return unless (namespaced = resources.reject(&:global?).presence) + namespaced_names = namespaced.map do |resource| + "#{resource.name} (#{resource.type}) in #{File.basename(resource.file_path)}" + end + namespaced_names = FormattedLogger.indent_four(namespaced_names.join("\n")) + + logger.summary.add_paragraph(ColorizedString.new("Namespaced resources:\n#{namespaced_names}").yellow) + raise FatalDeploymentError, "This command cannot deploy namespaced resources" + end + + def discover_resources + logger.info("Discovering resources:") + resources = [] + crds_by_kind = cluster_resource_discoverer.crds.map { |crd| [crd.name, crd] }.to_h + @template_sets.with_resource_definitions do |r_def| + crd = crds_by_kind[r_def["kind"]]&.first + r = KubernetesResource.build(context: context, logger: logger, definition: r_def, + crd: crd, global_names: global_kinds, statsd_tags: statsd_tags) + resources << r + logger.info(" - #{r.id}") + end + + resources.sort + rescue InvalidTemplateError => e + record_invalid_template(logger: logger, err: e.message, filename: e.filename, content: e.content) + raise FatalDeploymentError, "Failed to parse template" + end + measure_method(:discover_resources) + + def cluster_resource_discoverer + @cluster_resource_discoverer ||= ClusterResourceDiscovery.new(task_config: @task_config) + end + + def statsd_tags + %W(context:#{context}) + end + + def kubectl + @kubectl ||= Kubectl.new(task_config: @task_config, log_failure_by_default: true) + end + + def kubeclient_builder + @kubeclient_builder ||= KubeclientBuilder.new + end + + def check_initial_status(resources) + cache = ResourceCache.new(@task_config) + Concurrency.split_across_threads(resources) { |r| r.sync(cache) } + resources.each { |r| logger.info(r.pretty_status) } + end + measure_method(:check_initial_status, "initial_status.duration") + end +end diff --git a/lib/krane/global_deploy_task_config_validator.rb b/lib/krane/global_deploy_task_config_validator.rb new file mode 100644 index 000000000..88c09e55b --- /dev/null +++ b/lib/krane/global_deploy_task_config_validator.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +require 'krane/task_config_validator' + +module Krane + class GlobalDeployTaskConfigValidator < Krane::TaskConfigValidator + def initialize(*arguments) + super(*arguments) + @validations -= [:validate_namespace_exists] + end + end +end diff --git a/lib/krane/kubernetes_resource.rb b/lib/krane/kubernetes_resource.rb index 921f8e4f6..132bdc61f 100644 --- a/lib/krane/kubernetes_resource.rb +++ b/lib/krane/kubernetes_resource.rb @@ -40,7 +40,7 @@ class KubernetesResource SERVER_DRY_RUNNABLE = false class << self - def build(namespace:, context:, definition:, logger:, statsd_tags:, crd: nil, global_names: []) + def build(namespace: nil, context:, definition:, logger:, statsd_tags:, crd: nil, global_names: []) validate_definition_essentials(definition) opts = { namespace: namespace, context: context, definition: definition, logger: logger, statsd_tags: statsd_tags } @@ -322,7 +322,7 @@ def debug_message(cause = nil, info_hash = {}) def fetch_events(kubectl) return {} unless exists? out, _err, st = kubectl.run("get", "events", "--output=go-template=#{Event.go_template_for(type, name)}", - log_failure: false) + log_failure: false, use_namespace: !global?) return {} unless st.success? event_collector = Hash.new { |hash, key| hash[key] = [] } @@ -532,7 +532,7 @@ def validate_with_local_dry_run(kubectl) verb = deploy_method == :apply ? "apply" : "create" command = [verb, "-f", file_path, "--dry-run", "--output=name"] kubectl.run(*command, log_failure: false, output_is_sensitive: sensitive_template_content?, - retry_whitelist: [:client_timeout], attempts: 3) + retry_whitelist: [:client_timeout], attempts: 3, use_namespace: !global?) end def labels diff --git a/lib/krane/resource_cache.rb b/lib/krane/resource_cache.rb index 27e638dc7..e0030fb8a 100644 --- a/lib/krane/resource_cache.rb +++ b/lib/krane/resource_cache.rb @@ -51,9 +51,10 @@ def use_or_populate_cache(kind) def fetch_by_kind(kind) resource_class = KubernetesResource.class_for_kind(kind) + global_kind = @task_config.global_kinds.map(&:downcase).include?(kind.downcase) output_is_sensitive = resource_class.nil? ? false : resource_class::SENSITIVE_TEMPLATE_CONTENT raw_json, _, st = @kubectl.run("get", kind, "--chunk-size=0", attempts: 5, output: "json", - output_is_sensitive: output_is_sensitive) + output_is_sensitive: output_is_sensitive, use_namespace: !global_kind) raise KubectlError unless st.success? instances = {} diff --git a/lib/krane/resource_deployer.rb b/lib/krane/resource_deployer.rb new file mode 100644 index 000000000..4ffd42a0e --- /dev/null +++ b/lib/krane/resource_deployer.rb @@ -0,0 +1,265 @@ +# frozen_string_literal: true + +require 'krane/resource_watcher' +require 'krane/concerns/template_reporting' + +module Krane + class ResourceDeployer + extend Krane::StatsD::MeasureMethods + include Krane::TemplateReporting + + delegate :logger, to: :@task_config + attr_reader :statsd_tags + + def initialize(task_config:, prune_whitelist:, max_watch_seconds:, current_sha: nil, selector:, statsd_tags:) + @task_config = task_config + @prune_whitelist = prune_whitelist + @max_watch_seconds = max_watch_seconds + @current_sha = current_sha + @selector = selector + @statsd_tags = statsd_tags + end + + def deploy!(resources, verify_result, prune) + if verify_result + deploy_all_resources(resources, prune: prune, verify: true) + failed_resources = resources.reject(&:deploy_succeeded?) + success = failed_resources.empty? + if !success && failed_resources.all?(&:deploy_timed_out?) + raise DeploymentTimeoutError + end + raise FatalDeploymentError unless success + else + deploy_all_resources(resources, prune: prune, verify: false) + logger.summary.add_action("deployed #{resources.length} #{'resource'.pluralize(resources.length)}") + warning = <<~MSG + Deploy result verification is disabled for this deploy. + This means the desired changes were communicated to Kubernetes, but the deploy did not make sure they actually succeeded. + MSG + logger.summary.add_paragraph(ColorizedString.new(warning).yellow) + end + end + + def predeploy_priority_resources(resource_list, predeploy_sequence) + bare_pods = resource_list.select { |resource| resource.is_a?(Pod) } + if bare_pods.count == 1 + bare_pods.first.stream_logs = true + end + + predeploy_sequence.each do |resource_type| + matching_resources = resource_list.select { |r| r.type == resource_type } + next if matching_resources.empty? + deploy_resources(matching_resources, verify: true, record_summary: false) + + failed_resources = matching_resources.reject(&:deploy_succeeded?) + fail_count = failed_resources.length + if fail_count > 0 + Krane::Concurrency.split_across_threads(failed_resources) do |r| + r.sync_debug_info(kubectl) + end + failed_resources.each { |r| logger.summary.add_paragraph(r.debug_message) } + raise FatalDeploymentError, "Failed to deploy #{fail_count} priority #{'resource'.pluralize(fail_count)}" + end + logger.blank_line + end + end + measure_method(:predeploy_priority_resources, 'priority_resources.duration') + + private + + def deploy_all_resources(resources, prune: false, verify:, record_summary: true) + deploy_resources(resources, prune: prune, verify: verify, record_summary: record_summary) + end + measure_method(:deploy_all_resources, 'normal_resources.duration') + + def deploy_resources(resources, prune: false, verify:, record_summary: true) + return if resources.empty? + deploy_started_at = Time.now.utc + + if resources.length > 1 + logger.info("Deploying resources:") + resources.each do |r| + logger.info("- #{r.id} (#{r.pretty_timeout_type})") + end + else + resource = resources.first + logger.info("Deploying #{resource.id} (#{resource.pretty_timeout_type})") + end + + # Apply can be done in one large batch, the rest have to be done individually + applyables, individuals = resources.partition { |r| r.deploy_method == :apply } + # Prunable resources should also applied so that they can be pruned + pruneable_types = @prune_whitelist.map { |t| t.split("/").last } + applyables += individuals.select { |r| pruneable_types.include?(r.type) } + + individuals.each do |individual_resource| + individual_resource.deploy_started_at = Time.now.utc + + case individual_resource.deploy_method + when :create + err, status = create_resource(individual_resource) + when :replace + err, status = replace_or_create_resource(individual_resource) + when :replace_force + err, status = replace_or_create_resource(individual_resource, force: true) + else + # Fail Fast! This is a programmer mistake. + raise ArgumentError, "Unexpected deploy method! (#{individual_resource.deploy_method.inspect})" + end + + next if status.success? + + raise FatalDeploymentError, <<~MSG + Failed to replace or create resource: #{individual_resource.id} + #{individual_resource.sensitive_template_content? ? '' : err} + MSG + end + + apply_all(applyables, prune) + + if verify + watcher = Krane::ResourceWatcher.new(resources: resources, deploy_started_at: deploy_started_at, + timeout: @max_watch_seconds, task_config: @task_config, sha: @current_sha) + watcher.run(record_summary: record_summary) + end + end + + def apply_all(resources, prune) + return unless resources.present? + command = %w(apply) + + Dir.mktmpdir do |tmp_dir| + resources.each do |r| + FileUtils.symlink(r.file_path, tmp_dir) + r.deploy_started_at = Time.now.utc + end + command.push("-f", tmp_dir) + + if prune && @prune_whitelist.present? + command.push("--prune") + if @selector + command.push("--selector", @selector.to_s) + else + command.push("--all") + end + @prune_whitelist.each { |type| command.push("--prune-whitelist=#{type}") } + end + + output_is_sensitive = resources.any?(&:sensitive_template_content?) + global_mode = resources.all?(&:global?) + out, err, st = kubectl.run(*command, log_failure: false, output_is_sensitive: output_is_sensitive, + use_namespace: !global_mode) + + if st.success? + log_pruning(out) if prune + else + record_apply_failure(err, resources: resources) + raise FatalDeploymentError, "Command failed: #{Shellwords.join(command)}" + end + end + end + measure_method(:apply_all) + + def log_pruning(kubectl_output) + pruned = kubectl_output.scan(/^(.*) pruned$/) + return unless pruned.present? + + logger.info("The following resources were pruned: #{pruned.join(', ')}") + logger.summary.add_action("pruned #{pruned.length} #{'resource'.pluralize(pruned.length)}") + end + + def record_apply_failure(err, resources: []) + warn_msg = "WARNING: Any resources not mentioned in the error(s) below were likely created/updated. " \ + "You may wish to roll back this deploy." + logger.summary.add_paragraph(ColorizedString.new(warn_msg).yellow) + + unidentified_errors = [] + filenames_with_sensitive_content = resources + .select(&:sensitive_template_content?) + .map { |r| File.basename(r.file_path) } + + server_dry_run_validated_resource = resources + .select(&:server_dry_run_validated?) + .map { |r| File.basename(r.file_path) } + + err.each_line do |line| + bad_files = find_bad_files_from_kubectl_output(line) + unless bad_files.present? + unidentified_errors << line + next + end + + bad_files.each do |f| + err_msg = f[:err] + if filenames_with_sensitive_content.include?(f[:filename]) + # Hide the error and template contents in case it has sensitive information + # we display full error messages as we assume there's no sensitive info leak after server-dry-run + err_msg = "SUPPRESSED FOR SECURITY" unless server_dry_run_validated_resource.include?(f[:filename]) + record_invalid_template(logger: logger, err: err_msg, filename: f[:filename], content: nil) + else + record_invalid_template(logger: logger, err: err_msg, filename: f[:filename], content: f[:content]) + end + end + end + return unless unidentified_errors.any? + + if (filenames_with_sensitive_content - server_dry_run_validated_resource).present? + warn_msg = "WARNING: There was an error applying some or all resources. The raw output may be sensitive and " \ + "so cannot be displayed." + logger.summary.add_paragraph(ColorizedString.new(warn_msg).yellow) + else + heading = ColorizedString.new('Unidentified error(s):').red + msg = FormattedLogger.indent_four(unidentified_errors.join) + logger.summary.add_paragraph("#{heading}\n#{msg}") + end + end + + def replace_or_create_resource(resource, force: false) + args = if force + ["replace", "--force", "--cascade", "-f", resource.file_path] + else + ["replace", "-f", resource.file_path] + end + + _, err, status = kubectl.run(*args, log_failure: false, output_is_sensitive: resource.sensitive_template_content?, + raise_if_not_found: true, use_namespace: !resource.global?) + + [err, status] + rescue Krane::Kubectl::ResourceNotFoundError + # it doesn't exist so we can't replace it, we try to create it + create_resource(resource) + end + + def create_resource(resource) + out, err, status = kubectl.run("create", "-f", resource.file_path, log_failure: false, + output: 'json', output_is_sensitive: resource.sensitive_template_content?, + use_namespace: !resource.global?) + + # For resources that rely on a generateName attribute, we get the `name` from the result of the call to `create` + # We must explicitly set this name value so that the `apply` step for pruning can run successfully + if status.success? && resource.uses_generate_name? + resource.use_generated_name(JSON.parse(out)) + end + + [err, status] + end + + # Inspect the file referenced in the kubectl stderr + # to make it easier for developer to understand what's going on + def find_bad_files_from_kubectl_output(line) + # stderr often contains one or more lines like the following, from which we can extract the file path(s): + # Error from server (TypeOfError): error when creating "/path/to/service-gqq5oh.yml": Service "web" is invalid: + + line.scan(%r{"(/\S+\.ya?ml\S*)"}).each_with_object([]) do |matches, bad_files| + matches.each do |path| + content = File.read(path) if File.file?(path) + bad_files << { filename: File.basename(path), err: line, content: content } + end + end + end + + def kubectl + @kubectl ||= Kubectl.new(task_config: @task_config, log_failure_by_default: true) + end + end +end diff --git a/lib/krane/task_config.rb b/lib/krane/task_config.rb index e0d2bb12f..41bb0d033 100644 --- a/lib/krane/task_config.rb +++ b/lib/krane/task_config.rb @@ -1,16 +1,19 @@ # frozen_string_literal: true module Krane class TaskConfig - attr_reader :context, :namespace + attr_reader :context, :namespace, :logger def initialize(context, namespace, logger = nil) @context = context @namespace = namespace - @logger = logger + @logger = logger || FormattedLogger.build(@namespace, @context) end - def logger - @logger ||= Krane::FormattedLogger.build(@namespace, @context) + def global_kinds + @global_kinds ||= begin + cluster_resource_discoverer = ClusterResourceDiscovery.new(task_config: self) + cluster_resource_discoverer.global_resource_kinds + end end end end diff --git a/test/exe/global_deploy_test.rb b/test/exe/global_deploy_test.rb new file mode 100644 index 000000000..4fc658988 --- /dev/null +++ b/test/exe/global_deploy_test.rb @@ -0,0 +1,75 @@ +# frozen_string_literal: true +require 'test_helper' +require 'krane/cli/krane' +require 'krane/global_deploy_task' + +class GlobalDeployTest < Krane::TestCase + def test_global_deploy_with_default_options + set_krane_global_deploy_expectations! + krane_global_deploy! + end + + def test_deploy_parses_global_timeout + set_krane_global_deploy_expectations!(new_args: { global_timeout: 10 }) + krane_global_deploy!(flags: '--global-timeout 10s') + set_krane_global_deploy_expectations!(new_args: { global_timeout: 60**2 }) + krane_global_deploy!(flags: '--global-timeout 1h') + end + + def test_deploy_passes_verify_result + set_krane_global_deploy_expectations!(run_args: { verify_result: true }) + krane_global_deploy!(flags: '--verify-result true') + set_krane_global_deploy_expectations!(run_args: { verify_result: false }) + krane_global_deploy!(flags: '--verify-result false') + end + + def test_deploy_passes_filename + set_krane_global_deploy_expectations!(new_args: { filenames: ['/my/file/path'] }) + krane_global_deploy!(flags: '-f /my/file/path') + set_krane_global_deploy_expectations!(new_args: { filenames: %w(/my/other/file/path just/a/file.yml) }) + krane_global_deploy!(flags: '--filenames /my/other/file/path just/a/file.yml') + end + + def test_deploy_parses_selector + selector = 'name=web' + set_krane_global_deploy_expectations!(new_args: { selector: selector }) + krane_global_deploy!(flags: "--selector #{selector}") + end + + private + + def set_krane_global_deploy_expectations!(new_args: {}, run_args: {}) + options = default_options(new_args, run_args) + response = mock('GlobalDeployTask') + response.expects(:run!).with(options[:run_args]).returns(true) + Krane::GlobalDeployTask.expects(:new).with do |args| + args.except(:selector) == options[:new_args].except(:selector) && + args[:selector].to_s == options[:new_args][:selector] + end.returns(response) + end + + def krane_global_deploy!(flags: '') + flags += ' -f /tmp' unless flags.include?('-f') + flags += ' --selector name=web' unless flags.include?('--selector') + krane = Krane::CLI::Krane.new( + [task_config.context], + flags.split + ) + krane.invoke("global_deploy") + end + + def default_options(new_args = {}, run_args = {}) + { + new_args: { + context: task_config.context, + filenames: ['/tmp'], + global_timeout: 300, + selector: 'name=web', + }.merge(new_args), + run_args: { + verify_result: true, + prune: false, + }.merge(run_args), + } + end +end diff --git a/test/fixtures/crd/mail.yml b/test/fixtures/crd/mail.yml index f8b9a1c4d..906ab1f16 100644 --- a/test/fixtures/crd/mail.yml +++ b/test/fixtures/crd/mail.yml @@ -2,6 +2,8 @@ apiVersion: apiextensions.k8s.io/v1beta1 kind: CustomResourceDefinition metadata: name: mail.stable.example.io + labels: + app: krane spec: group: stable.example.io names: diff --git a/test/fixtures/globals/storage_classes.yml b/test/fixtures/globals/storage_classes.yml index f60e39a4c..86eb51c0a 100644 --- a/test/fixtures/globals/storage_classes.yml +++ b/test/fixtures/globals/storage_classes.yml @@ -2,4 +2,6 @@ apiVersion: storage.k8s.io/v1 kind: StorageClass metadata: name: testing-storage-class + labels: + app: krane provisioner: kubernetes.io/no-provisioner diff --git a/test/helpers/fixture_deploy_helper.rb b/test/helpers/fixture_deploy_helper.rb index 58accad03..b51f87aad 100644 --- a/test/helpers/fixture_deploy_helper.rb +++ b/test/helpers/fixture_deploy_helper.rb @@ -42,6 +42,22 @@ def deploy_fixtures(set, subset: nil, **args) # extra args are passed through to success end + def deploy_global_fixtures(set, subset: nil, **args) + fixtures = load_fixtures(set, subset) + raise "Cannot deploy empty template set" if fixtures.empty? + args[:selector] ||= "test=#{@namespace}" + namespace_globals(fixtures) + + yield fixtures if block_given? + + success = false + Dir.mktmpdir("fixture_dir") do |target_dir| + write_fixtures_to_dir(fixtures, target_dir) + success = global_deploy_dirs_without_profiling(target_dir, **args) + end + success + end + def deploy_raw_fixtures(set, wait: true, bindings: {}, subset: nil, render_erb: false) success = false if subset @@ -88,6 +104,23 @@ def deploy_dirs_without_profiling(dirs, wait: true, allow_protected_ns: false, p ) end + def global_deploy_dirs_without_profiling(dirs, verify_result: true, prune: false, + global_timeout: 300, selector:) + deploy = Krane::GlobalDeployTask.new( + context: KubeclientHelper::TEST_CONTEXT, + filenames: Array(dirs), + global_timeout: global_timeout, + selector: Krane::LabelSelector.parse(selector), + logger: logger, + ) + deploy.run( + verify_result: verify_result, + prune: prune + ) + ensure + delete_globals(Array(dirs)) + end + # Deploys all fixtures in the given directories via KubernetesDeploy::DeployTask # Exposed for direct use only when deploy_fixtures cannot be used because the template cannot be loaded pre-deploy, # for example because it contains an intentional syntax error @@ -146,4 +179,22 @@ def build_kubectl(log_failure_by_default: true, timeout: '5s') Krane::Kubectl.new(task_config: task_config, log_failure_by_default: log_failure_by_default, default_timeout: timeout) end + + def namespace_globals(fixtures) + fixtures.each do |_, kinds_map| + kinds_map.each do |_, resources| + resources.each do |resource| + resource["metadata"]["name"] = (resource["metadata"]["name"] + @namespace)[0..63] + resource["metadata"]["labels"] ||= {} + resource["metadata"]["labels"]["test"] = @namespace + end + end + end + end + + def delete_globals(dirs) + kubectl = build_kubectl + paths = dirs.flat_map { |d| ["-f", d] } + kubectl.run("delete", *paths, log_failure: false, use_namespace: false) + end end diff --git a/test/helpers/mock_resource.rb b/test/helpers/mock_resource.rb new file mode 100644 index 000000000..58047eb86 --- /dev/null +++ b/test/helpers/mock_resource.rb @@ -0,0 +1,76 @@ +# frozen_string_literal: true + +MockResource = Struct.new(:id, :hits_to_complete, :status) do + def debug_message(*) + @debug_message + end + + def sync(_cache) + @hits ||= 0 + @hits += 1 + end + + def after_sync + end + + def type + "MockResource" + end + alias_method :kubectl_resource_type, :type + + def pretty_timeout_type + end + + def deploy_method + :apply + end + + def file_path + "/dev/null" + end + + def deploy_started_at=(_) + end + + def sensitive_template_content? + true + end + + def global? + false + end + + def deploy_succeeded? + status == "success" && hits_complete? + end + + def deploy_failed? + status == "failed" && hits_complete? + end + + def deploy_timed_out? + status == "timeout" && hits_complete? + end + + def timeout + hits_to_complete + end + + def sync_debug_info(_) + @debug_message = "Something went wrong" + end + + def pretty_status + "#{id} #{status} (#{@hits} hits)" + end + + def report_status_to_statsd(watch_time) + end + + private + + def hits_complete? + @hits ||= 0 + @hits >= hits_to_complete + end +end diff --git a/test/helpers/resource_cache_test_helper.rb b/test/helpers/resource_cache_test_helper.rb index 6371e5249..99c392b0b 100644 --- a/test/helpers/resource_cache_test_helper.rb +++ b/test/helpers/resource_cache_test_helper.rb @@ -1,17 +1,20 @@ # frozen_string_literal: true module ResourceCacheTestHelper - def stub_kind_get(kind, items: [], times: 1) + def stub_kind_get(kind, items: [], times: 1, use_namespace: true) stub_kubectl_response( "get", kind, "--chunk-size=0", resp: { items: items }, - kwargs: { attempts: 5, output_is_sensitive: false }, + kwargs: { attempts: 5, output_is_sensitive: false, use_namespace: use_namespace }, times: times, ) end - def build_resource_cache - Krane::ResourceCache.new(task_config(namespace: 'test-ns')) + def build_resource_cache(task_config: nil) + task_config ||= task_config(namespace: 'test-ns').tap do |config| + config.stubs(:global_kinds).returns(%w(Node FakeNode)) + end + Krane::ResourceCache.new(task_config) end end diff --git a/test/helpers/test_provisioner.rb b/test/helpers/test_provisioner.rb index 11e8ae32d..c439ad0c5 100644 --- a/test/helpers/test_provisioner.rb +++ b/test/helpers/test_provisioner.rb @@ -9,7 +9,7 @@ class << self def prepare_cluster WebMock.allow_net_connect! $stderr.print("Preparing test cluster... ") - [ENV['PARALLELISM'].to_i, 2].max.times { |i| prepare_pv("pv000#{i}") } + [ENV['PARALLELISM'].to_i, 4].max.times { |i| prepare_pv("pv000#{i}") } $stderr.puts "Done." WebMock.disable_net_connect! end diff --git a/test/integration-serial/serial_deploy_test.rb b/test/integration-serial/serial_deploy_test.rb index 6d5b809ed..8290cc9cd 100644 --- a/test/integration-serial/serial_deploy_test.rb +++ b/test/integration-serial/serial_deploy_test.rb @@ -305,8 +305,8 @@ def test_all_expected_statsd_metrics_emitted_with_essential_tags %w( KubernetesDeploy.validate_configuration.duration KubernetesDeploy.discover_resources.duration - KubernetesDeploy.validate_resources.duration KubernetesDeploy.initial_status.duration + KubernetesDeploy.validate_resources.duration KubernetesDeploy.priority_resources.duration KubernetesDeploy.apply_all.duration KubernetesDeploy.normal_resources.duration @@ -321,6 +321,29 @@ def test_all_expected_statsd_metrics_emitted_with_essential_tags end end + def test_global_deploy_emits_expected_statsd_metrics + metrics = capture_statsd_calls(client: Krane::StatsD.client) do + assert_deploy_success(deploy_global_fixtures('globals')) + end + + assert_equal(1, metrics.count { |m| m.type == :_e }, "Expected to find one event metric") + + %w( + KubernetesDeploy.validate_configuration.duration + KubernetesDeploy.discover_resources.duration + KubernetesDeploy.initial_status.duration + KubernetesDeploy.validate_resources.duration + KubernetesDeploy.apply_all.duration + KubernetesDeploy.normal_resources.duration + KubernetesDeploy.sync.duration + KubernetesDeploy.all_resources.duration + ).each do |expected_metric| + metric = metrics.find { |m| m.name == expected_metric } + refute_nil metric, "Metric #{expected_metric} not emitted" + assert_includes metric.tags, "context:#{KubeclientHelper::TEST_CONTEXT}", "#{metric.name} is missing context tag" + end + end + def test_cr_deploys_without_rollout_conditions_when_none_present_deprecated assert_deploy_success(deploy_fixtures("crd", subset: %w(widgets_deprecated.yml))) assert_deploy_success(deploy_fixtures("crd", subset: %w(widgets_cr.yml))) @@ -533,6 +556,53 @@ def test_apply_failure_with_sensitive_resources_hides_template_content refute_logs_match("kind: Deployment") # content of the sensitive template end + # test_global_deploy_black_box_failure is in test/integration/krane_test.rb + # because it does not modify global state. The following two tests modify + # global state and must be run in serially + def test_global_deploy_black_box_success + setup_template_dir("globals") do |target_dir| + flags = "-f #{target_dir} --selector app=krane" + out, err, status = krane_black_box("global-deploy", "#{KubeclientHelper::TEST_CONTEXT} #{flags}") + assert_empty(out) + assert_match("Success", err) + assert_predicate(status, :success?) + end + ensure + build_kubectl.run("delete", "-f", fixture_path("globals"), use_namespace: false, log_failure: false) + end + + def test_global_deploy_black_box_timeout + setup_template_dir("globals") do |target_dir| + flags = "-f #{target_dir} --selector app=krane --global-timeout=0.1s" + out, err, status = krane_black_box("global-deploy", "#{KubeclientHelper::TEST_CONTEXT} #{flags}") + assert_empty(out) + assert_match("TIMED OUT", err) + refute_predicate(status, :success?) + assert_equal(status.exitstatus, 70) + end + ensure + build_kubectl.run("delete", "-f", fixture_path("globals"), use_namespace: false, log_failure: false) + end + + def test_global_deploy_validation_catches_namespaced_cr + assert_deploy_success(global_deploy_dirs_without_profiling('test/fixtures/crd/mail.yml', selector: "app=krane")) + reset_logger + assert_deploy_failure(global_deploy_dirs_without_profiling('test/fixtures/crd/mail_cr.yml', selector: "app=krane")) + assert_logs_match_all([ + "Phase 1: Initializing deploy", + "Using resource selector app=krane", + "All required parameters and files are present", + "Discovering resources:", + "- Mail/my-first-mail", + "Result: FAILURE", + "This command cannot deploy namespaced resources", + "Namespaced resources:", + "my-first-mail (Mail)", + ]) + ensure + wait_for_all_crd_deletion + end + private def wait_for_all_crd_deletion diff --git a/test/integration/global_deploy_test.rb b/test/integration/global_deploy_test.rb new file mode 100644 index 000000000..9585d8252 --- /dev/null +++ b/test/integration/global_deploy_test.rb @@ -0,0 +1,119 @@ +# frozen_string_literal: true +require 'integration_test_helper' + +class GlobalDeployTest < Krane::IntegrationTest + def test_global_deploy_task_success + assert_deploy_success(deploy_global_fixtures('globals')) + + assert_logs_match_all([ + "Phase 1: Initializing deploy", + "Using resource selector test=", + "All required parameters and files are present", + "Discovering resources:", + " - StorageClass/testing-storage-class", + "Phase 2: Checking initial resource statuses", + %r{StorageClass\/testing-storage-class[\w-]+\s+Not Found}, + "Phase 3: Deploying all resources", + %r{Deploying StorageClass\/testing-storage-class[\w-]+ \(timeout: 300s\)}, + "Don't know how to monitor resources of type StorageClass.", + %r{Assuming StorageClass\/testing-storage-class[\w-]+ deployed successfully.}, + %r{Successfully deployed in [\d.]+s: StorageClass\/testing-storage-class}, + "Result: SUCCESS", + "Successfully deployed 1 resource", + "Successful resources", + "StorageClass/testing-storage-class", + ]) + end + + def test_global_deploy_task_success_timeout + assert_deploy_failure(deploy_global_fixtures('globals', global_timeout: 0), :timed_out) + + assert_logs_match_all([ + "Phase 1: Initializing deploy", + "Using resource selector test=", + "All required parameters and files are present", + "Discovering resources:", + " - StorageClass/testing-storage-class", + "Phase 2: Checking initial resource statuses", + %r{StorageClass\/testing-storage-class[\w-]+\s+Not Found}, + "Phase 3: Deploying all resources", + %r{Deploying StorageClass\/testing-storage-class[\w-]+ \(timeout: 300s\)}, + "Result: TIMED OUT", + "Timed out waiting for 1 resource to deploy", + %r{StorageClass\/testing-storage-class[\w-]+: GLOBAL WATCH TIMEOUT \(0 seconds\)}, + "If you expected it to take longer than 0 seconds for your deploy to roll out, increase --max-watch-seconds.", + ]) + end + + def test_global_deploy_task_success_verify_false + assert_deploy_success(deploy_global_fixtures('globals', verify_result: false)) + + assert_logs_match_all([ + "Phase 1: Initializing deploy", + "Using resource selector test=", + "All required parameters and files are present", + "Discovering resources:", + " - StorageClass/testing-storage-class", + "Phase 2: Checking initial resource statuses", + %r{StorageClass\/testing-storage-class[\w-]+\s+Not Found}, + "Phase 3: Deploying all resources", + %r{Deploying StorageClass\/testing-storage-class[\w-]+ \(timeout: 300s\)}, + "Result: SUCCESS", + "Deployed 1 resource", + "Deploy result verification is disabled for this deploy.", + "This means the desired changes were communicated to Kubernetes, but the"\ + " deploy did not make sure they actually succeeded.", + ]) + end + + def test_global_deploy_task_empty_selector_validation_failure + assert_deploy_failure(deploy_global_fixtures('globals', selector: "")) + assert_logs_match_all([ + "Phase 1: Initializing deploy", + "Result: FAILURE", + "Configuration invalid", + "- Selector is required", + ]) + end + + def test_global_deploy_task_success_selector + selector = "app=krane" + assert_deploy_success(deploy_global_fixtures('globals', selector: selector)) + + assert_logs_match_all([ + "Phase 1: Initializing deploy", + "Using resource selector #{selector}", + "All required parameters and files are present", + "Discovering resources:", + " - StorageClass/testing-storage-class", + "Phase 2: Checking initial resource statuses", + %r{StorageClass\/testing-storage-class[\w-]+\s+Not Found}, + "Phase 3: Deploying all resources", + %r{Deploying StorageClass\/testing-storage-class[\w-]+ \(timeout: 300s\)}, + "Don't know how to monitor resources of type StorageClass.", + %r{Assuming StorageClass\/testing-storage-class[\w-]+ deployed successfully.}, + %r{Successfully deployed in [\d.]+s: StorageClass\/testing-storage-class}, + "Result: SUCCESS", + "Successfully deployed 1 resource", + "Successful resources", + "StorageClass/testing-storage-class", + ]) + end + + def test_global_deploy_task_failure + result = deploy_global_fixtures('globals') do |fixtures| + fixtures.dig("storage_classes.yml", "StorageClass").first["metadata"]['badField'] = "true" + end + assert_deploy_failure(result) + + assert_logs_match_all([ + "Phase 1: Initializing deploy", + "Using resource selector test=", + "All required parameters and files are present", + "Discovering resources:", + " - StorageClass/testing-storage-class", + "Result: FAILURE", + "Template validation failed", + ]) + end +end diff --git a/test/integration/krane_test.rb b/test/integration/krane_test.rb index 9379a6b98..eaf87136a 100644 --- a/test/integration/krane_test.rb +++ b/test/integration/krane_test.rb @@ -87,6 +87,21 @@ def test_deploy_black_box_timeout end end + # test_global_deploy_black_box_success and test_global_deploy_black_box_timeout + # are in test/integration-serial/serial_deploy_test.rb because they modify + # global state + + def test_global_deploy_black_box_failure + setup_template_dir("resource-quota") do |target_dir| + flags = "-f #{target_dir} --selector app=krane" + out, err, status = krane_black_box("global-deploy", "#{KubeclientHelper::TEST_CONTEXT} #{flags}") + assert_empty(out) + assert_match("FAILURE", err) + refute_predicate(status, :success?) + assert_equal(status.exitstatus, 1) + end + end + private def task_runner_pods diff --git a/test/test_helper.rb b/test/test_helper.rb index 42e8422d1..c211c13ee 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -225,7 +225,7 @@ def mock_output_stream end def task_config(context: KubeclientHelper::TEST_CONTEXT, namespace: @namespace, logger: @logger) - Krane::TaskConfig.new(context, namespace, logger) + @task_config ||= Krane::TaskConfig.new(context, namespace, logger) end def krane_black_box(command, args = "") diff --git a/test/unit/krane/kubernetes_resource/daemon_set_test.rb b/test/unit/krane/kubernetes_resource/daemon_set_test.rb index af7953c18..221990a5f 100644 --- a/test/unit/krane/kubernetes_resource/daemon_set_test.rb +++ b/test/unit/krane/kubernetes_resource/daemon_set_test.rb @@ -129,7 +129,7 @@ def build_synced_ds(ds_template:, pod_templates: [], node_templates: []) ds = Krane::DaemonSet.new(namespace: "test", context: "nope", logger: logger, definition: ds_template) stub_kind_get("DaemonSet", items: [ds_template]) stub_kind_get("Pod", items: pod_templates) - stub_kind_get("Node", items: node_templates) + stub_kind_get("Node", items: node_templates, use_namespace: false) ds.sync(build_resource_cache) ds end diff --git a/test/unit/krane/kubernetes_resource/horizontal_pod_autoscaler_test.rb b/test/unit/krane/kubernetes_resource/horizontal_pod_autoscaler_test.rb index 73ab811ac..ac59c48d3 100644 --- a/test/unit/krane/kubernetes_resource/horizontal_pod_autoscaler_test.rb +++ b/test/unit/krane/kubernetes_resource/horizontal_pod_autoscaler_test.rb @@ -7,7 +7,7 @@ class HorizontalPodAutoscalerTest < Krane::TestCase # We can't get integration coverage for HPA right now because the metrics server just isn't reliable enough on our CI def test_hpa_is_whitelisted_for_pruning Krane::Kubectl.any_instance.expects("run") - .with("get", "CustomResourceDefinition", output: "json", attempts: 5) + .with("get", "CustomResourceDefinition", output: "json", attempts: 5, use_namespace: false) .returns(['{ "items": [] }', "", SystemExit.new(0)]) task = Krane::DeployTask.new(namespace: 'test', context: KubeclientHelper::TEST_CONTEXT, current_sha: 'foo', template_paths: [''], logger: logger) diff --git a/test/unit/krane/kubernetes_resource/pod_test.rb b/test/unit/krane/kubernetes_resource/pod_test.rb index c4d6b327a..950b9d26d 100644 --- a/test/unit/krane/kubernetes_resource/pod_test.rb +++ b/test/unit/krane/kubernetes_resource/pod_test.rb @@ -220,7 +220,7 @@ def test_deploy_failed_is_true_for_disappeared_unmanaged_pods template = build_pod_template pod = Krane::Pod.new(namespace: 'test', context: 'nope', definition: template, logger: @logger, deploy_started_at: Time.now.utc) - cache = build_resource_cache + cache = build_resource_cache(task_config: task_config) cache.expects(:get_instance).raises(Krane::Kubectl::ResourceNotFoundError) pod.sync(cache) @@ -233,7 +233,7 @@ def test_deploy_failed_is_false_for_disappeared_managed_pods template = build_pod_template pod = Krane::Pod.new(namespace: 'test', context: 'nope', definition: template, logger: @logger, deploy_started_at: Time.now.utc, parent: mock) - cache = build_resource_cache + cache = build_resource_cache(task_config: task_config) cache.expects(:get_instance).raises(Krane::Kubectl::ResourceNotFoundError) pod.sync(cache) diff --git a/test/unit/krane/kubernetes_resource_test.rb b/test/unit/krane/kubernetes_resource_test.rb index fb63fa5e5..73d893cfc 100644 --- a/test/unit/krane/kubernetes_resource_test.rb +++ b/test/unit/krane/kubernetes_resource_test.rb @@ -398,7 +398,7 @@ def test_whitespace_in_debug_message def test_disappeared_is_true_if_resource_has_been_deployed_and_404s dummy = DummyResource.new - cache = Krane::ResourceCache.new(task_config: task_config(namespace: 'test', context: 'minikube')) + cache = Krane::ResourceCache.new(task_config(namespace: 'test', context: 'minikube')) cache.expects(:get_instance).raises(Krane::Kubectl::ResourceNotFoundError).twice dummy.sync(cache) @@ -411,7 +411,9 @@ def test_disappeared_is_true_if_resource_has_been_deployed_and_404s def test_disappeared_is_false_if_resource_has_been_deployed_and_we_get_a_server_error dummy = DummyResource.new - cache = Krane::ResourceCache.new(task_config: task_config(namespace: 'test', context: 'minikube')) + config = task_config(namespace: 'test', context: 'minikube') + config.stubs(:global_kinds).returns([]) + cache = Krane::ResourceCache.new(config) Krane::Kubectl.any_instance.expects(:run).returns(["", "NotFound", stub(success?: false)]).twice dummy.sync(cache) diff --git a/test/unit/krane/resource_deployer_test.rb b/test/unit/krane/resource_deployer_test.rb new file mode 100644 index 000000000..531cbe268 --- /dev/null +++ b/test/unit/krane/resource_deployer_test.rb @@ -0,0 +1,99 @@ +# frozen_string_literal: true +require 'test_helper' +require 'krane/resource_deployer' + +class ResourceDeployerTest < Krane::TestCase + def test_deploy_prune_builds_whitelist + whitelist_kind = "fake_kind" + resource = build_mock_resource + Krane::Kubectl.any_instance.expects(:run).with do |*args| + args.include?("--prune-whitelist=#{whitelist_kind}") + end.returns(["", "", stub(success?: true)]) + resource_deployer(kubectl_times: 0, prune_whitelist: [whitelist_kind]).deploy!([resource], false, true) + end + + def test_deploy_no_prune_doesnt_prune + whitelist_kind = "fake_kind" + resource = build_mock_resource + Krane::Kubectl.any_instance.expects(:run).with do |*args| + !args.include?("--prune-whitelist=#{whitelist_kind}") + end.returns(["", "", stub(success?: true)]) + resource_deployer(kubectl_times: 0, prune_whitelist: [whitelist_kind]).deploy!([resource], false, false) + end + + def test_deploy_verify_false_message + resource = build_mock_resource + resource_deployer.deploy!([resource], false, false) + logger.print_summary(:done) # Force logger to flush + assert_logs_match_all(["Deploy result verification is disabled for this deploy."]) + end + + def test_deploy_time_out_error + resource = build_mock_resource(final_status: "timeout") + watcher = mock("ResourceWatcher") + watcher.expects(:run).returns(true) + Krane::ResourceWatcher.expects(:new).returns(watcher) + assert_raises(Krane::DeploymentTimeoutError) do + resource_deployer.deploy!([resource], true, false) + end + end + + def test_deploy_verify_false_no_timeout + resource = build_mock_resource(final_status: "timeout") + resource_deployer.deploy!([resource], false, false) + logger.print_summary(:done) # Force logger to flush + assert_logs_match_all(["Deploy result verification is disabled for this deploy."]) + end + + def test_deploy_failure_error + resource = build_mock_resource(final_status: "failure") + watcher = mock("ResourceWatcher") + watcher.expects(:run) + Krane::ResourceWatcher.expects(:new).returns(watcher) + assert_raises(Krane::FatalDeploymentError) do + resource_deployer.deploy!([resource], true, false) + end + end + + def test_deploy_verify_false_no_failure_error + resource = build_mock_resource(final_status: "failure") + resource_deployer.deploy!([resource], false, false) + logger.print_summary(:done) # Force logger to flush + assert_logs_match_all(["Deploy result verification is disabled for this deploy."]) + end + + def test_predeploy_priority_resources_respects_pre_deploy_list + kind = "MockResource" + resource = build_mock_resource + watcher = mock("ResourceWatcher") + watcher.expects(:run).returns(true) + # ResourceDeployer only creates a ResourceWatcher if one or more resources + # are deployed. See test_predeploy_priority_resources_respects_empty_pre_deploy_list + # for counter example + Krane::ResourceWatcher.expects(:new).returns(watcher) + priority_list = [kind] + resource_deployer.predeploy_priority_resources([resource], priority_list) + end + + def test_predeploy_priority_resources_respects_empty_pre_deploy_list + resource = build_mock_resource + priority_list = [] + Krane::ResourceWatcher.expects(:new).times(0) + resource_deployer(kubectl_times: 0).predeploy_priority_resources([resource], priority_list) + end + + private + + def resource_deployer(kubectl_times: 2, prune_whitelist: []) + unless kubectl_times == 0 + Krane::Kubectl.expects(:new).returns(build_runless_kubectl).times(kubectl_times) + end + @deployer = Krane::ResourceDeployer.new(current_sha: 'test-sha', + statsd_tags: [], task_config: task_config, prune_whitelist: prune_whitelist, + max_watch_seconds: 1, selector: nil) + end + + def build_mock_resource(final_status: "success", hits_to_complete: 0, name: "web-pod") + MockResource.new(name, hits_to_complete, final_status) + end +end diff --git a/test/unit/krane/resource_watcher_test.rb b/test/unit/krane/resource_watcher_test.rb index f120755fa..6cf08e7aa 100644 --- a/test/unit/krane/resource_watcher_test.rb +++ b/test/unit/krane/resource_watcher_test.rb @@ -123,58 +123,6 @@ def build_watcher(resources) ) end - MockResource = Struct.new(:id, :hits_to_complete, :status) do - def debug_message(*) - @debug_message - end - - def sync(_cache) - @hits ||= 0 - @hits += 1 - end - - def after_sync - end - - def type - "MockResource" - end - alias_method :kubectl_resource_type, :type - - def deploy_succeeded? - status == "success" && hits_complete? - end - - def deploy_failed? - status == "failed" && hits_complete? - end - - def deploy_timed_out? - status == "timeout" && hits_complete? - end - - def timeout - hits_to_complete - end - - def sync_debug_info(_) - @debug_message = "Something went wrong" - end - - def pretty_status - "#{id} #{status} (#{@hits} hits)" - end - - def report_status_to_statsd(watch_time) - end - - private - - def hits_complete? - @hits >= hits_to_complete - end - end - def build_mock_resource(final_status: "success", hits_to_complete: 1, name: "web-pod") MockResource.new(name, hits_to_complete, final_status) end diff --git a/test/unit/resource_cache_test.rb b/test/unit/resource_cache_test.rb index 7ce6311d4..fd14ccf0f 100644 --- a/test/unit/resource_cache_test.rb +++ b/test/unit/resource_cache_test.rb @@ -16,6 +16,13 @@ def test_get_instance_populates_the_cache_and_returns_instance_hash assert_equal(pods[1].kubectl_response, @cache.get_instance("FakePod", pods[1].name)) end + def test_get_instance_populates_the_cache_for_global_resources + nodes = build_fake_nodes(2) + stub_kind_get("FakeNode", items: nodes.map(&:kubectl_response), times: 1, use_namespace: false) + assert_equal(nodes[0].kubectl_response, @cache.get_instance("FakeNode", nodes[0].name)) + assert_equal(nodes[1].kubectl_response, @cache.get_instance("FakeNode", nodes[1].name)) + end + def test_get_instance_returns_empty_hash_if_pod_not_found pods = build_fake_pods(2) stub_kind_get("FakePod", items: pods.map(&:kubectl_response), times: 1) @@ -37,7 +44,8 @@ def test_get_all_populates_cache_and_returns_array_of_instance_hashes end def test_if_kubectl_error_then_empty_result_returned_but_not_cached - stub_kubectl_response('get', 'FakeConfigMap', '--chunk-size=0', kwargs: { attempts: 5, output_is_sensitive: false }, + stub_kubectl_response('get', 'FakeConfigMap', '--chunk-size=0', + kwargs: { attempts: 5, output_is_sensitive: false, use_namespace: true }, success: false, resp: { "items" => [] }, err: 'no', times: 4) # All of these calls should attempt the request again (see the 'times' arg above) @@ -80,6 +88,10 @@ def test_concurrently_syncing_huge_numbers_of_resources_makes_exactly_one_kubect private + def build_fake_nodes(num) + num.times.map { |n| FakeNode.new("node#{n}") } + end + def build_fake_pods(num) num.times.map { |n| FakePod.new("pod#{n}") } end @@ -140,4 +152,9 @@ def sync(mediator) end class FakePod < MockResource; end class FakeConfigMap < MockResource; end + class FakeNode < MockResource + def global? + true + end + end end