diff --git a/lib/kubernetes-deploy/container_logs.rb b/lib/kubernetes-deploy/container_logs.rb index 998bb3ec9..d43144d32 100644 --- a/lib/kubernetes-deploy/container_logs.rb +++ b/lib/kubernetes-deploy/container_logs.rb @@ -18,7 +18,7 @@ def initialize(parent_id:, container_name:, namespace:, context:, logger:) def sync new_logs = fetch_latest return unless new_logs.present? - @lines += deduplicate(new_logs) + @lines += sort_and_deduplicate(new_logs) end def empty? @@ -60,18 +60,24 @@ def rfc3339_timestamp(time) time.strftime("%FT%T.%N%:z") end - def deduplicate(logs) - deduped = [] - check_for_duplicate = true + def sort_and_deduplicate(logs) + parsed_lines = logs.map { |line| split_timestamped_line(line) } + sorted_lines = parsed_lines.sort do |(timestamp1, _msg1), (timestamp2, _msg2)| + if timestamp1.nil? + -1 + elsif timestamp2.nil? + 1 + else + timestamp1 <=> timestamp2 + end + end - logs.each do |line| - timestamp, msg = split_timestamped_line(line) - next if check_for_duplicate && likely_duplicate?(timestamp) - check_for_duplicate = false # logs are ordered, so once we've seen a new one, assume all subsequent logs are new + deduped = [] + sorted_lines.each do |timestamp, msg| + next if likely_duplicate?(timestamp) @last_timestamp = timestamp if timestamp deduped << msg end - deduped end diff --git a/test/unit/container_logs_test.rb b/test/unit/container_logs_test.rb index 33fceb3a3..f6342ef3b 100644 --- a/test/unit/container_logs_test.rb +++ b/test/unit/container_logs_test.rb @@ -81,8 +81,8 @@ def test_print_latest_supports_prefixing end def test_logs_without_timestamps_are_not_deduped - logs_response_1_with_anomaly = logs_response_1 + "Line 3.5" - logs_response_2_with_anomaly = "Line 3.5\n" + logs_response_2 + logs_response_1_with_anomaly = logs_response_1 + "No timestamp" + logs_response_2_with_anomaly = "No timestamp 2\n" + logs_response_2 KubernetesDeploy::Kubectl.any_instance.stubs(:run) .returns([logs_response_1_with_anomaly, "", ""]) .then.returns([logs_response_2_with_anomaly, "", ""]) @@ -90,7 +90,14 @@ def test_logs_without_timestamps_are_not_deduped @logs.sync @logs.sync @logs.print_all - assert_logs_match("Line 3.5", 2) + assert_logs_match_all([ + "No timestamp", # moved to start of batch 1 + "Line 1", + "Line 2", + "Line 3", + "No timestamp 2", # moved to start of batch 2 + "Line 4" + ], in_order: true) end def test_deduplication_works_when_exact_same_batch_is_returned_more_than_once @@ -108,6 +115,43 @@ def test_deduplication_works_when_exact_same_batch_is_returned_more_than_once assert_logs_match("Line 2", 1) end + def test_deduplication_works_when_last_line_is_out_of_order + regression_data = <<~STRING + 2018-12-13T12:17:23.727605598Z Line 1 + 2018-12-13T12:17:23.727696012Z Line 2 + 2018-12-13T12:17:23.728538913Z Line 3 + 2018-12-13T12:17:23.7287293Z Line 4 + 2018-12-13T12:17:23.729694842Z Line 5 + 2018-12-13T12:17:23.731259592Z Line 7 + 2018-12-13T12:17:23.73127007Z Line 8 + 2018-12-13T12:17:23.731273672Z Line 9 + 2018-12-13T12:17:23.731276862Z Line 10 + 2018-12-13T12:17:23.731284069Z Line 11 + 2018-12-13T12:17:23.731287054Z Line 12 + 2018-12-13T12:17:23.731289959Z Line 13 + 2018-12-13T12:17:23.731292814Z Line 14 + 2018-12-13T12:17:23.731295298Z Line 15 + 2018-12-13T12:17:23.731297747Z Line 16 + 2018-12-13T12:17:23.731297748Z Line 17 + 2018-12-13T12:17:23.729851532Z Line 6 + STRING + + KubernetesDeploy::Kubectl.any_instance.stubs(:run) + .returns([regression_data, "", ""]).times(12) + + 12.times do + @logs.sync + @logs.print_latest + end + + expected_lines = generate_log_messages(1..17) + + expected_lines.each do |line| + assert_logs_match(/#{line}$/, 1) # no duplicates + end + assert_logs_match_all(expected_lines, in_order: true) # sorted correctly + end + private def generate_log_messages(range)