From 4b0c715e57abe40b5c76601fee4ae923d467262c Mon Sep 17 00:00:00 2001 From: Katrina Verey Date: Thu, 13 Dec 2018 12:31:15 -0500 Subject: [PATCH 1/2] Logs from different streams can arrive out of order --- lib/kubernetes-deploy/container_logs.rb | 19 +++++++++---- test/unit/container_logs_test.rb | 37 +++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/lib/kubernetes-deploy/container_logs.rb b/lib/kubernetes-deploy/container_logs.rb index 998bb3ec9..d5008a642 100644 --- a/lib/kubernetes-deploy/container_logs.rb +++ b/lib/kubernetes-deploy/container_logs.rb @@ -62,16 +62,23 @@ def rfc3339_timestamp(time) def deduplicate(logs) deduped = [] - check_for_duplicate = true - logs.each do |line| - timestamp, msg = split_timestamped_line(line) - next if check_for_duplicate && likely_duplicate?(timestamp) - check_for_duplicate = false # logs are ordered, so once we've seen a new one, assume all subsequent logs are new + parsed_lines = logs.map { |line| split_timestamped_line(line) } + sorted_lines = parsed_lines.sort do |a, b| + if a[0].nil? || b[0].nil? + 0 + else + a[0] <=> b[0] + end + end + + sorted_lines.each do |line| + timestamp = line[0] + msg = line[1] + next if likely_duplicate?(timestamp) @last_timestamp = timestamp if timestamp deduped << msg end - deduped end diff --git a/test/unit/container_logs_test.rb b/test/unit/container_logs_test.rb index 33fceb3a3..15e1acd6b 100644 --- a/test/unit/container_logs_test.rb +++ b/test/unit/container_logs_test.rb @@ -108,6 +108,43 @@ def test_deduplication_works_when_exact_same_batch_is_returned_more_than_once assert_logs_match("Line 2", 1) end + def test_deduplication_works_when_last_line_is_out_of_order + regression_data = <<~STRING + 2018-12-13T12:17:23.727605598Z Line 1 + 2018-12-13T12:17:23.727696012Z Line 2 + 2018-12-13T12:17:23.728538913Z Line 3 + 2018-12-13T12:17:23.7287293Z Line 4 + 2018-12-13T12:17:23.729694842Z Line 5 + 2018-12-13T12:17:23.731259592Z Line 7 + 2018-12-13T12:17:23.73127007Z Line 8 + 2018-12-13T12:17:23.731273672Z Line 9 + 2018-12-13T12:17:23.731276862Z Line 10 + 2018-12-13T12:17:23.731284069Z Line 11 + 2018-12-13T12:17:23.731287054Z Line 12 + 2018-12-13T12:17:23.731289959Z Line 13 + 2018-12-13T12:17:23.731292814Z Line 14 + 2018-12-13T12:17:23.731295298Z Line 15 + 2018-12-13T12:17:23.731297747Z Line 16 + 2018-12-13T12:17:23.731297748Z Line 17 + 2018-12-13T12:17:23.729851532Z Line 6 + STRING + + KubernetesDeploy::Kubectl.any_instance.stubs(:run) + .returns([regression_data, "", ""]).times(12) + + 12.times do + @logs.sync + @logs.print_latest + end + + expected_lines = generate_log_messages(1..17) + + expected_lines.each do |line| + assert_logs_match(/#{line}$/, 1) # no duplicates + end + assert_logs_match_all(expected_lines, in_order: true) # sorted correctly + end + private def generate_log_messages(range) From f9a0193532bd7d5b922417d0d9854ba3172b1777 Mon Sep 17 00:00:00 2001 From: Katrina Verey Date: Fri, 14 Dec 2018 16:15:22 -0500 Subject: [PATCH 2/2] Better vars and missing timestamp sorting --- lib/kubernetes-deploy/container_logs.rb | 21 ++++++++++----------- test/unit/container_logs_test.rb | 13 ++++++++++--- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/lib/kubernetes-deploy/container_logs.rb b/lib/kubernetes-deploy/container_logs.rb index d5008a642..d43144d32 100644 --- a/lib/kubernetes-deploy/container_logs.rb +++ b/lib/kubernetes-deploy/container_logs.rb @@ -18,7 +18,7 @@ def initialize(parent_id:, container_name:, namespace:, context:, logger:) def sync new_logs = fetch_latest return unless new_logs.present? - @lines += deduplicate(new_logs) + @lines += sort_and_deduplicate(new_logs) end def empty? @@ -60,21 +60,20 @@ def rfc3339_timestamp(time) time.strftime("%FT%T.%N%:z") end - def deduplicate(logs) - deduped = [] - + def sort_and_deduplicate(logs) parsed_lines = logs.map { |line| split_timestamped_line(line) } - sorted_lines = parsed_lines.sort do |a, b| - if a[0].nil? || b[0].nil? - 0 + sorted_lines = parsed_lines.sort do |(timestamp1, _msg1), (timestamp2, _msg2)| + if timestamp1.nil? + -1 + elsif timestamp2.nil? + 1 else - a[0] <=> b[0] + timestamp1 <=> timestamp2 end end - sorted_lines.each do |line| - timestamp = line[0] - msg = line[1] + deduped = [] + sorted_lines.each do |timestamp, msg| next if likely_duplicate?(timestamp) @last_timestamp = timestamp if timestamp deduped << msg diff --git a/test/unit/container_logs_test.rb b/test/unit/container_logs_test.rb index 15e1acd6b..f6342ef3b 100644 --- a/test/unit/container_logs_test.rb +++ b/test/unit/container_logs_test.rb @@ -81,8 +81,8 @@ def test_print_latest_supports_prefixing end def test_logs_without_timestamps_are_not_deduped - logs_response_1_with_anomaly = logs_response_1 + "Line 3.5" - logs_response_2_with_anomaly = "Line 3.5\n" + logs_response_2 + logs_response_1_with_anomaly = logs_response_1 + "No timestamp" + logs_response_2_with_anomaly = "No timestamp 2\n" + logs_response_2 KubernetesDeploy::Kubectl.any_instance.stubs(:run) .returns([logs_response_1_with_anomaly, "", ""]) .then.returns([logs_response_2_with_anomaly, "", ""]) @@ -90,7 +90,14 @@ def test_logs_without_timestamps_are_not_deduped @logs.sync @logs.sync @logs.print_all - assert_logs_match("Line 3.5", 2) + assert_logs_match_all([ + "No timestamp", # moved to start of batch 1 + "Line 1", + "Line 2", + "Line 3", + "No timestamp 2", # moved to start of batch 2 + "Line 4" + ], in_order: true) end def test_deduplication_works_when_exact_same_batch_is_returned_more_than_once