From 4a02a5f711349653ca06d3f8415fee2d8fafd4b8 Mon Sep 17 00:00:00 2001
From: Lukas Atkinson <opensource@LukasAtkinson.de>
Date: Sat, 24 Feb 2018 18:12:47 +0100
Subject: [PATCH] gcov: support GCC 8 gcov format

See #226 for background.

While at it, the parsing was made a bit more robust.

  - Any exceptions during parsing are swallowed.
    Parsing will always succeed, but the result may be garbage.
  - After parsing, failed lines and any exceptions are summarized.
  - These errors are always shown, not just in verbose mode.
  - The error message links directly to the GitHub issue tracker.
---
 gcovr/gcov.py                   |  73 ++++++++-
 gcovr/tests/test_gcov_parser.py | 258 ++++++++++++++++++++++++++++++++
 2 files changed, 323 insertions(+), 8 deletions(-)
 create mode 100644 gcovr/tests/test_gcov_parser.py

diff --git a/gcovr/gcov.py b/gcovr/gcov.py
index 889c29333..36635dea7 100644
--- a/gcovr/gcov.py
+++ b/gcovr/gcov.py
@@ -112,9 +112,9 @@ def process_gcov_data(data_fname, covdata, source_fname, options):
         return
 
     parser = GcovParser(fname, logger=logger)
-    for line in INPUT:
-        parser.parse_line(line, options.exclude_unreachable_branches)
+    parser.parse_all_lines(INPUT, options.exclude_unreachable_branches)
     parser.update_coverage(covdata)
+    parser.check_unrecognized_lines()
     parser.check_unclosed_exclusions()
 
     INPUT.close()
@@ -217,6 +217,17 @@ def __init__(self, fname, logger):
         self.last_code_line = ""
         self.last_code_lineno = 0
         self.last_code_line_excluded = False
+        self.unrecognized_lines = []
+        self.deferred_exceptions = []
+        self.last_was_specialization_section_marker = False
+
+    def parse_all_lines(self, lines, exclude_unreachable_branches):
+        for line in lines:
+            try:
+                self.parse_line(line, exclude_unreachable_branches)
+            except Exception as ex:  # broad on purpose: never abort parsing
+                self.unrecognized_lines.append(line)
+                self.deferred_exceptions.append(ex)
 
     def parse_line(self, line, exclude_unreachable_branches):
         # If this is a tag line, we stay on the same line number
@@ -256,11 +267,7 @@ def parse_line(self, line, exclude_unreachable_branches):
         is_code_statement = self.parse_code_line(status, code)
 
         if not is_code_statement:
-            self.logger.verbose_msg(
-                "Unrecognized GCOV output: {line}\n"
-                "\tThis is indicitive of a gcov output parse error.\n"
-                "\tPlease report this to the gcovr developers.",
-                line=line)
+            self.unrecognized_lines.append(line)
 
         # save the code line to use it later with branches
         if is_code_statement:
@@ -293,12 +300,44 @@ def parse_code_line(self, status, code):
             return True
 
         if firstchar in "0123456789":
-            self.covered[self.lineno] = int(status)
+            # GCOV 8 marks partial coverage
+            # with a trailing "*" after the execution count.
+            self.covered[self.lineno] = int(status.rstrip('*'))
             return True
 
         return False
 
     def parse_tag_line(self, line, exclude_unreachable_branches):
+        # Start or end a template/macro specialization section
+        if line.startswith('-----'):
+            self.last_was_specialization_section_marker = True
+            return True
+
+        last_was_marker = self.last_was_specialization_section_marker
+        self.last_was_specialization_section_marker = False
+
+        # A specialization section marker is either followed by a section or
+        # ends it. If it starts a section, the next line contains a function
+        # name, followed by a colon. A function name cannot be parsed reliably,
+        # so we assume it is a function, and try to disprove this assumption by
+        # comparing with other kinds of lines.
+        if last_was_marker:
+            # 1. a function must end with a colon
+            is_function = line.endswith(':')
+
+            # 2. a function cannot start with space
+            if is_function:
+                is_function = not line.startswith(' ')
+
+            # 3. a function cannot start with a tag
+            if is_function:
+                tags = ('function ', 'call ', 'branch ')
+                is_function = not line.startswith(tags)
+
+            # Discard function names only; other lines continue to be parsed.
+            if is_function:
+                return True
+
         if line.startswith('function '):
             return True
 
@@ -390,6 +429,24 @@ def check_unclosed_exclusions(self):
                 "\tin file {fname}.",
                 header=header, line=line, fname=self.fname)
 
+    def check_unrecognized_lines(self):
+        if self.unrecognized_lines:
+            self.logger.warn(
+                "Unrecognized GCOV output for {source}\n"
+                "\t  {lines}\n"
+                "\tThis is indicative of a gcov output parse error.\n"
+                "\tPlease report this to the gcovr developers\n"
+                "\tat <https://github.com/gcovr/gcovr/issues>.",
+                source=self.fname,
+                lines="\n\t  ".join(self.unrecognized_lines))
+        for ex in self.deferred_exceptions:
+            self.logger.warn(
+                "Exception during parsing:\n"
+                "\t{type}: {msg}",
+                type=type(ex).__name__, msg=ex)
+
+        return bool(self.unrecognized_lines)
+
     def update_coverage(self, covdata):
         self.logger.verbose_msg(
             "uncovered: {parser.uncovered}\n"
diff --git a/gcovr/tests/test_gcov_parser.py b/gcovr/tests/test_gcov_parser.py
new file mode 100644
index 000000000..0abc8306d
--- /dev/null
+++ b/gcovr/tests/test_gcov_parser.py
@@ -0,0 +1,258 @@
+# -*- coding:utf-8 -*-
+
+# This file is part of gcovr <http://gcovr.com/>.
+#
+# Copyright 2013-2018 the gcovr authors
+# Copyright 2013 Sandia Corporation
+# This software is distributed under the BSD license.
+
+import pytest
+
+from ..gcov import GcovParser
+from ..utils import Logger
+
+# This example is taken from the GCC 8 Gcov documentation:
+# <https://gcc.gnu.org/onlinedocs/gcc/Invoking-Gcov.html>
+GCOV_8_EXAMPLE = r"""
+        -:    0:Source:tmp.cpp
+        -:    0:Graph:tmp.gcno
+        -:    0:Data:tmp.gcda
+        -:    0:Runs:1
+        -:    0:Programs:1
+        -:    1:#include <stdio.h>
+        -:    2:
+        -:    3:template<class T>
+        -:    4:class Foo
+        -:    5:{
+        -:    6:  public:
+       1*:    7:  Foo(): b (1000) {}
+------------------
+Foo<char>::Foo():
+function Foo<char>::Foo() called 0 returned 0% blocks executed 0%
+    #####:    7:  Foo(): b (1000) {}
+------------------
+Foo<int>::Foo():
+function Foo<int>::Foo() called 1 returned 100% blocks executed 100%
+        1:    7:  Foo(): b (1000) {}
+------------------
+       2*:    8:  void inc () { b++; }
+------------------
+Foo<char>::inc():
+function Foo<char>::inc() called 0 returned 0% blocks executed 0%
+    #####:    8:  void inc () { b++; }
+------------------
+Foo<int>::inc():
+function Foo<int>::inc() called 2 returned 100% blocks executed 100%
+        2:    8:  void inc () { b++; }
+------------------
+        -:    9:
+        -:   10:  private:
+        -:   11:  int b;
+        -:   12:};
+        -:   13:
+        -:   14:template class Foo<int>;
+        -:   15:template class Foo<char>;
+        -:   16:
+        -:   17:int
+function main called 1 returned 100% blocks executed 81%
+        1:   18:main (void)
+        -:   19:{
+        -:   20:  int i, total;
+        1:   21:  Foo<int> counter;
+call    0 returned 100%
+branch  1 taken 100% (fallthrough)
+branch  2 taken 0% (throw)
+        -:   22:
+        1:   23:  counter.inc();
+call    0 returned 100%
+branch  1 taken 100% (fallthrough)
+branch  2 taken 0% (throw)
+        1:   24:  counter.inc();
+call    0 returned 100%
+branch  1 taken 100% (fallthrough)
+branch  2 taken 0% (throw)
+        1:   25:  total = 0;
+        -:   26:
+       11:   27:  for (i = 0; i < 10; i++)
+branch  0 taken 91% (fallthrough)
+branch  1 taken 9%
+       10:   28:    total += i;
+        -:   29:
+       1*:   30:  int v = total > 100 ? 1 : 2;
+branch  0 taken 0% (fallthrough)
+branch  1 taken 100%
+        -:   31:
+        1:   32:  if (total != 45)
+branch  0 taken 0% (fallthrough)
+branch  1 taken 100%
+    #####:   33:    printf ("Failure\n");
+call    0 never executed
+branch  1 never executed
+branch  2 never executed
+        -:   34:  else
+        1:   35:    printf ("Success\n");
+call    0 returned 100%
+branch  1 taken 100% (fallthrough)
+branch  2 taken 0% (throw)
+        1:   36:  return 0;
+        -:   37:}"""
+
+# This example is adapted from #226
+# <https://github.com/gcovr/gcovr/issues/226#issuecomment-368226650>
+# It is stripped down to the minimum useful testcase.
+GCOV_8_NAUTILUS = r"""
+        -:    0:Source:../src/nautilus-freedesktop-dbus.c
+        -:    0:Graph:/home/user/nautilus/_build/src/nautilus@sta/nautilus-freedesktop-dbus.c.gcno
+        -:    0:Data:-
+        -:    0:Runs:0
+        -:    0:Programs:0
+        -:    1:/*
+        -:    2: * nautilus-freedesktop-dbus: Implementation for the org.freedesktop DBus file-management interfaces
+        -:    3: *
+        -:    4: * Nautilus is free software; you can redistribute it and/or
+        -:    5: * modify it under the terms of the GNU General Public License as
+        -:    6: * published by the Free Software Foundation; either version 2 of the
+        -:    7: * License, or (at your option) any later version.
+        -:    8: *
+        -:    9: * Nautilus is distributed in the hope that it will be useful,
+        -:   10: * but WITHOUT ANY WARRANTY; without even the implied warranty of
+        -:   11: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+        -:   12: * General Public License for more details.
+        -:   13: *
+        -:   14: * You should have received a copy of the GNU General Public License
+        -:   15: * along with this program; if not, see <http://www.gnu.org/licenses/>.
+        -:   16: *
+        -:   17: * Authors: Akshay Gupta <kitallis@gmail.com>
+        -:   18: *          Federico Mena Quintero <federico@gnome.org>
+        -:   19: */
+        -:   50:
+    #####:   51:G_DEFINE_TYPE (NautilusFreedesktopDBus, nautilus_freedesktop_dbus, G_TYPE_OBJECT);
+------------------
+nautilus_freedesktop_dbus_get_type:
+function nautilus_freedesktop_dbus_get_type called 0 returned 0% blocks executed 0%
+    #####:   51:G_DEFINE_TYPE (NautilusFreedesktopDBus, nautilus_freedesktop_dbus, G_TYPE_OBJECT);
+branch  0 never executed
+branch  1 never executed
+call    2 never executed
+branch  3 never executed
+branch  4 never executed
+branch  5 never executed
+branch  6 never executed
+call    7 never executed
+call    8 never executed
+call    9 never executed
+------------------
+nautilus_freedesktop_dbus_class_intern_init:
+function nautilus_freedesktop_dbus_class_intern_init called 0 returned 0% blocks executed 0%
+    #####:   51:G_DEFINE_TYPE (NautilusFreedesktopDBus, nautilus_freedesktop_dbus, G_TYPE_OBJECT);
+call    0 never executed
+branch  1 never executed
+branch  2 never executed
+call    3 never executed
+call    4 never executed
+------------------
+    #####:   52:foo() ? bar():
+        -:   53:  baz();  // above line tests that sections can be terminated
+    #####:   53:qux();
+"""
+
+GCOV_8_SOURCES = dict(
+    gcov_8_example=GCOV_8_EXAMPLE,
+    nautilus_example=GCOV_8_NAUTILUS)
+
+GCOV_8_EXPECTED_UNCOVERED_LINES = dict(
+    gcov_8_example='33',
+    nautilus_example='51-53')
+
+GCOV_8_EXPECTED_UNCOVERED_BRANCHES = dict(
+    gcov_8_example='21,23,24,27,30,32,33,35',
+    nautilus_example='51')
+
+
+@pytest.mark.parametrize('sourcename', sorted(GCOV_8_SOURCES))
+def test_gcov_8(capsys, sourcename):
+    """Verify support for GCC 8 .gcov files.
+
+    GCC 8 introduces two changes:
+    -   for partial lines, the execution count is followed by an asterisk.
+    -   instantiations for templates and macros
+        are shown broken down for each specialization
+    """
+
+    source = GCOV_8_SOURCES[sourcename]
+    lines = source.splitlines()[1:]  # skip the empty first line
+    expected_uncovered_lines = GCOV_8_EXPECTED_UNCOVERED_LINES[sourcename]
+    expected_uncovered_branches = GCOV_8_EXPECTED_UNCOVERED_BRANCHES[sourcename]
+
+    parser = GcovParser("tmp.cpp", Logger())
+    parser.parse_all_lines(lines, exclude_unreachable_branches=False)
+
+    covdata = dict()
+    parser.update_coverage(covdata)
+    coverage = covdata['tmp.cpp']
+
+    uncovered_lines = coverage.uncovered_str(
+        exceptional=False, show_branch=False)
+    uncovered_branches = coverage.uncovered_str(
+        exceptional=False, show_branch=True)
+    assert uncovered_lines == expected_uncovered_lines
+    assert uncovered_branches == expected_uncovered_branches
+    out, err = capsys.readouterr()
+    assert (out, err) == ('', '')  # parsing itself must not print anything
+
+    parser.check_unrecognized_lines()
+    parser.check_unclosed_exclusions()
+    out, err = capsys.readouterr()
+    assert (out, err) == ('', '')  # and no warnings for valid GCC 8 input
+
+
+def test_unknown_tags(capsys):
+    source = r"bananas 7 times 3"  # deliberately not valid gcov output
+    lines = source.splitlines()
+
+    parser = GcovParser("foo.c", Logger())
+    parser.parse_all_lines(lines, exclude_unreachable_branches=False)
+
+    covdata = dict()
+    parser.update_coverage(covdata)
+    coverage = covdata['foo.c']
+
+    uncovered_lines = coverage.uncovered_str(
+        exceptional=False, show_branch=False)
+    uncovered_branches = coverage.uncovered_str(
+        exceptional=False, show_branch=True)
+    assert uncovered_lines == ''
+    assert uncovered_branches == ''
+    out, err = capsys.readouterr()
+    assert (out, err) == ('', '')  # problems are deferred, not printed yet
+
+    parser.check_unrecognized_lines()
+    parser.check_unclosed_exclusions()
+    out, err = capsys.readouterr()
+    assert out == ''
+    err_phrases = [
+        '(WARNING) Unrecognized GCOV output',
+        'bananas',
+        'github.com/gcovr/gcovr',
+    ]
+    for phrase in err_phrases:
+        assert phrase in err
+
+
+def test_pathologic_codeline(capsys):
+    source = r": 7:haha"  # nothing before the first colon
+    lines = source.splitlines()
+
+    parser = GcovParser("foo.c", Logger())
+    parser.parse_all_lines(lines, exclude_unreachable_branches=False)
+    parser.check_unrecognized_lines()
+    out, err = capsys.readouterr()
+    assert out == ''
+    err_phrases = [
+        '(WARNING) Unrecognized GCOV output',
+        ': 7:haha',
+        'Exception during parsing',
+        'IndexError',
+    ]
+    for phrase in err_phrases:
+        assert phrase in err