diff --git a/.github/scripts/merge_sarif.py b/.github/scripts/merge_sarif.py
new file mode 100755
index 000000000..984338ca0
--- /dev/null
+++ b/.github/scripts/merge_sarif.py
@@ -0,0 +1,86 @@
+#! /usr/bin/env python3
+
+# The purpose of this file is to adapt the output from
+# Clang's static analyzer into a format suitable for GitHub
+# Actions. The problem is that Clang outputs a separate "run"
+# per file in its SARIF output, but GitHub requires a single
+# run per tool (Clang is wrong here).
+
+import sys
+import json
+
+# Collapse all the runs in the SARIF log into a single run, in place.
+# Raises ValueError if the log contains no runs at all.
+def merge_runs(data):
+    runs = data.get('runs')
+    if not runs:
+        raise ValueError("SARIF log must have at least one run")
+
+    # Arbitrarily pick the first run as the one from which to copy all the properties
+    base_run = runs[0]
+
+    # We don't need these, GitHub ignores them
+    base_run['artifacts'] = []
+
+    # Concatenate results. A run is not required to carry a "results" array
+    # (it may be absent or null), so treat that case as "no results".
+    results = list(base_run.get('results') or [])
+    for r in runs[1:]:
+        results.extend(r.get('results') or [])
+    base_run['results'] = results
+
+    data['runs'] = [base_run]
+
+# Clamp the end of a region, in place, so that it never precedes its start.
+# Raises ValueError if the region has no startLine.
+def fix_region(region):
+    start_line = region.get('startLine', None)
+    if start_line is None:
+        raise ValueError("Region must have startLine")
+    start_column = region.get('startColumn', 1)
+    end_line = region.get('endLine', None)
+
+    if end_line is not None and end_line < start_line:
+        # Collapse the region onto its start line. The end column referred to
+        # the discarded end line, so drop it; it is optional and may already
+        # be absent, hence pop() rather than del.
+        region['endLine'] = start_line
+        region.pop('endColumn', None)
+        return
+
+    # Columns are only comparable when the region lies on a single line.
+    single_line = end_line is None or end_line == start_line
+    end_column = region.get('endColumn', None)
+    if single_line and end_column is not None and end_column < start_column:
+        region['endColumn'] = start_column
+
+# Recursively scan the data dictionary, and apply the fix_region() function
+# to all "region":Region key-value pairs.
+def fix_regions(data):
+    if isinstance(data, dict):
+        if 'region' in data:
+            fix_region(data['region'])
+        for value in data.values():
+            fix_regions(value)
+    elif isinstance(data, list):
+        for item in data:
+            fix_regions(item)
+
+# Rewrite the SARIF file named by argv[1] in place; returns the exit status.
+def main(argv):
+    if len(argv) < 2:
+        print("Usage: merge_sarif.py INPUT", file=sys.stderr)
+        return 1
+
+    with open(argv[1], 'rb') as f:
+        data = json.load(f)
+
+    merge_runs(data)
+    fix_regions(data)
+
+    with open(argv[1], 'w') as f:
+        json.dump(data, f, indent=2)
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))
diff --git a/.github/workflows/clang-analyzer.yml b/.github/workflows/clang-analyzer.yml
index ea84b571c..6ae38eaa2 100644
--- a/.github/workflows/clang-analyzer.yml
+++ b/.github/workflows/clang-analyzer.yml
@@ -18,14 +18,6 @@ jobs:
       security-events: write
       contents: read
 
-    env:
-      # The @microsoft/sarif-multitool tool actually uses DotnetCore, which in
-      # turn aborts when it finds that GitHub's CI machine doesn't have ICU.
-      # Just turn off localisation. A future version of the ubuntu-24.04 or
-      # ubuntu-latest runners might not need this workaround.
-      # https://github.com/actions/runner-images/issues/10989
-      DOTNET_SYSTEM_GLOBALIZATION_INVARIANT: 1
-
     steps:
       - name: Setup
         run: |
@@ -37,36 +29,30 @@ jobs:
         with:
           submodules: true
 
-      - name: Configure
+      - name: Configure & Build
         run: |
+          LLVM_VER=`clang --version | head -n1 | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+' | cut -d. -f1`
+          echo "Using LLVM version $LLVM_VER"
+
           mkdir build
           cd build
-          scan-build cmake -G Ninja -DPCRE2_SUPPORT_JIT=ON -DCMAKE_BUILD_TYPE=Debug ..
 
-      - name: Build
-        run: |
-          # Inefficiently run clang scan twice; once to generate HTML, and secondly
-          # to generate SARIF files. Ideally we would have some way to scan once and
-          # generate one of those outputs from the other, but I don't know a good way
-          # to do that.
-          cd build
-          scan-build -o clang-report/ ninja
+          scan-build-py-$LLVM_VER -o clang-sarif-root/ --sarif-html sh -c "cmake -G Ninja -DPCRE2_SUPPORT_JIT=ON -DCMAKE_BUILD_TYPE=Debug .. && ninja"
+          rm clang-sarif-root/*/result-*.sarif
+          mv clang-sarif-root/* ../clang-report
 
-          ninja clean
-          scan-build -o clang-sarif -sarif ninja
-          # Work around issue in GitHub's SARIF ingestion - merge all SARIF files into one
-          npx -y @microsoft/sarif-multitool merge clang-sarif/*/*.sarif --output-file=clang.sarif
+          ../.github/scripts/merge_sarif.py ../clang-report/results-merged.sarif
 
       # Upload the browsable HTML report as an artifact.
       - name: Upload report
         uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
         with:
           name: "Clang Static Analyzer report"
-          path: './build/clang-report'
+          path: './clang-report'
 
       # Upload the results to GitHub's code scanning dashboard.
       - name: "Upload to code-scanning"
         uses: github/codeql-action/upload-sarif@51f77329afa6477de8c49fc9c7046c15b9a4e79d # v3.29.5
         with:
-          sarif_file: build/clang.sarif
+          sarif_file: ./clang-report/results-merged.sarif
           category: clang-analyzer