diff --git a/bandit/__init__.py b/bandit/__init__.py index 75f863db2..7c7bf00a8 100644 --- a/bandit/__init__.py +++ b/bandit/__init__.py @@ -16,4 +16,5 @@ from bandit.core.issue import * # noqa from bandit.core.test_properties import * # noqa +__author__ = metadata.metadata("bandit")["Author"] __version__ = metadata.version("bandit") diff --git a/bandit/formatters/sarif.py b/bandit/formatters/sarif.py new file mode 100644 index 000000000..faea22a65 --- /dev/null +++ b/bandit/formatters/sarif.py @@ -0,0 +1,365 @@ +# Copyright (c) Microsoft. All Rights Reserved. +# +# SPDX-License-Identifier: Apache-2.0 +# +# Note: this code mostly incorporated from +# https://github.com/microsoft/bandit-sarif-formatter +# +r""" +=============== +SARIF formatter +=============== + +This formatter outputs the issues in SARIF formatted JSON. + +:Example: + +.. code-block:: javascript + + { + "runs": [ + { + "tool": { + "driver": { + "name": "Bandit", + "organization": "PyCQA", + "rules": [ + { + "id": "B101", + "name": "assert_used", + "properties": { + "tags": [ + "security", + "external/cwe/cwe-703" + ], + "precision": "high" + }, + "helpUri": "https://bandit.readthedocs.io/en/1.7.8/plugins/b101_assert_used.html" + } + ], + "version": "1.7.8", + "semanticVersion": "1.7.8" + } + }, + "invocations": [ + { + "executionSuccessful": true, + "endTimeUtc": "2024-03-05T03:28:48Z" + } + ], + "properties": { + "metrics": { + "_totals": { + "loc": 1, + "nosec": 0, + "skipped_tests": 0, + "SEVERITY.UNDEFINED": 0, + "CONFIDENCE.UNDEFINED": 0, + "SEVERITY.LOW": 1, + "CONFIDENCE.LOW": 0, + "SEVERITY.MEDIUM": 0, + "CONFIDENCE.MEDIUM": 0, + "SEVERITY.HIGH": 0, + "CONFIDENCE.HIGH": 1 + }, + "./examples/assert.py": { + "loc": 1, + "nosec": 0, + "skipped_tests": 0, + "SEVERITY.UNDEFINED": 0, + "SEVERITY.LOW": 1, + "SEVERITY.MEDIUM": 0, + "SEVERITY.HIGH": 0, + "CONFIDENCE.UNDEFINED": 0, + "CONFIDENCE.LOW": 0, + "CONFIDENCE.MEDIUM": 0, + "CONFIDENCE.HIGH": 1 + } + } + }, + "results": [ + { + "message": { 
LOG = logging.getLogger(__name__)
SCHEMA_URI = "https://json.schemastore.org/sarif-2.1.0.json"
TS_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

# SARIF result level for each Bandit severity string. Any severity that is
# not listed here (e.g. "UNDEFINED") is reported as "warning".
_SEVERITY_LEVELS = {"HIGH": "error", "MEDIUM": "warning", "LOW": "note"}
_DEFAULT_LEVEL = "warning"


def report(manager, fileobj, sev_level, conf_level, lines=-1):
    """Prints issues in SARIF format

    :param manager: the bandit manager object
    :param fileobj: The output file object, which may be sys.stdout
    :param sev_level: Filtering severity level
    :param conf_level: Filtering confidence level
    :param lines: Number of lines to report, -1 for all. Accepted so the
        signature matches the other formatters; this formatter does not
        read it.
    """
    # Imported locally so this function does not depend on a change to the
    # module's import block. ``datetime.utcnow()`` is deprecated since
    # Python 3.12; an aware "now" in UTC formats to the same TS_FORMAT text.
    from datetime import timezone

    log = om.SarifLog(
        schema_uri=SCHEMA_URI,
        version="2.1.0",
        runs=[
            om.Run(
                tool=om.Tool(
                    driver=om.ToolComponent(
                        name="Bandit",
                        organization=bandit.__author__,
                        semantic_version=bandit.__version__,
                        version=bandit.__version__,
                    )
                ),
                invocations=[
                    om.Invocation(
                        end_time_utc=datetime.now(timezone.utc).strftime(
                            TS_FORMAT
                        ),
                        execution_successful=True,
                    )
                ],
                properties={"metrics": manager.metrics.data},
            )
        ],
    )

    run = log.runs[0]
    invocation = run.invocations[0]

    skips = manager.get_skipped()
    add_skipped_file_notifications(skips, invocation)

    issues = manager.get_issue_list(sev_level=sev_level, conf_level=conf_level)

    add_results(issues, run)

    serialized_log = to_json(log)

    # The context manager closes ``fileobj`` once the log has been written.
    with fileobj:
        fileobj.write(serialized_log)

    # ``name`` is still readable on a closed file object.
    if fileobj.name != sys.stdout.name:
        LOG.info("SARIF output written to file: %s", fileobj.name)


def add_skipped_file_notifications(skips, invocation):
    """Record every skipped file as a tool configuration notification.

    :param skips: iterable of ``(file_name, reason)`` pairs; ``None`` or an
        empty collection leaves ``invocation`` untouched
    :param invocation: the SARIF invocation object to append to
    """
    if not skips:
        return

    if invocation.tool_configuration_notifications is None:
        invocation.tool_configuration_notifications = []

    for file_name, reason in skips:
        notification = om.Notification(
            level="error",
            message=om.Message(text=reason),
            locations=[
                om.Location(
                    physical_location=om.PhysicalLocation(
                        artifact_location=om.ArtifactLocation(
                            uri=to_uri(file_name)
                        )
                    )
                )
            ],
        )

        invocation.tool_configuration_notifications.append(notification)


def add_results(issues, run):
    """Append one SARIF result per issue and register the rules they use.

    :param issues: iterable of issue objects exposing ``as_dict()``
    :param run: the SARIF run object whose ``results`` and
        ``tool.driver.rules`` are populated
    """
    if run.results is None:
        run.results = []

    rules = {}
    rule_indices = {}
    for issue in issues:
        run.results.append(create_result(issue, rules, rule_indices))

    # ``rules`` keeps insertion order and each rule's index is its insertion
    # position, so the list below lines up with every result's rule_index.
    if rules:
        run.tool.driver.rules = list(rules.values())


def create_result(issue, rules, rule_indices):
    """Build the SARIF result for a single issue.

    :param issue: issue object exposing ``as_dict()``
    :param rules: mapping of rule id to rule descriptor, updated in place
    :param rule_indices: mapping of rule id to rule index, updated in place
    :return: the SARIF result object for ``issue``
    """
    issue_dict = issue.as_dict()

    rule, rule_index = create_or_find_rule(issue_dict, rules, rule_indices)

    physical_location = om.PhysicalLocation(
        artifact_location=om.ArtifactLocation(
            uri=to_uri(issue_dict["filename"])
        )
    )

    add_region_and_context_region(
        physical_location,
        issue_dict["line_range"],
        issue_dict["col_offset"],
        issue_dict["end_col_offset"],
        issue_dict["code"],
    )

    return om.Result(
        rule_id=rule.id,
        rule_index=rule_index,
        message=om.Message(text=issue_dict["issue_text"]),
        level=level_from_severity(issue_dict["issue_severity"]),
        locations=[om.Location(physical_location=physical_location)],
        properties={
            "issue_confidence": issue_dict["issue_confidence"],
            "issue_severity": issue_dict["issue_severity"],
        },
    )


def level_from_severity(severity):
    """Map a severity string to a SARIF result level.

    :param severity: severity name such as "HIGH", "MEDIUM" or "LOW"
    :return: "error", "warning" or "note"; unrecognised values give "warning"
    """
    return _SEVERITY_LEVELS.get(severity, _DEFAULT_LEVEL)


def _snippet_for_line(snippet_lines, first_line_number, line_number):
    """Return the snippet text for ``line_number``, or None if not covered."""
    if first_line_number is None:
        return None

    index = line_number - first_line_number
    # A negative index would silently select a line from the end of the
    # snippet and an index past the end would raise, so check both bounds.
    if 0 <= index < len(snippet_lines):
        return snippet_lines[index]
    return None


def add_region_and_context_region(
    physical_location, line_range, col_offset, end_col_offset, code
):
    """Attach the issue region and its surrounding context region.

    :param physical_location: SARIF physical location, updated in place
    :param line_range: non-empty sequence of line numbers for the issue
    :param col_offset: start column offset of the issue
    :param end_col_offset: end column offset of the issue
    :param code: numbered code excerpt in the format ``parse_code`` accepts
    """
    first_line_number, snippet_lines = parse_code(code)
    start_line = line_range[0]
    snippet_line = _snippet_for_line(
        snippet_lines, first_line_number, start_line
    )

    region_kwargs = {
        "start_line": start_line,
        # The last entry equals the previous choice for one- and two-element
        # ranges, and is also the final line when the range enumerates every
        # line of a longer statement.
        "end_line": line_range[-1],
        # NOTE(review): presumably converts 0-based offsets to SARIF's
        # 1-based columns -- confirm against the issue's column semantics.
        "start_column": col_offset + 1,
        "end_column": end_col_offset + 1,
    }
    if snippet_line is not None:
        region_kwargs["snippet"] = om.ArtifactContent(text=snippet_line)
    physical_location.region = om.Region(**region_kwargs)

    # Without any excerpt there is nothing to describe as context.
    if snippet_lines:
        physical_location.context_region = om.Region(
            start_line=first_line_number,
            end_line=first_line_number + len(snippet_lines) - 1,
            snippet=om.ArtifactContent(text="".join(snippet_lines)),
        )


def parse_code(code):
    """Split a numbered code excerpt into its first line number and lines.

    Every line of ``code`` is expected to look like ``"<number> <text>"``.
    Only the number on the first line is parsed.

    :param code: the numbered excerpt, lines separated by ``"\\n"``
    :return: ``(first_line_number, snippet_lines)`` where each snippet line
        has its number prefix removed and keeps its newline; the final line
        has a newline only if ``code`` ended with one. An empty ``code``
        gives ``(None, [])``.
    """
    code_lines = code.split("\n")

    # A trailing empty item is an artifact of the last real line ending in a
    # newline -- unless, of course, it doesn't end in one.
    ends_in_newline = code_lines[-1] == ""
    if ends_in_newline:
        code_lines.pop()

    if not code_lines:
        return None, []

    first_line_number = int(code_lines[0].partition(" ")[0])
    snippet_lines = [line.partition(" ")[2] + "\n" for line in code_lines]

    if not ends_in_newline:
        # Drop the newline that was just added to the final line.
        snippet_lines[-1] = snippet_lines[-1][:-1]

    return first_line_number, snippet_lines


def _rule_tags(issue_cwe):
    """Return the rule tags, adding the CWE tag only when an id is present."""
    tags = ["security"]
    cwe_id = issue_cwe.get("id") if issue_cwe else None
    if cwe_id is not None:
        tags.append(f"external/cwe/cwe-{cwe_id}")
    return tags


def create_or_find_rule(issue_dict, rules, rule_indices):
    """Return the rule descriptor and index for an issue, creating it once.

    :param issue_dict: the issue as a dictionary
    :param rules: mapping of rule id to rule descriptor, updated in place
    :param rule_indices: mapping of rule id to rule index, updated in place
    :return: ``(rule, rule_index)``
    """
    rule_id = issue_dict["test_id"]
    if rule_id in rules:
        return rules[rule_id], rule_indices[rule_id]

    rule = om.ReportingDescriptor(
        id=rule_id,
        name=issue_dict["test_name"],
        help_uri=docs_utils.get_url(rule_id),
        properties={
            # Without an id the tag would read "external/cwe/cwe-None".
            "tags": _rule_tags(issue_dict["issue_cwe"]),
            "precision": issue_dict["issue_confidence"].lower(),
        },
    )

    index = len(rules)
    rules[rule_id] = rule
    rule_indices[rule_id] = index
    return rule, index


def to_uri(file_path):
    """Convert a file path to the URI used in SARIF artifact locations.

    :param file_path: absolute or relative path of a file
    :return: a ``file:`` URI for an absolute path, otherwise the relative
        path with forward slashes and special characters %-encoded
    """
    pure_path = pathlib.PurePath(file_path)
    # as_uri() only accepts absolute paths.
    if pure_path.is_absolute():
        return pure_path.as_uri()

    # Replace backslashes with slashes, then %-encode special characters.
    return urlparse.quote(pure_path.as_posix())
automodule:: bandit.formatters.sarif diff --git a/doc/source/man/bandit.rst b/doc/source/man/bandit.rst index 46125e613..eef10d271 100644 --- a/doc/source/man/bandit.rst +++ b/doc/source/man/bandit.rst @@ -44,7 +44,7 @@ OPTIONS (-l for LOW, -ll for MEDIUM, -lll for HIGH) -i, --confidence report only issues of a given confidence level or higher (-i for LOW, -ii for MEDIUM, -iii for HIGH) - -f {csv,custom,html,json,screen,txt,xml,yaml}, --format {csv,custom,html,json,screen,txt,xml,yaml} + -f {csv,custom,html,json,sarif,screen,txt,xml,yaml}, --format {csv,custom,html,json,sarif,screen,txt,xml,yaml} specify output format --msg-template MSG_TEMPLATE specify output message template (only usable with diff --git a/doc/source/start.rst b/doc/source/start.rst index 069ec7108..cd8f3dadf 100644 --- a/doc/source/start.rst +++ b/doc/source/start.rst @@ -38,6 +38,13 @@ extras: pip install bandit[baseline] +If you want to include SARIF output formatter support, install it with the +`sarif` extras: + +.. code-block:: console + + pip install bandit[sarif] + Run Bandit: .. code-block:: console diff --git a/setup.cfg b/setup.cfg index 54d4096a2..2dbee597c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,6 +37,9 @@ toml = tomli>=1.1.0; python_version < "3.11" baseline = GitPython>=3.1.30 +sarif = + sarif-om>=1.0.4 + jschema-to-python>=1.2.3 [entry_points] console_scripts = @@ -52,6 +55,7 @@ bandit.formatters = txt = bandit.formatters.text:report xml = bandit.formatters.xml:report html = bandit.formatters.html:report + sarif = bandit.formatters.sarif:report screen = bandit.formatters.screen:report yaml = bandit.formatters.yaml:report custom = bandit.formatters.custom:report diff --git a/tox.ini b/tox.ini index 27b3d75e7..13e3458de 100644 --- a/tox.ini +++ b/tox.ini @@ -14,6 +14,7 @@ extras = yaml toml baseline + sarif commands = find bandit -type f -name "*.pyc" -delete stestr run {posargs}