Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JSON content extension #130

Merged
merged 4 commits into from
Mar 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .scrutinizer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ build:
before:
- pip3 install -r requirements.txt
- npm i -g eslint
- pip3 install isort pylint flake8 coverage
- pip3 install isort pylint flake8 coverage jsonschema
environment:
python: 3.9.6
nodes:
Expand Down
49 changes: 49 additions & 0 deletions json_validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/usr/bin/env python3

# Copyright 2022, Red Hat, Inc.
# SPDX-License-Identifier: LGPL-2.1-or-later

import argparse
import json
import sys

from jsonschema import validate


def parse_args(argv=None):
    """Parse command-line arguments for the JSON Schema validator.

    Args:
        argv: Optional list of argument strings. Defaults to ``None``,
              which makes argparse fall back to ``sys.argv[1:]`` — the
              explicit parameter exists so the parser can be unit-tested.

    Returns:
        argparse.Namespace with ``schema`` (path string to the schema file)
        and ``JSON`` (an open, readable file object; defaults to stdin).
    """
    parser = argparse.ArgumentParser(prog="JSON Schema validator")
    parser.add_argument(
        "-s",
        "--schema",
        type=str,
        default="./tests/json_schema_of_report.json",
        help="Path to schema of JSON to validate."
    )
    parser.add_argument(
        "JSON",
        type=argparse.FileType("r"),
        nargs="?",
        default=sys.stdin,
        help="JSON file source. Default: stdin"
    )
    return parser.parse_args(argv)


def validate_json(schema_src, json_file):
    """Validate the JSON document in *json_file* against the schema at *schema_src*.

    Args:
        schema_src: Path to a JSON Schema file on disk.
        json_file: An open, readable file object holding the JSON document
                   (may be ``sys.stdin``); it is closed after being read.

    Raises:
        jsonschema.exceptions.ValidationError: if the document violates the schema.
        json.JSONDecodeError: if the schema or the document is not valid JSON.
        OSError: if the schema file cannot be opened.
    """
    # The ``= None`` pre-initializations in the original were dead stores;
    # both names are unconditionally assigned below.
    with open(schema_src, "r", encoding="utf-8") as schema_file:
        json_schema = json.load(schema_file)

    json_data = json.load(json_file)
    # Close explicitly: argparse's FileType opened this handle and nothing
    # else will. NOTE(review): this also closes stdin when it is the default
    # source — intentional for a one-shot CLI tool.
    json_file.close()

    validate(json_data, json_schema)


def main():
    """Entry point: parse the CLI arguments and validate the JSON input."""
    args = parse_args()
    validate_json(args.schema, args.JSON)


if __name__ == "__main__":
    main()
9 changes: 9 additions & 0 deletions openscap_report/scap_results_parser/data_structures/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@

from dataclasses import asdict, dataclass, field

# Keys of the Group data structure that are allowed through the JSON report
# filter (merged into JSON_REPORT_CONTENT in report.py).
GROUP_JSON_KEYS = [
    "group_id",
    "title",
    "description",
    "platforms",
    "rules_ids",
    "sub_groups",
]


@dataclass
class Group:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@

from dataclasses import asdict, dataclass

# Keys of the Identifier data structure that are allowed through the JSON
# report filter (merged into JSON_REPORT_CONTENT in report.py).
IDENTIFIER_JSON_KEYS = [
    "system",
    "text",
]


@dataclass
class Identifier:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Copyright 2022, Red Hat, Inc.
# SPDX-License-Identifier: LGPL-2.1-or-later


def rearrange_references(dictionary_json):
    """Collapse per-rule reference dicts into one top-level mapping.

    Each rule's ``references`` list of ``{"text": ..., "href": ...}`` entries
    is replaced by a plain list of the ``text`` values, and every text->href
    pair is collected under ``dictionary_json["references"]``. Later rules
    overwrite earlier ones when the same text appears twice.
    """
    text_to_href = {}
    for rule in dictionary_json["rules"].values():
        refs = rule["references"]
        text_to_href.update((ref["text"], ref["href"]) for ref in refs)
        rule["references"] = [ref["text"] for ref in refs]
    dictionary_json["references"] = text_to_href


def rearrange_identifiers(dictionary_json):
    """Collapse per-rule identifier dicts into one top-level mapping.

    Each rule's ``identifiers`` list of ``{"system": ..., "text": ...}``
    entries is replaced by a plain list of the ``text`` values, and every
    text->system pair is collected under ``dictionary_json["identifiers"]``.
    """
    text_to_system = {}
    for rule in dictionary_json["rules"].values():
        idents = rule["identifiers"]
        text_to_system.update((ident["text"], ident["system"]) for ident in idents)
        rule["identifiers"] = [ident["text"] for ident in idents]
    dictionary_json["identifiers"] = text_to_system


def _get_dict_or_value(val):
if isinstance(val, list):
out = []
for item in val:
out.append(_get_dict_or_value(item))
return out
if isinstance(val, dict):
return remove_empty_values(val)
return val


def is_not_empty(val):
    """Return True when *val* carries a value worth keeping in the JSON output.

    ``None`` is empty; any number counts as a value (0 and 0.0 included, so a
    zero score is preserved); anything else is judged by its length.

    Raises:
        TypeError: if *val* is neither None, a number, nor a sized object.
    """
    if val is None:
        return False
    if isinstance(val, (bool, int, float)):
        # Numbers have no len(). The original special-cased only float, so an
        # int (or bool) value reached len() below and raised TypeError.
        return True
    return len(val) > 0


def remove_empty_values(dictionary_json):
    """Return a copy of *dictionary_json* with empty values dropped, recursively.

    Values are first cleaned via _get_dict_or_value (which recurses into
    nested lists and dicts); a key survives only if the cleaned value passes
    is_not_empty.
    """
    return {
        key: value
        for key, raw in dictionary_json.items()
        if is_not_empty(value := _get_dict_or_value(raw))
    }


def remove_not_selected_rules(dictionary_json, ids_of_selected_rules):
    """Filter ``dictionary_json["rules"]`` down to the selected rules, in place.

    A rule is kept when its ID is in *ids_of_selected_rules*, or — when no
    explicit selection was given — when its result is not "notselected".
    """
    # Loop-invariant: whether an explicit selection exists at all.
    no_explicit_selection = not is_not_empty(ids_of_selected_rules)
    kept_rules = {}
    for rule_id, rule in dictionary_json["rules"].items():
        explicitly_selected = rule_id in ids_of_selected_rules
        implicitly_selected = no_explicit_selection and rule["result"] != "notselected"
        if explicitly_selected or implicitly_selected:
            kept_rules[rule_id] = rule
    dictionary_json["rules"] = kept_rules
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@

from .oval_node import OvalNode

# Keys of the OvalDefinition data structure that are allowed through the JSON
# report filter (merged into JSON_REPORT_CONTENT in report.py).
OVAL_DEFINITION_JSON_KEYS = [
    "definition_id",
    "title",
    "description",
    "version",
]


@dataclass
class OvalDefinition:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@

from dataclasses import asdict, dataclass, field

# Keys of the ProfileInfo data structure that are allowed through the JSON
# report filter (merged into JSON_REPORT_CONTENT in report.py).
PROFILE_JSON_KEYS = [
    "profile_id",
    "description",
    "title",
    "extends",
]


@dataclass
class ProfileInfo:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@

from dataclasses import asdict, dataclass

# Keys of the Reference data structure that are allowed through the JSON
# report filter (merged into JSON_REPORT_CONTENT in report.py).
REFERENCE_JSON_KEYS = [
    "href",
    "text",
]


@dataclass
class Reference:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@

from dataclasses import asdict, dataclass

# Keys of the Remediation data structure that are allowed through the JSON
# report filter (merged into JSON_REPORT_CONTENT in report.py).
REMEDIATION_JSON_KEYS = [
    "remediation_id",
    "system",
    "complexity",
    "disruption",
    "strategy",
    "fix",
]


@dataclass
class Remediation:
Expand Down
48 changes: 34 additions & 14 deletions openscap_report/scap_results_parser/data_structures/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,33 @@
from dataclasses import asdict, dataclass, field

from ..exceptions import MissingProcessableRules
from .group import Group
from .profile_info import ProfileInfo
from .result_of_scan import ResultOfScan
from .rule import Rule
from .group import GROUP_JSON_KEYS, Group
from .identifier import IDENTIFIER_JSON_KEYS
from .json_transformation import (rearrange_identifiers, rearrange_references,
remove_empty_values,
remove_not_selected_rules)
from .oval_definition import OVAL_DEFINITION_JSON_KEYS
from .profile_info import PROFILE_JSON_KEYS, ProfileInfo
from .reference import REFERENCE_JSON_KEYS
from .remediation import REMEDIATION_JSON_KEYS
from .result_of_scan import SCAN_JSON_KEYS, ResultOfScan
from .rule import RULE_JSON_KEYS, Rule
from .warning import WARNING_JSON_KEYS

# Union of every key allowed in the default JSON report: the top-level report
# attributes plus the per-data-structure key lists imported above. Used by
# Report.default_json_filter as the dict_factory allow-list.
JSON_REPORT_CONTENT = [
    "profile_info",
    "scan_result",
    "rules",
    *GROUP_JSON_KEYS,
    *IDENTIFIER_JSON_KEYS,
    *OVAL_DEFINITION_JSON_KEYS,
    *PROFILE_JSON_KEYS,
    *REFERENCE_JSON_KEYS,
    *REMEDIATION_JSON_KEYS,
    *RULE_JSON_KEYS,
    *SCAN_JSON_KEYS,
    *WARNING_JSON_KEYS,
]


@dataclass
Expand All @@ -20,18 +43,15 @@ class Report:

@staticmethod
def default_json_filter(dictionary):
allowed_keys = [
"title",
"profile_name",
"cpe_platforms",
"scanner",
"benchmark_id",
"score"
]
return {key: value for (key, value) in dictionary if key in allowed_keys}
return {key: value for (key, value) in dictionary if key in JSON_REPORT_CONTENT}

def as_dict_for_default_json(self):
return asdict(self, dict_factory=self.default_json_filter)
json_dict = asdict(self, dict_factory=self.default_json_filter)
remove_not_selected_rules(json_dict, self.profile_info.selected_rules_ids)
rearrange_references(json_dict)
rearrange_identifiers(json_dict)
json_dict = remove_empty_values(json_dict)
return json_dict

def as_dict(self):
return asdict(self)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,24 @@

from dataclasses import asdict, dataclass, field

# Keys of the ResultOfScan data structure that are allowed through the JSON
# report filter (merged into JSON_REPORT_CONTENT in report.py).
SCAN_JSON_KEYS = [
    "title",
    "identity",
    "profile_id",
    "target",
    "cpe_platforms",
    "scanner",
    "scanner_version",
    "benchmark_url",
    "benchmark_id",
    "benchmark_version",
    "start_time",
    "end_time",
    "test_system",
    "score",
    "score_max",
]


@dataclass
class ResultOfScan: # pylint: disable=R0902
Expand Down
17 changes: 17 additions & 0 deletions openscap_report/scap_results_parser/data_structures/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,23 @@
from .remediation import Remediation
from .warning import RuleWarning

# Keys of the Rule data structure that are allowed through the JSON report
# filter (merged into JSON_REPORT_CONTENT in report.py).
RULE_JSON_KEYS = [
    "rule_id",
    "title",
    "result",
    "time",
    "severity",
    "identifiers",
    "references",
    "description",
    "rationale",
    "warnings",
    "platforms",
    "oval_definition_id",
    "messages",
    "remediations",
]


@dataclass
class Rule: # pylint: disable=R0902
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
# SPDX-License-Identifier: LGPL-2.1-or-later
from dataclasses import asdict, dataclass

# Keys of the RuleWarning data structure that are allowed through the JSON
# report filter (merged into JSON_REPORT_CONTENT in report.py).
WARNING_JSON_KEYS = [
    "text",
    "category",
]


@dataclass
class RuleWarning:
Expand Down
2 changes: 2 additions & 0 deletions tests/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,5 @@
PATH_TO_XCCDF_WITHOUT_INFO = Path(__file__).parent / "test_data/xccdf-with-removed-info.xml"
PATH_TO_XCCDF_WITHOUT_SYSTEM_DATA = Path(__file__).parent / "test_data/xccdf_no_system_data.xml"
PATH_TO_XCCDF_WITH_MULTI_CHECK = Path(__file__).parent / "test_data/xccdf_multi_check.xml"

PATH_TO_JSON_SCHEMA = Path(__file__).parent / "json_schema_of_report.json"
30 changes: 30 additions & 0 deletions tests/integration_tests/test_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright 2022, Red Hat, Inc.
# SPDX-License-Identifier: LGPL-2.1-or-later

import json

import pytest
from jsonschema import validate

from openscap_report.debug_settings import DebugSetting
from openscap_report.report_generators.json import JSONReportGenerator
from tests.unit_tests.test_data_structure import get_parser, get_report

from ..constants import PATH_TO_ARF, PATH_TO_JSON_SCHEMA


@pytest.mark.integration_test
def test_json_structure_with_schema():
    """Generate a JSON report from the ARF fixture and validate it against the schema."""
    json_schema = None
    with open(PATH_TO_JSON_SCHEMA, "r", encoding="utf-8") as schema_file:
        json_schema = json.load(schema_file)
    json_gen = JSONReportGenerator(get_parser(PATH_TO_ARF))
    # generate_report returns a bytes stream; decode before parsing.
    json_data = json_gen.generate_report(DebugSetting()).read().decode("utf-8")
    validate(json.loads(json_data), json_schema)


@pytest.mark.integration_test
def test_json_count_of_rules():
    """Check the default JSON report keeps the expected number of rules.

    NOTE(review): 714 is pinned to the current ARF test fixture; this
    assertion must be updated whenever that fixture changes.
    """
    report = get_report()
    json_dict = report.as_dict_for_default_json()
    assert len(json_dict["rules"]) == 714
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think that it's the best idea to have a test file that contains 714 rules?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's okay. This test is labelled as an integration test. So it can be excluded from the normal test run using pytest -m "not integration_test" to reduce testing time. I think this test should check more than just the number of rules, but if I get too specific, the test will break after changing the JSON schema.

Loading