Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 53 additions & 3 deletions backend/app/cli/scap_to_openwatch_converter_enhanced.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,27 @@ class ComparisonResult:
TemplateProcessor
)

# Import SCAP YAML parser for variable/remediation extraction
import sys
sys.path.insert(0, str(Path(__file__).parent.parent))
from services.scap_yaml_parser_service import extract_scap_metadata

class EnhancedSCAPConverter:
"""Enhanced converter with BSON and dry-run support"""

def __init__(self, scap_content_path: str, output_path: str, dry_run: bool = False):
def __init__(
self,
scap_content_path: str,
output_path: str,
dry_run: bool = False,
extract_variables: bool = False,
extract_remediation: bool = False
):
self.scap_content_path = Path(scap_content_path)
self.output_path = Path(output_path)
self.dry_run = dry_run
self.extract_variables = extract_variables
self.extract_remediation = extract_remediation
self.framework_mapper = FrameworkMapper()
self.template_processor = TemplateProcessor()
self.stats = ConversionStats()
Expand Down Expand Up @@ -171,7 +185,8 @@ def _convert_single_rule(self, rule_file: Path, output_format: str) -> None:

def _build_openwatch_rule(self, rule_data: Dict[str, Any], rule_file: Path, rule_id: str) -> Dict[str, Any]:
"""Build OpenWatch rule structure (from original converter)"""
return {
# Base rule structure
rule = {
"_id": f"ow-{rule_id}",
"rule_id": f"ow-{rule_id}",
"scap_rule_id": f"xccdf_org.ssgproject.content_rule_{rule_id}",
Expand Down Expand Up @@ -212,6 +227,33 @@ def _build_openwatch_rule(self, rule_data: Dict[str, Any], rule_file: Path, rule
"identifiers": self._extract_identifiers(rule_data)
}

# Extract Phase 1 metadata (variables, remediation, scanner type)
if self.extract_variables or self.extract_remediation:
try:
extracted_metadata = extract_scap_metadata(rule_data, rule_file)

# Add XCCDF variables
if self.extract_variables and extracted_metadata.get('xccdf_variables'):
rule['xccdf_variables'] = extracted_metadata['xccdf_variables']

# Add remediation content
if self.extract_remediation and extracted_metadata.get('remediation'):
rule['remediation'] = extracted_metadata['remediation']

# Always add scanner type (defaults to 'oscap' if not detected)
rule['scanner_type'] = extracted_metadata.get('scanner_type', 'oscap')

except Exception as e:
logger.warning(f"Failed to extract metadata for {rule_id}: {e}")
# Set defaults on error
rule['scanner_type'] = 'oscap'

else:
# If not extracting, set default scanner type
rule['scanner_type'] = 'oscap'

return rule

def _write_json_rule(self, rule: Dict[str, Any], rule_id: str) -> None:
"""Write rule as JSON file"""
output_file = self.output_path / f"ow-{rule_id}.json"
Expand Down Expand Up @@ -493,6 +535,8 @@ def main():
convert_parser.add_argument('--output-path', default='/home/rracine/hanalyx/openwatch/data/compliance_rules_converted')
convert_parser.add_argument('--format', choices=['json', 'bson'], default='json')
convert_parser.add_argument('--dry-run', action='store_true', help='Show what would be converted')
convert_parser.add_argument('--extract-variables', action='store_true', help='Extract XCCDF variables (Phase 1)')
convert_parser.add_argument('--extract-remediation', action='store_true', help='Extract remediation content (Phase 1)')
convert_parser.add_argument('--create-bundle', action='store_true', help='Create tar.gz bundle after conversion')
convert_parser.add_argument('--bundle-version', default='0.0.1', help='Bundle version')

Expand All @@ -511,7 +555,13 @@ def main():
args = parser.parse_args()

if args.command == 'convert':
converter = EnhancedSCAPConverter(args.scap_path, args.output_path, args.dry_run)
converter = EnhancedSCAPConverter(
args.scap_path,
args.output_path,
args.dry_run,
args.extract_variables,
args.extract_remediation
)
stats = converter.convert_all_rules(args.format)

# Create bundle if requested
Expand Down
206 changes: 205 additions & 1 deletion backend/app/models/mongo_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,13 +171,117 @@
)


class XCCDFVariable(BaseModel):
    """
    XCCDF variable definition for scan-time customization

    XCCDF variables allow users to customize compliance checks at scan time.
    Examples: session timeout values, login banner text, password policies.

    Supports Solution A (XCCDF Variables) for hybrid scanning architecture.
    See: /docs/REMEDIATION_WITH_XCCDF_VARIABLES.md

    Validation performed on construction:
    - ``type`` must be one of ``string``, ``number`` or ``boolean``.
    - ``constraints`` (when non-empty) must have consistent bounds for the
      declared ``type`` and, for string variables, a compilable regex
      ``pattern``.
    """

    # NOTE(review): `exclude_none` / `exclude_unset` look like serialization
    # arguments rather than pydantic config options, so these entries
    # presumably have no effect - confirm and move them to the dump call sites
    # if exclusion is actually required.
    model_config = {
        "exclude_none": True,
        "exclude_unset": True
    }

    id: str = Field(
        description="Variable identifier (e.g., 'var_accounts_tmout', 'login_banner_text')"
    )
    title: str = Field(
        description="Human-readable variable title"
    )
    description: Optional[str] = Field(
        default=None,
        description="Detailed description of what this variable controls"
    )
    type: str = Field(
        pattern="^(string|number|boolean)$",
        description="Variable data type: string, number, or boolean"
    )
    default_value: str = Field(
        description="Default value if user doesn't provide custom value"
    )
    interactive: bool = Field(
        default=True,
        description="Whether this variable can be customized via UI/API (set to False for system variables)"
    )
    sensitive: bool = Field(
        default=False,
        description="Whether this variable contains sensitive data (passwords, keys, etc.). Encrypted in storage, masked in UI."
    )
    constraints: Optional[Dict[str, Any]] = Field(
        default=None,
        description="""
        Validation constraints for variable values:
        - min_value/max_value: For numeric types
        - min_length/max_length: For string types
        - choices: List of allowed values (enum-like)
        - pattern: Regex pattern for validation (string types)

        Examples:
        - {"min_value": 60, "max_value": 3600}  # Session timeout 1-60 mins
        - {"choices": ["300", "600", "900"]}  # Predefined timeout options
        - {"pattern": "^grub\\.pbkdf2\\.sha512\\."}  # GRUB password hash format
        """
    )

    # Pydantic validators are implicitly class methods, so `cls` (not `self`)
    # is the correct first parameter. The CodeQL notice "First parameter of a
    # method is not named 'self'" is a false positive for the two validators
    # below and must not be "fixed" by renaming the parameter.
    @validator('type')
    def validate_type(cls, v: str) -> str:
        """Ensure type is one of the supported XCCDF types.

        Returns the value unchanged; raises ValueError for anything other
        than 'string', 'number' or 'boolean'.
        """
        valid_types = ['string', 'number', 'boolean']
        if v not in valid_types:
            raise ValueError(f"Invalid type '{v}'. Must be one of: {', '.join(valid_types)}")
        return v

    @validator('constraints')
    def validate_constraints(cls, v, values):
        """Validate constraints match the variable type.

        Args:
            v: The constraints mapping (may be None or empty).
            values: Previously validated fields; only 'type' is read.

        Returns:
            The constraints unchanged.

        Raises:
            ValueError: If a lower bound exceeds its upper bound, if the two
                bounds cannot be compared with each other, or if a string
                variable's 'pattern' is not a compilable regex.
        """
        if not v:
            return v

        # 'type' is absent from `values` when its own validation failed; in
        # that case no type-specific checks are applied.
        var_type = values.get('type')

        # Which pair of bounds applies to which variable type.
        bound_keys = {
            'number': ('min_value', 'max_value'),
            'string': ('min_length', 'max_length'),
        }.get(var_type)

        if bound_keys is not None:
            lower_key, upper_key = bound_keys
            if lower_key in v and upper_key in v:
                try:
                    inverted = v[lower_key] > v[upper_key]
                except TypeError as exc:
                    # Mixed types (e.g. "60" vs 3600) would otherwise escape
                    # as a raw TypeError instead of a validation error.
                    raise ValueError(
                        f"{lower_key} and {upper_key} must be comparable values of the same kind"
                    ) from exc
                if inverted:
                    raise ValueError(f"{lower_key} cannot be greater than {upper_key}")

        # NOTE(review): 'pattern' is documented as a string-type constraint,
        # so it is only checked for string variables - confirm this matches
        # the intended nesting.
        if var_type == 'string' and 'pattern' in v:
            import re
            try:
                re.compile(v['pattern'])
            except (re.error, TypeError) as e:
                # TypeError covers a non-string pattern value.
                raise ValueError(f"Invalid regex pattern: {e}") from e

        return v


class ComplianceRule(Document):
"""Enhanced MongoDB model for compliance rules with inheritance and multi-platform support"""

class Settings:
name = "compliance_rules"
use_state_management = True
validate_on_save = True
indexes = [
"rule_id", # Primary lookup
"scanner_type", # Phase 1: Route rules to appropriate scanner
"version", # Version queries
"is_latest", # Current version queries
[("rule_id", 1), ("version", -1)], # Compound: rule + version
[("scanner_type", 1), ("is_latest", 1)], # Phase 1: Latest rules by scanner type
]

# Core Identifiers
rule_id: str = Field(
Expand Down Expand Up @@ -414,7 +518,107 @@
default=None,
description="Rule ID that replaces this deprecated rule"
)


# ============================================================================
# Phase 1: Hybrid Scanning Architecture (XCCDF Variables + Native Scanners)
# ============================================================================

# XCCDF Variables for Scan-Time Customization (Solution A)
xccdf_variables: Optional[Dict[str, XCCDFVariable]] = Field(
default=None,
description="""
XCCDF variables that can be customized at scan time.

Enables user customization of compliance checks without modifying rules:
- Session timeouts (var_accounts_tmout)
- Login banners (login_banner_text)
- Password policies (var_password_pam_minlen)
- GRUB credentials (grub2_bootloader_password)
- etc.

Maps variable IDs to XCCDFVariable definitions.

Example:
{
"var_accounts_tmout": XCCDFVariable(
id="var_accounts_tmout",
title="Account Inactivity Timeout",
type="number",
default_value="600",
constraints={"min_value": 60, "max_value": 3600}
)
}

See: /docs/REMEDIATION_WITH_XCCDF_VARIABLES.md
"""
)

# Scanner Type Routing (Polyglot Scanner Architecture)
scanner_type: str = Field(
default="oscap",
pattern="^(oscap|inspec|python|bash|aws_api|azure_api|gcp_api|kubernetes|docker|sql|mongodb|elasticsearch|opa_rego|custom)$",
description="""
Scanner engine to use for this rule.

OpenWatch Native Scanning Engine uses domain-specific scanners:
- oscap: Traditional OSCAP/OVAL checks (Linux/Unix)
- inspec: Chef Inspec DSL checks
- python: Custom Python scripts (sandboxed)
- bash: Simple shell checks
- aws_api: AWS cloud resources (S3, IAM, VPC, etc.)
- azure_api: Azure cloud resources
- gcp_api: GCP cloud resources
- kubernetes: K8s resource compliance (kube-bench, OPA)
- docker: Container image scanning (Trivy, Falco)
- sql: Database configuration (PostgreSQL, MySQL, etc.)
- mongodb: MongoDB configuration checks
- elasticsearch: Elasticsearch settings
- opa_rego: Open Policy Agent / Rego policies
- custom: Organization-specific custom scanner

See: /docs/ADVANCED_SCANNING_ARCHITECTURE.md
"""
)

# Remediation Content for ORSA (Open Remediation Standard Adapter)
remediation: Optional[Dict[str, Any]] = Field(
default=None,
description="""
Remediation content for ORSA (Open Remediation Standard Adapter) plugins.

Supports multiple remediation formats extracted from XCCDF or custom-defined:
- ansible: Ansible tasks with variable bindings
- bash: Bash scripts with variable substitution
- puppet: Puppet manifests
- chef: Chef recipes
- powershell: PowerShell scripts (Windows)
- terraform: Terraform configuration changes (cloud)
- kubectl: Kubernetes manifest updates

Example:
{
"ansible": {
"tasks": "- name: Set timeout\\n lineinfile:\\n path: /etc/profile\\n line: 'TMOUT={{ var_accounts_tmout }}'",
"variables": ["var_accounts_tmout"],
"complexity": "low",
"disruption": "low"
},
"bash": {
"script": "echo 'TMOUT=$XCCDF_VALUE_VAR_ACCOUNTS_TMOUT' >> /etc/profile",
"variables": ["var_accounts_tmout"]
}
}

ORSA plugins extract remediation from this field and execute via appropriate tool.

See: /docs/PLUGIN_ARCHITECTURE.md (ORSA section)
"""
)

# ============================================================================
# End Phase 1 Fields
# ============================================================================

@validator('rule_id')
def validate_rule_id(cls, v):
if not v or len(v) < 3:
Expand Down
Loading
Loading