51 changes: 50 additions & 1 deletion backend/app/services/validation/tests/test_validation_engine.py
@@ -1,5 +1,54 @@
import pytest
from backend.app.services.validation.validation_engine import perform_cross_source_checks
from backend.app.services.validation.validation_engine import perform_cross_source_checks, normalize_missing

def test_normalize_missing():
data = {
"field1": "value1",
"field2": None,
"field3": "",
"nested": {
"nested_field1": "nested_value1",
"nested_field2": None,
"nested_field3": ""
},
"list_field": [
"item1",
None,
"item3",
""
]
}

expected_normalized_data = {
"field1": "value1",
"field2": "N/A",
"field3": "N/A",
"nested": {
"nested_field1": "nested_value1",
"nested_field2": "N/A",
"nested_field3": "N/A"
},
"list_field": [
"item1",
"N/A",
"item3",
"N/A"
],
"missing_data_report": {
"field2": "Missing or empty field replaced with 'N/A'.",
"field3": "Missing or empty field replaced with 'N/A'.",
"nested.nested_field2": "Missing or empty field replaced with 'N/A'.",
"nested.nested_field3": "Missing or empty field replaced with 'N/A'.",
"list_field[1]": "Missing or empty field replaced with 'N/A'.",
"list_field[3]": "Missing or empty field replaced with 'N/A'."
}
}

# Keep a deep copy so we can verify that normalize_missing does not mutate its input.
import copy
original_data = copy.deepcopy(data)
normalized_data = normalize_missing(data)
assert normalized_data == expected_normalized_data
assert data == original_data  # the original input must be left untouched

def test_circulating_supply_match():
data = {
49 changes: 48 additions & 1 deletion backend/app/services/validation/validation_engine.py
@@ -2,7 +2,9 @@
Validation engine for ensuring data quality and consistency before NLG and summary generation.
"""

import re
from typing import Dict, Any, Optional, List
from copy import deepcopy

DEFAULT_ESSENTIAL_FIELDS = ["report_id", "project_name", "summary"] # Example default essential fields

@@ -119,6 +121,51 @@ def perform_cross_source_checks(data: Dict[str, Any]) -> Dict[str, Any]:
"INFO: Documentation circulating supply not found."
)

if validation_results["alerts"]:
validation_results["cross_source_checks"] = "COMPLETED_WITH_ALERTS"
else:
validation_results["cross_source_checks"] = "PASSED"

return validation_results

# You can add more validation functions as needed.

def normalize_missing(data: Dict[str, Any]) -> Dict[str, Any]:
"""
Normalizes the input data by replacing missing or empty fields with explicit placeholders
and generates a `missing_data_report` explaining the gaps.

Args:
data: The input data dictionary to normalize.

Returns:
A new dictionary with missing fields normalized and a `missing_data_report`.
"""
normalized_data = deepcopy(data)
missing_data_report = {}

def _traverse_and_normalize(parent, key_or_index, current_data, path):
if isinstance(current_data, dict):
for key, value in current_data.items():
new_path = f"{path}.{key}" if path else key
if value is None or (isinstance(value, str) and value.strip() == ""):
parent[key_or_index][key] = "N/A" # Replace with placeholder
missing_data_report[new_path] = "Missing or empty field replaced with 'N/A'."
elif isinstance(value, (dict, list)):
_traverse_and_normalize(current_data, key, value, new_path)
elif isinstance(current_data, list):
for index, item in enumerate(current_data):
new_path = f"{path}[{index}]"
if item is None or (isinstance(item, str) and item.strip() == ""):
parent[key_or_index][index] = "N/A" # Replace with placeholder
missing_data_report[new_path] = "Missing or empty field replaced with 'N/A'."
elif isinstance(item, (dict, list)):
_traverse_and_normalize(current_data, index, item, new_path)

# Kick off the traversal. The temporary '__root__' wrapper gives the top-level
# dict a parent, so the helper can assign replacements via parent[key_or_index]
# uniformly; all edits happen in place on normalized_data, so no copy-back is needed.
temp_root = {'__root__': normalized_data}
_traverse_and_normalize(temp_root, '__root__', normalized_data, "")
normalized_data["missing_data_report"] = missing_data_report
return normalized_data

# You can add more validation functions as needed.
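
For context, a minimal usage sketch for the new helper (the report_data contents below are illustrative assumptions, not part of this PR):

from backend.app.services.validation.validation_engine import normalize_missing

# Hypothetical report payload with one empty and one missing field.
report_data = {
    "report_id": "r-001",
    "project_name": "",
    "summary": None,
}

normalized = normalize_missing(report_data)
# Empty/None values are replaced in the returned copy; the input dict is left untouched.
print(normalized["project_name"])  # "N/A"
print(normalized["summary"])       # "N/A"
print(normalized["missing_data_report"])
# {'project_name': "Missing or empty field replaced with 'N/A'.",
#  'summary': "Missing or empty field replaced with 'N/A'."}

In the wider pipeline, the normalized dictionary could then be handed to perform_cross_source_checks before NLG and summary generation, though that wiring is not part of this diff.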