In [1]:
%pip install pyyaml

Note: you may need to restart the kernel to use updated packages.


In [7]:
import json
from pathlib import Path

import yaml

from src.axiomatic_kernel import (
    AxiomKernel,
    VariableSchema,
    AxiomDefinition,
    DecisionLogger,
)
from src.nl_rule_parser import (
    build_axiom_from_nl,
    RuleParseError,
)
from src.explanation_engine import (
    DecisionExplainer,
    ExplanationConfig,
)
from src.rules_io import load_ruleset_from_file, apply_ruleset_to_kernel

In [8]:


# === 1) SCHEMA DOPASOWANA DO FRAUD_RULES (tylko typy obs≈Çugiwane przez kernel) ===
schema = [
    VariableSchema("amount", "int", "Kwota transakcji w jednostkach minimalnych."),
    VariableSchema("tx_count_24h", "int", "Liczba transakcji w ostatnich 24h."),
    VariableSchema("is_pep", "bool", "Czy klient jest PEP."),
    VariableSchema("is_suspicious", "bool", "Czy transakcja jest podejrzana."),
]

logger = DecisionLogger("logs/fraud_rules_demo.jsonl")

kernel = AxiomKernel(
    schema=schema,
    decision_variable="is_suspicious",
    logger=logger,
    rule_version="fraud_rules_v1",
)

# === 2) Wczytanie rulesetu z pliku YAML ===
rules_path = Path("rules") / "fraud_rules_v1.yaml"
rules_path.parent.mkdir(exist_ok=True)

ruleset = load_ruleset_from_file(rules_path)

print(f"üìò Wczytano ruleset: {ruleset.ruleset_id} (v{ruleset.version})")
print(f"Opis: {ruleset.description}")
print(f"Liczba regu≈Ç w pliku (≈ÇƒÖcznie): {len(ruleset.rules)}")

# === 3) Na≈Ço≈ºenie rulesetu na kernel ===
summary = apply_ruleset_to_kernel(
    kernel=kernel,
    ruleset=ruleset,
    schema=schema,                         # üëà NOWE: przekazujemy schema
    decision_field_fallback="is_suspicious",  # üëà NOWE: ta sama zmienna co decision_variable
    strict=True,                           # przerwij przy pierwszym b≈Çƒôdzie
    extra_metadata={"domain": "fraud-demo"},
)

print("\nüìä Podsumowanie ≈Çadowania regu≈Ç:")
print(f"- total_rules:   {summary.total_rules}")
print(f"- enabled_rules: {summary.enabled_rules}")
print(f"- loaded_rules:  {summary.loaded_rules}")
print(f"- skipped_rules: {summary.skipped_rules}")
print(f"- errors:        {summary.errors}")


üìò Wczytano ruleset: fraud_rules_v1 (v1.0.0)
Opis: Podstawowe regu≈Çy fraud / AML
Liczba regu≈Ç w pliku (≈ÇƒÖcznie): 3

üìä Podsumowanie ≈Çadowania regu≈Ç:
- total_rules:   3
- enabled_rules: 2
- loaded_rules:  2
- skipped_rules: 1
- errors:        {}


In [None]:
# Katalog i plik z regu≈Çami dla tego notebooka
rules_dir = Path("rules")
rules_dir.mkdir(exist_ok=True)

rules_file = rules_dir / "notebook_rules.yaml"

# Domy≈õlny zestaw regu≈Ç ‚Äì u≈ºyty tylko, je≈õli plik nie istnieje.
default_rules_yaml = """rules:
  - id: "nl_high_risk_flag"
    text: "If amount > 10000 and risk_score > 5 then flag = true"
  - id: "nl_low_risk_clear"
    text: "If risk_score <= 2 then flag = false"
"""

if not rules_file.exists():
    rules_file.write_text(default_rules_yaml, encoding="utf-8")
    print(f"Utworzono domy≈õlny plik z regu≈Çami: {rules_file}")
else:
    print(f"U≈ºywam istniejƒÖcego pliku z regu≈Çami: {rules_file}")

# Wczytanie regu≈Ç z pliku YAML
with rules_file.open("r", encoding="utf-8") as f:
    rules_data = yaml.safe_load(f)

rules_list = rules_data.get("rules", [])

axioms = []
for raw_rule in rules_list:
    rule_id = raw_rule["id"]
    text = raw_rule["text"]

    axiom = build_axiom_from_nl(
        rule_id=rule_id,
        text=text,
        schema=schema,
        decision_field_fallback="flag",
    )
    kernel.add_axiom_safe(axiom)
    axioms.append(axiom)

print("Dodane regu≈Çy:", [a.id for a in axioms])


In [9]:
explainer = DecisionExplainer(ExplanationConfig(language="pl"))

# Przypadek wysokiego ryzyka ‚Äì powinno byƒá FLAGGED
case_flagged = {"amount": 15_000, "risk_score": 7}
bundle_flagged = kernel.evaluate(case_flagged)

print("=== RAW BUNDLE (FLAGGED) ===")
print(json.dumps(bundle_flagged, indent=2, ensure_ascii=False))

print("\n=== WYJA≈öNIENIE (FLAGGED) ===")
print(explainer.explain(bundle_flagged).to_text(language="pl"))



# Przypadek niskiego ryzyka ‚Äì powinno byƒá CLEAN
case_clean = {"amount": 500, "risk_score": 1}
bundle_clean = kernel.evaluate(case_clean)

print("=== RAW BUNDLE (CLEAN) ===")
print(json.dumps(bundle_clean, indent=2, ensure_ascii=False))

print("\n=== WYJA≈öNIENIE (CLEAN) ===")
print(explainer.explain(bundle_clean).to_text(language="pl"))


=== RAW BUNDLE (FLAGGED) ===
{
  "decision_status": "SAT",
  "decision": "FLAGGED",
  "facts": {
    "amount": 15000
  },
  "model": {
    "amount": 15000,
    "tx_count_24h": 0,
    "is_pep": false,
    "is_suspicious": true
  },
  "satisfied_axioms": [
    {
      "id": "fraud.high_amount",
      "description": "IF amount > 10000 THEN is_suspicious = TRUE",
      "holds": true,
      "antecedent_true": true
    },
    {
      "id": "fraud.velocity",
      "description": "IF tx_count_24h > 5 THEN is_suspicious = TRUE",
      "holds": true,
      "antecedent_true": false
    }
  ],
  "violated_axioms": [],
  "active_axioms": [
    {
      "id": "fraud.high_amount",
      "description": "IF amount > 10000 THEN is_suspicious = TRUE",
      "holds": true,
      "antecedent_true": true
    }
  ],
  "inactive_actions": [
    {
      "id": "fraud.velocity",
      "description": "IF tx_count_24h > 5 THEN is_suspicious = TRUE",
      "holds": true,
      "antecedent_true": false
    }
  ],
  "

In [10]:
from z3 import Implies  # type: ignore

unsat_kernel = AxiomKernel(
    schema=schema,
    decision_variable="flag",
    logger=None,
    rule_version="demo_unsat_v1",
)

def rule_flag_true(vars_z3):
    amount = vars_z3["amount"]
    flag = vars_z3["flag"]
    return Implies(amount > 10_000, flag == True)

def rule_flag_false(vars_z3):
    amount = vars_z3["amount"]
    flag = vars_z3["flag"]
    return Implies(amount > 10_000, flag == False)

unsat_kernel.add_axiom(
    AxiomDefinition(
        id="amount_flag_true",
        description="If amount > 10000 then flag must be True.",
        build_constraint=rule_flag_true,
    )
)
unsat_kernel.add_axiom(
    AxiomDefinition(
        id="amount_flag_false",
        description="If amount > 10000 then flag must be False.",
        build_constraint=rule_flag_false,
    )
)

case_conflict = {"amount": 15_000, "risk_score": 5}
bundle_unsat = unsat_kernel.evaluate(case_conflict)

print("=== RAW BUNDLE (UNSAT) ===")
print(json.dumps(bundle_unsat, indent=2, ensure_ascii=False))

print("\n=== WYJA≈öNIENIE (UNSAT) ===")
print(explainer.explain(bundle_unsat).to_text(language="pl"))


Kernel evaluation error
Traceback (most recent call last):
  File "/home/jupyter/olga_zydziak/version_beta/Folder/casual_model/Axiomatic-learning/src/axiomatic_kernel.py", line 375, in evaluate
    constraint = axiom.build_constraint(self._variables)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/var/tmp/ipykernel_17759/431343928.py", line 12, in rule_flag_true
    flag = vars_z3["flag"]
           ~~~~~~~^^^^^^^^
KeyError: 'flag'


=== RAW BUNDLE (UNSAT) ===
{
  "decision_status": "ERROR",
  "decision": "ERROR",
  "facts": {
    "amount": 15000,
    "risk_score": 5
  },
  "model": {},
  "satisfied_axioms": [],
  "violated_axioms": [],
  "active_axioms": [],
  "inactive_actions": [],
  "conflicting_axioms": [],
  "rule_version": "demo_unsat_v1",
  "error": "'flag'"
}

=== WYJA≈öNIENIE (UNSAT) ===
WystƒÖpi≈Ç b≈ÇƒÖd podczas ewaluacji regu≈Ç. Kluczowe dane wej≈õciowe: amount=15000, risk_score=5.

B≈ÇƒÖd techniczny: 'flag'


In [None]:
"""rule_analytics.py

FAZA 4 ‚Äì Silnik analizy regu≈Ç i decyzji na podstawie log√≥w JSONL
generowanych przez DecisionLogger z axiomatic_kernel.py.

G≈Ç√≥wne za≈Ço≈ºenia:
- ≈πr√≥d≈Çem prawdy sƒÖ logi decyzji (JSONL), gdzie ka≈ºda linia ma postaƒá:
    {
      "decision_id": "<uuid>",
      "logged_at_utc": "<ISO timestamp>",
      "decision": {
         "decision_status": "...",
         "decision": "...",
         "facts": {...},
         "model": {...},
         "satisfied_axioms": [...],
         "violated_axioms": [...],
         "active_axioms": [...],
         "inactive_actions": [...],
         "conflicting_axioms": [...],
         "rule_version": "..."
         ...
      }
    }

- Modu≈Ç nie zale≈ºy od Z3 ani innych ciƒô≈ºkich komponent√≥w ‚Äì operuje
  wy≈ÇƒÖcznie na danych z log√≥w.

- Wynikiem analizy jest struktura danych gotowa do dalszego
  raportowania / wizualizacji w PoC bankowym:
  * statystyki decyzji,
  * statystyki regu≈Ç,
  * raport pokrycia rulesetu (je≈õli podamy RuleSet z rules_io).

Mo≈ºesz ten modu≈Ç wpiƒÖƒá bezpo≈õrednio do istniejƒÖcego projektu.
"""

from __future__ import annotations

import json
import logging
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional

from rules_io import RuleSet, load_ruleset_from_file

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# Modele danych
# ---------------------------------------------------------------------------


@dataclass(frozen=True)
class DecisionRecord:
    """Pojedynczy rekord decyzji odczytany z logu JSONL.

    Attributes:
        decision_id:
            Identyfikator decyzji nadany przez DecisionLogger.
        logged_at_utc:
            Moment zapisania decyzji w formacie datetime (UTC).
        bundle:
            Pe≈Çny "proof bundle" zwr√≥cony przez AxiomKernel.evaluate().
    """

    decision_id: str
    logged_at_utc: datetime
    bundle: Dict[str, Any]

    @staticmethod
    def from_json_line(line: str) -> "DecisionRecord":
        """Parsuje jednƒÖ liniƒô JSONL i zwraca DecisionRecord.

        Podnosi ValueError przy b≈Çƒôdnym formacie.
        """
        raw = json.loads(line)
        decision_id = str(raw.get("decision_id", ""))

        logged_at_raw = raw.get("logged_at_utc")
        if not isinstance(logged_at_raw, str):
            raise ValueError("logged_at_utc must be a string timestamp")

        try:
            logged_at = datetime.fromisoformat(logged_at_raw)
        except ValueError as exc:
            raise ValueError(
                f"Invalid ISO timestamp in logged_at_utc: {logged_at_raw!r}"
            ) from exc

        bundle = raw.get("decision")
        if not isinstance(bundle, dict):
            raise ValueError("'decision' field must be an object")

        return DecisionRecord(
            decision_id=decision_id or "",
            logged_at_utc=logged_at,
            bundle=bundle,
        )


@dataclass
class DecisionOutcomeStats:
    """Zagregowane statystyki decyzji w logu."""

    total_decisions: int = 0
    by_decision: Dict[str, int] = field(default_factory=dict)
    by_status: Dict[str, int] = field(default_factory=dict)
    by_rule_version: Dict[str, int] = field(default_factory=dict)

    # liczba przypadk√≥w, w kt√≥rych solver zwr√≥ci≈Ç UNSAT (konflikt regu≈Ç)
    unsat_cases: int = 0

    # liczba przypadk√≥w, w kt√≥rych status by≈Ç ERROR lub UNKNOWN
    error_cases: int = 0

    def as_dict(self) -> Dict[str, Any]:
        return {
            "total_decisions": self.total_decisions,
            "by_decision": dict(self.by_decision),
            "by_status": dict(self.by_status),
            "by_rule_version": dict(self.by_rule_version),
            "unsat_cases": self.unsat_cases,
            "error_cases": self.error_cases,
        }


@dataclass
class RuleStats:
    """Statystyki pojedynczej regu≈Çy (na przestrzeni wielu decyzji)."""

    rule_id: str
    description: Optional[str] = None

    # liczba decyzji, w kt√≥rych regu≈Ça w og√≥le siƒô pojawi≈Ça
    total_occurrences: int = 0

    # liczba decyzji, w kt√≥rych regu≈Ça by≈Ça logicznie spe≈Çniona
    satisfied: int = 0

    # liczba decyzji, w kt√≥rych regu≈Ça by≈Ça logicznie niespe≈Çniona
    violated: int = 0

    # liczba decyzji, w kt√≥rych antecedent by≈Ç TRUE
    active: int = 0

    # liczba decyzji, w kt√≥rych regu≈Ça by≈Ça true "vacuously" (antecedent FALSE)
    inactive: int = 0

    # liczba decyzji, w kt√≥rych regu≈Ça wystƒÖpi≈Ça w conflicting_axioms
    in_conflict: int = 0

    def as_dict(self) -> Dict[str, Any]:
        return {
            "rule_id": self.rule_id,
            "description": self.description,
            "total_occurrences": self.total_occurrences,
            "satisfied": self.satisfied,
            "violated": self.violated,
            "active": self.active,
            "inactive": self.inactive,
            "in_conflict": self.in_conflict,
        }


@dataclass
class RuleCoverageReport:
    """Raport pokrycia rulesetu na podstawie log√≥w.

    Attributes:
        ruleset_id:
            Id rulesetu (z pliku).
        version:
            Wersja rulesetu.
        total_enabled_rules:
            Liczba regu≈Ç enabled=True w ruleset.
        used_rules:
            Lista identyfikator√≥w regu≈Ç, kt√≥re pojawi≈Çy siƒô
            w statystykach (czyli wystƒÖpi≈Çy w co najmniej jednej decyzji).
        unused_rules:
            Lista identyfikator√≥w regu≈Ç enabled=True, kt√≥re nie
            pojawi≈Çy siƒô w logach (martwe / nieu≈ºywane).
    """

    ruleset_id: str
    version: str
    total_enabled_rules: int
    used_rules: List[str] = field(default_factory=list)
    unused_rules: List[str] = field(default_factory=list)

    def as_dict(self) -> Dict[str, Any]:
        return {
            "ruleset_id": self.ruleset_id,
            "version": self.version,
            "total_enabled_rules": self.total_enabled_rules,
            "used_rules": list(self.used_rules),
            "unused_rules": list(self.unused_rules),
        }


@dataclass
class RuleAnalyticsResult:
    """Kompletny wynik analizy regu≈Ç i decyzji."""

    outcome_stats: DecisionOutcomeStats
    rule_stats: Dict[str, RuleStats] = field(default_factory=dict)
    coverage_report: Optional[RuleCoverageReport] = None

    def as_dict(self) -> Dict[str, Any]:
        return {
            "outcome_stats": self.outcome_stats.as_dict(),
            "rule_stats": {
                rule_id: stats.as_dict()
                for rule_id, stats in sorted(self.rule_stats.items())
            },
            "coverage_report": (
                None
                if self.coverage_report is None
                else self.coverage_report.as_dict()
            ),
        }


# ---------------------------------------------------------------------------
# Czytnik log√≥w JSONL
# ---------------------------------------------------------------------------


class DecisionLogReader:
    """Prosty reader log√≥w JSONL z DecisionLogger.

    Przechodzi liniƒô po linii, zwraca DecisionRecord. B≈Çƒôdy parsowania
    loguje, ale nie przerywa ca≈Çej analizy (odrzuca wadliwƒÖ liniƒô).
    """

    def __init__(self, path: str | Path) -> None:
        self._path = Path(path)

    def iter_decisions(self) -> Iterable[DecisionRecord]:
        if not self._path.exists():
            logger.warning(
                "Decision log file %s does not exist ‚Äì no data to analyze.",
                self._path,
            )
            return

        with self._path.open("r", encoding="utf-8") as file:
            for line_number, line in enumerate(file, start=1):
                stripped = line.strip()
                if not stripped:
                    continue
                try:
                    yield DecisionRecord.from_json_line(stripped)
                except Exception:  # pragma: no cover - defensywne logowanie
                    logger.exception(
                        "Failed to parse decision log line %d in %s",
                        line_number,
                        self._path,
                    )


# ---------------------------------------------------------------------------
# Silnik analityczny
# ---------------------------------------------------------------------------


class RuleAnalyticsEngine:
    """G≈Ç√≥wny silnik analizy log√≥w regu≈Çowych.

    Typowe u≈ºycie:

        engine = RuleAnalyticsEngine()
        result = engine.analyze_log_file(
            log_path="decision_log.jsonl",
            ruleset_path="rules_aml_v1.yaml",
        )
        report = result.as_dict()
    """

    def analyze_log_file(
        self,
        *,
        log_path: str | Path,
        ruleset: Optional[RuleSet] = None,
        ruleset_path: Optional[str | Path] = None,
    ) -> RuleAnalyticsResult:
        """Analizuje podany plik log√≥w JSONL.

        Mo≈ºesz przekazaƒá:
        - gotowy RuleSet (ruleset),
        - albo ≈õcie≈ºkƒô do pliku rulesetu (ruleset_path),
        - albo nic (analiza tylko decyzji i regu≈Ç obecnych w logach).

        Je≈õli podano zar√≥wno ruleset, jak i ruleset_path, priorytet
        ma obiekt ruleset.
        """

        if ruleset is None and ruleset_path is not None:
            ruleset = load_ruleset_from_file(Path(ruleset_path))

        reader = DecisionLogReader(log_path)

        outcome_stats = DecisionOutcomeStats()
        rule_stats: Dict[str, RuleStats] = {}

        for record in reader.iter_decisions():
            bundle = record.bundle

            decision = str(bundle.get("decision", "UNKNOWN"))
            status = str(bundle.get("decision_status", "UNKNOWN"))
            rule_version = str(bundle.get("rule_version", "unknown"))

            outcome_stats.total_decisions += 1
            outcome_stats.by_decision[decision] = (
                outcome_stats.by_decision.get(decision, 0) + 1
            )
            outcome_stats.by_status[status] = (
                outcome_stats.by_status.get(status, 0) + 1
            )
            outcome_stats.by_rule_version[rule_version] = (
                outcome_stats.by_rule_version.get(rule_version, 0) + 1
            )

            if status == "UNSAT":
                outcome_stats.unsat_cases += 1
            if status in {"ERROR", "UNKNOWN"}:
                outcome_stats.error_cases += 1

            # Zbierz regu≈Çy wystƒôpujƒÖce w tej decyzji, aby m√≥c policzyƒá
            # total_occurrences (ka≈ºda regu≈Ça max raz na decyzjƒô).
            rules_in_decision: set[str] = set()

            def _ensure_rule_stats(
                rule_id: str,
                description: Optional[str],
            ) -> RuleStats:
                if rule_id not in rule_stats:
                    rule_stats[rule_id] = RuleStats(
                        rule_id=rule_id,
                        description=description,
                    )
                else:
                    # Je≈õli wcze≈õniej description by≈Ço None, a teraz mamy
                    # jakikolwiek opis, uzupe≈Çnijmy go.
                    if description and not rule_stats[rule_id].description:
                        rule_stats[rule_id].description = description
                return rule_stats[rule_id]

            # satisfied_axioms: lista dict√≥w z polami id, description, ...
            for entry in bundle.get("satisfied_axioms", []):
                rule_id = str(entry.get("id", ""))
                if not rule_id:
                    continue
                description = entry.get("description")
                stats = _ensure_rule_stats(rule_id, description)
                stats.satisfied += 1
                rules_in_decision.add(rule_id)

            # violated_axioms
            for entry in bundle.get("violated_axioms", []):
                rule_id = str(entry.get("id", ""))
                if not rule_id:
                    continue
                description = entry.get("description")
                stats = _ensure_rule_stats(rule_id, description)
                stats.violated += 1
                rules_in_decision.add(rule_id)

            # active_axioms
            for entry in bundle.get("active_axioms", []):
                rule_id = str(entry.get("id", ""))
                if not rule_id:
                    continue
                description = entry.get("description")
                stats = _ensure_rule_stats(rule_id, description)
                stats.active += 1
                rules_in_decision.add(rule_id)

            # inactive_actions
            for entry in bundle.get("inactive_actions", []):
                rule_id = str(entry.get("id", ""))
                if not rule_id:
                    continue
                description = entry.get("description")
                stats = _ensure_rule_stats(rule_id, description)
                stats.inactive += 1
                rules_in_decision.add(rule_id)

            # conflicting_axioms: lista id (string√≥w)
            for rule_id in bundle.get("conflicting_axioms", []):
                rule_id_str = str(rule_id)
                if not rule_id_str:
                    continue
                stats = _ensure_rule_stats(rule_id_str, None)
                stats.in_conflict += 1
                rules_in_decision.add(rule_id_str)

            # Na koniec zwiƒôkszamy total_occurrences dla ka≈ºdej regu≈Çy,
            # kt√≥ra pojawi≈Ça siƒô w tej decyzji w jakiejkolwiek roli.
            for rule_id in rules_in_decision:
                rule_stats[rule_id].total_occurrences += 1

        coverage_report: Optional[RuleCoverageReport] = None

        if ruleset is not None:
            # Identyfikatory regu≈Ç, kt√≥re wystƒôpujƒÖ w statystykach
            used_rule_ids = {rule_id for rule_id in rule_stats}
            enabled_rules = [rule for rule in ruleset.rules if rule.enabled]
            enabled_rule_ids = {rule.rule_id for rule in enabled_rules}
            unused_rule_ids = sorted(enabled_rule_ids - used_rule_ids)
            used_rule_ids_sorted = sorted(enabled_rule_ids & used_rule_ids)

            coverage_report = RuleCoverageReport(
                ruleset_id=ruleset.ruleset_id,
                version=ruleset.version,
                total_enabled_rules=len(enabled_rules),
                used_rules=used_rule_ids_sorted,
                unused_rules=unused_rule_ids,
            )

        return RuleAnalyticsResult(
            outcome_stats=outcome_stats,
            rule_stats=rule_stats,
            coverage_report=coverage_report,
        )
