diff --git a/backend/app/main.py b/backend/app/main.py index 5074d1c..7ff74a0 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -1,3 +1,9 @@ +from pathlib import Path + +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware +from fastapi.staticfiles import StaticFiles + from backend.app.api import ( routes_analysis, routes_evaluation, @@ -8,8 +14,6 @@ routes_taxonomy_workbench, ) from backend.app.core.logging import configure_logging -from fastapi import FastAPI -from fastapi.middleware.cors import CORSMiddleware configure_logging() @@ -38,5 +42,11 @@ app.include_router(routes_reports.router, prefix="/api") @app.get("/health") +@app.get("/api/health") def health() -> dict[str, str]: return {"status": "ok"} + + +FRONTEND_DIST = Path(__file__).resolve().parents[2] / "frontend" / "dist" +if FRONTEND_DIST.exists(): + app.mount("/", StaticFiles(directory=FRONTEND_DIST, html=True), name="frontend") diff --git a/build_backend.py b/build_backend.py deleted file mode 100644 index 4018aae..0000000 --- a/build_backend.py +++ /dev/null @@ -1,100 +0,0 @@ -from __future__ import annotations - -import hashlib -import zipfile -from pathlib import Path - -NAME = "argument_risk_engine" -VERSION = "0.1.0" -DIST = f"{NAME}-{VERSION}.dist-info" -ROOT = Path(__file__).parent.resolve() - - -def _metadata() -> str: - return "\n".join([ - "Metadata-Version: 2.1", - "Name: argument-risk-engine", - f"Version: {VERSION}", - "Summary: Local taxonomy-grounded argument risk analysis dashboard.", - "Requires-Python: >=3.10", - "Provides-Extra: dev", - "", - ]) - - -def _entry_points() -> str: - return "\n".join([ - "[console_scripts]", - "uvicorn=uvicorn.main:main", - "", - ]) - - -def _wheel() -> str: - return "\n".join([ - "Wheel-Version: 1.0", - "Generator: argument-risk-engine-local-backend", - "Root-Is-Purelib: true", - "Tag: py3-none-any", - "", - ]) - - -def _record_line(path: str, data: bytes) -> str: - digest = hashlib.sha256(data).digest() - import base64 - encoded = base64.urlsafe_b64encode(digest).rstrip(b"=").decode() - return f"{path},sha256={encoded},{len(data)}" - - -def build_wheel(wheel_directory, config_settings=None, metadata_directory=None): - return _write_wheel(Path(wheel_directory), editable=False) - - -def build_editable(wheel_directory, config_settings=None, metadata_directory=None): - return _write_wheel(Path(wheel_directory), editable=True) - - -def get_requires_for_build_wheel(config_settings=None): - return [] - - -def get_requires_for_build_editable(config_settings=None): - return [] - - -def prepare_metadata_for_build_wheel(metadata_directory, config_settings=None): - dist = Path(metadata_directory) / DIST - dist.mkdir(parents=True, exist_ok=True) - (dist / "METADATA").write_text(_metadata()) - (dist / "WHEEL").write_text(_wheel()) - return DIST - - -def prepare_metadata_for_build_editable(metadata_directory, config_settings=None): - return prepare_metadata_for_build_wheel(metadata_directory, config_settings) - - -def _write_wheel(out_dir: Path, editable: bool) -> str: - out_dir.mkdir(parents=True, exist_ok=True) - filename = f"{NAME}-{VERSION}-py3-none-any.whl" - wheel_path = out_dir / filename - records: list[str] = [] - with zipfile.ZipFile(wheel_path, "w", zipfile.ZIP_DEFLATED) as zf: - files: dict[str, bytes] = { - f"{DIST}/METADATA": _metadata().encode(), - f"{DIST}/WHEEL": _wheel().encode(), - f"{DIST}/entry_points.txt": _entry_points().encode(), - } - if editable: - files["argument_risk_engine_editable.pth"] = f"{ROOT}\n{ROOT / 'engine'}\n".encode() - else: - for base in [ROOT / "engine" / "argument_risk_engine", ROOT / "backend"]: - for path in base.rglob("*.py"): - files[str(path.relative_to(base.parent))] = path.read_bytes() - for path, data in files.items(): - zf.writestr(path, data) - records.append(_record_line(path, data)) - record_path = f"{DIST}/RECORD" - zf.writestr(record_path, "\n".join(records + [f"{record_path},,"]) + "\n") - return filename diff --git a/data/taxonomy/packs/starter-pack.yaml b/data/taxonomy/packs/starter-pack.yaml index 90247a8..fe5278b 100644 --- a/data/taxonomy/packs/starter-pack.yaml +++ b/data/taxonomy/packs/starter-pack.yaml @@ -19,7 +19,14 @@ ], "mitigation": "Ask for scope, counterexamples, and supporting evidence.", "active": true, - "metadata": {} + "metadata": {}, + "negative_examples": [ + "Every backup completed successfully according to the job log." + ], + "minimum_evidence_requirement": "Evidence must show a broad quantifier applied to an unsupported group, behavior, or conclusion, not a bounded or sourced observation.", + "common_false_positives": [ + "Bounded inventory, log, policy, or technical statements using absolute words literally." + ] }, { "id": "unsupported_causal_claim", @@ -37,7 +44,14 @@ ], "mitigation": "Request causal evidence and consider alternative explanations.", "active": true, - "metadata": {} + "metadata": {}, + "negative_examples": [ + "The incident report names three causes and asks for more evidence." + ], + "minimum_evidence_requirement": "Evidence must include causal wording presented as a conclusion without cited support or alternative explanations.", + "common_false_positives": [ + "Cautious causal hypotheses, cited incident reports, or requests for additional causal evidence." + ] }, { "id": "dehumanizing_language", @@ -55,7 +69,14 @@ ], "mitigation": "Escalate for careful human review and contextual assessment.", "active": true, - "metadata": {} + "metadata": {}, + "negative_examples": [ + "The novel describes animals in a literal zoo." + ], + "minimum_evidence_requirement": "Evidence must show dehumanizing terms applied to a person or group of people.", + "common_false_positives": [ + "Literal references to non-human animals, pests, software parasites, or fictional creatures." + ] } ] -} \ No newline at end of file +} diff --git a/engine/argument_risk_engine/analyzer.py b/engine/argument_risk_engine/analyzer.py index 6feb830..bb85e50 100644 --- a/engine/argument_risk_engine/analyzer.py +++ b/engine/argument_risk_engine/analyzer.py @@ -33,10 +33,18 @@ def analyze_text( ) -> dict[str, Any]: taxonomy_pack = pack or default_taxonomy_pack() normalized_text = text or "" + requested_mode = mode or DEFAULT_MODE + requested_provider_id = model_provider_id or DEFAULT_MODEL_PROVIDER_ID + warnings: list[str] = [] + if requested_mode != DEFAULT_MODE or requested_provider_id != DEFAULT_MODEL_PROVIDER_ID: + warnings.append( + "LLM-backed analysis is not enabled in this release; /analyze uses deterministic_baseline only." + ) + mode = DEFAULT_MODE + model_provider_id = DEFAULT_MODEL_PROVIDER_ID claims = extract_claims(normalized_text) claims_out: list[dict[str, Any]] = [] all_scores: list[float] = [] - warnings: list[str] = [] any_review = False for index, claim in enumerate(claims, start=1): @@ -122,7 +130,7 @@ def analyze_text( "model_provider_id": model_provider_id, "model_name": DEFAULT_MODEL_NAME, "llm_used": False, - "deterministic_fallback_used": False if mode == DEFAULT_MODE else allow_deterministic_fallback, + "deterministic_fallback_used": False, "claims": claims_out, "overall_risk_score": overall, "risk_level": risk_level(overall), diff --git a/engine/argument_risk_engine/classification/deterministic.py b/engine/argument_risk_engine/classification/deterministic.py index b80e3cb..5e5b6e0 100644 --- a/engine/argument_risk_engine/classification/deterministic.py +++ b/engine/argument_risk_engine/classification/deterministic.py @@ -1,5 +1,7 @@ from __future__ import annotations +import re + from argument_risk_engine.retrieval.candidate_filter import is_healthy_suppressor from argument_risk_engine.taxonomy.models import ActivationStatus, TaxonomyEntry @@ -38,6 +40,8 @@ def classify_deterministic( continue if _exclusion_triggered(haystack, entry.exclusion_criteria): continue + if entry.id == "overgeneralization" and not _has_unsupported_universal_claim(claim): + continue evidence = _best_evidence_span(haystack, entry, candidate) if evidence is None: @@ -76,6 +80,69 @@ def classify_deterministic( return results[:limit] +def _has_unsupported_universal_claim(claim: str) -> bool: + """Return true only for broad, unsupported universal claims. + + This keeps trigger words such as "always", "never", "all", and + "everyone" from classifying bounded observations, quoted words, + documented rules, or operational statements as overgeneralization. + """ + + lower = claim.strip().lower() + quoted_or_literal_pattern = ( + r"[\"']?(always|never|all|every|everyone|nobody|none|no)[\"']?" + r"\s+(is|are|means|appears|used|reserved)\b" + ) + if re.search(quoted_or_literal_pattern, lower): + return False + bounded_or_supported = [ + "according to", + "based on", + "system log", + "job log", + "manifest", + "packing list", + "style guide", + "handbook", + "launch notes", + "archive", + "shelf", + "bin ", + "exact search", + "should ", + "must ", + ] + if any(marker in lower for marker in bounded_or_supported): + return False + universal_patterns = [ + r"\beveryone\b.+\b(always|never|all|caused|will|fail|ignored?|understood|received|does|is|are)\b", + r"\beveryone\s+(in|on|who)\b.+\b(always|never|will|fail|ignored?|understood|does|is|are)\b", + ( + r"\b(all|every|no|none of|nobody)\b.+" + r"\b(is|are|was|were|will|proves?|shows?|hated|matters|benefits?|work|failed|useless|broken|unusable)\b" + ), + r"\balways\b.+\b(because|even though|caused?|proves?|shows?)\b", + r"\bnever\b.+\b(because|proves?|shows?|benefits?|works?)\b", + ] + if not any(re.search(pattern, lower) for pattern in universal_patterns): + return False + weak_evidence_markers = [ + "because one", + "after a single", + "from one", + "only two", + "first ", + "single ", + "whole ", + "entire ", + "always", + "never", + "everyone", + "nobody", + "none", + ] + return any(marker in lower for marker in weak_evidence_markers) + def _entry(candidate: object) -> TaxonomyEntry: return getattr(candidate, "entry", candidate) diff --git a/engine/argument_risk_engine/taxonomy/models.py b/engine/argument_risk_engine/taxonomy/models.py index a4b2c74..b9b24e3 100644 --- a/engine/argument_risk_engine/taxonomy/models.py +++ b/engine/argument_risk_engine/taxonomy/models.py @@ -239,6 +239,7 @@ def default_taxonomy_pack() -> TaxonomyPack: detection_level="structural", signals=["always", "never", "everyone", "all", "none"], positive_examples=["Everyone in that group is dishonest."], + negative_examples=["Every backup completed successfully according to the job log."], minimum_evidence_requirement="Evidence span showing an overbroad quantifier applied as support.", common_false_positives=["Legitimate quantified claims with adequate evidence."], enabled_for_mvp=True, diff --git a/fastapi/__init__.py b/fastapi/__init__.py deleted file mode 100644 index 4c4ff3f..0000000 --- a/fastapi/__init__.py +++ /dev/null @@ -1,79 +0,0 @@ -from __future__ import annotations - - -class Response: - def __init__(self, content='', media_type='text/plain', status_code=200, headers=None): - self.content = content - self.media_type = media_type - self.status_code = status_code - self.headers = headers or {} - - -class APIRouter: - def __init__(self, prefix='', tags=None): - self.prefix = prefix - self.routes = {} - - def _add(self, method, path, fn): - self.routes[(method, self.prefix + path)] = fn - return fn - - def get(self, path='', **kwargs): - def deco(fn): - return self._add('GET', path, fn) - return deco - - def post(self, path='', **kwargs): - def deco(fn): - return self._add('POST', path, fn) - return deco - - def put(self, path='', **kwargs): - def deco(fn): - return self._add('PUT', path, fn) - return deco - - def patch(self, path='', **kwargs): - def deco(fn): - return self._add('PATCH', path, fn) - return deco - - -class FastAPI: - def __init__(self, **kwargs): - self.routes = {} - - def add_middleware(self, *args, **kwargs): - return None - - def include_router(self, router, prefix=''): - for (method, path), fn in router.routes.items(): - self.routes[(method, prefix + path)] = fn - - def get(self, path='', **kwargs): - def deco(fn): - self.routes[('GET', path)] = fn - return fn - return deco - - def post(self, path='', **kwargs): - def deco(fn): - self.routes[('POST', path)] = fn - return fn - return deco - - def patch(self, path='', **kwargs): - def deco(fn): - self.routes[('PATCH', path)] = fn - return fn - return deco - - -class UploadFile: - def __init__(self, filename='', file=None): - self.filename = filename - self.file = file - - -def File(default=None, **kwargs): - return default diff --git a/fastapi/middleware/__init__.py b/fastapi/middleware/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/fastapi/middleware/cors.py b/fastapi/middleware/cors.py deleted file mode 100644 index 9543402..0000000 --- a/fastapi/middleware/cors.py +++ /dev/null @@ -1,2 +0,0 @@ -class CORSMiddleware: - pass diff --git a/fastapi/testclient/__init__.py b/fastapi/testclient/__init__.py deleted file mode 100644 index 4fc5ae5..0000000 --- a/fastapi/testclient/__init__.py +++ /dev/null @@ -1,155 +0,0 @@ -from __future__ import annotations - -import inspect -from io import BytesIO -from urllib.parse import parse_qs, urlparse - -from fastapi import Response, UploadFile - - -class _Resp: - def __init__(self, payload, status_code=200): - self._payload = payload - self.status_code = getattr(payload, 'status_code', status_code) - self.content = getattr(payload, 'content', b'') - self.headers = getattr(payload, 'headers', {}) - - def json(self): - def conv(v): - if isinstance(v, Response): - return v.content - if hasattr(v, 'model_dump'): - return v.model_dump() - if isinstance(v, dict): - return {k: conv(i) for k, i in v.items()} - if isinstance(v, list): - return [conv(i) for i in v] - if hasattr(v, 'value'): - return v.value - return v - return conv(self._payload) - - -class TestClient: - def __init__(self, app): - self.app = app - - def get(self, path): - fn, kwargs, query = _resolve(self.app, 'GET', path) - if not fn: - return _Resp({'detail': 'not found'}, 404) - kwargs.update(query) - return _Resp(_call(fn, kwargs)) - - def post(self, path, json=None, files=None): - fn, kwargs, query = _resolve(self.app, 'POST', path) - if not fn: - return _Resp({'detail': 'not found'}, 404) - kwargs.update(query) - kwargs.update(_json_args(fn, json or {})) - kwargs.update(_file_args(files)) - return _Resp(_call(fn, kwargs)) - - def put(self, path, json=None): - fn, kwargs, query = _resolve(self.app, 'PUT', path) - if not fn: - return _Resp({'detail': 'not found'}, 404) - kwargs.update(query) - kwargs.update(_json_args(fn, json or {})) - return _Resp(_call(fn, kwargs)) - - def patch(self, path, json=None): - fn, kwargs, query = _resolve(self.app, 'PATCH', path) - if not fn: - return _Resp({'detail': 'not found'}, 404) - kwargs.update(query) - kwargs.update(_json_args(fn, json or {})) - return _Resp(_call(fn, kwargs)) - - -def _resolve(app, method, raw_path): - parsed = urlparse(raw_path) - path = parsed.path - query = {key: values[-1] for key, values in parse_qs(parsed.query).items()} - exact = app.routes.get((method, path)) - if exact: - return exact, {}, query - path_parts = [part for part in path.split('/') if part] - for (route_method, route_path), fn in app.routes.items(): - if route_method != method: - continue - route_parts = [part for part in route_path.split('/') if part] - if len(route_parts) != len(path_parts): - continue - kwargs = {} - matched = True - for route_part, path_part in zip(route_parts, path_parts, strict=True): - if route_part.startswith('{') and route_part.endswith('}'): - kwargs[route_part[1:-1]] = path_part - elif route_part != path_part: - matched = False - break - if matched: - return fn, kwargs, query - return None, {}, query - - -def _call(fn, kwargs): - sig = inspect.signature(fn) - accepted = {} - for name, param in sig.parameters.items(): - if name in kwargs: - accepted[name] = kwargs[name] - elif param.default is inspect._empty: - ann = _resolve_annotation(fn, param.annotation) - if isinstance(kwargs, dict) and ann is not inspect._empty: - try: - accepted[name] = ann(**kwargs) - except Exception: - pass - return fn(**accepted) - - -def _json_args(fn, data): - if not data: - return {} - sig = inspect.signature(fn) - required_model_params = [] - for name, param in sig.parameters.items(): - if name in {'file'}: - continue - ann = _resolve_annotation(fn, param.annotation) - if ann is inspect._empty or ann in {str, int, bool, float}: - continue - if param.default is inspect._empty: - required_model_params.append((name, ann)) - if len(required_model_params) == 1: - name, cls = required_model_params[0] - try: - return {name: cls(**data)} - except Exception: - return {name: data} - return data - - -def _resolve_annotation(fn, ann): - if isinstance(ann, str): - builtin = {'str': str, 'int': int, 'bool': bool, 'float': float}.get(ann) - if builtin is not None: - return builtin - return getattr(inspect.getmodule(fn), ann, fn.__globals__.get(ann, ann)) - return ann - - -def _file_args(files): - if not files: - return {} - file_info = files.get('file') if isinstance(files, dict) else None - if file_info is None: - return {} - filename, content, *_ = file_info - if hasattr(content, 'read'): - file_obj = content - else: - file_obj = BytesIO(content if isinstance(content, bytes) else str(content).encode()) - return {'file': UploadFile(filename=filename, file=file_obj)} diff --git a/frontend/index.html b/frontend/index.html index 5572dd9..74c4f11 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -4,10 +4,9 @@