# Lovli Source-Gating Validation Run (Colab GPU)

This notebook runs the **v3 source-gating workflow** on Colab (H100/T4 compatible), so we avoid local RAM limits.

It runs:
- `scripts/build_catalog.py` (merge `data/nl` + `data/sf`)
- `scripts/validate_reindex.py`
- `scripts/analyze_law_contamination.py`
- `scripts/sweep_retrieval_thresholds.py`

The setup enables law routing + law coherence filtering, then exports analysis artifacts for review.

## 1. Runtime and Repository Setup

Use a **GPU runtime** before running this notebook (H100 preferred, T4 supported).

If this runtime was set up from an older commit, restart the runtime and rerun the notebook from the top.

In [None]:
# Start from a fresh clone so the run uses the current repository state.
# NOTE: removing /content/lovli also removes anything previously written under it
# (extracted data/, eval/ logs and reports), so later cells must be rerun afterwards.
%cd /content
!rm -rf lovli
!git clone https://github.com/AndreasRamsli/lovli.git
%cd /content/lovli

# Install project with dependencies required by validation scripts.
# NOTE(review): both installs are unpinned; consider pinning versions for reproducible runs.
%pip install -q -U pip
%pip install -q -e .

# Safety net for environments where editable install path is delayed.
# The repo's src/ directory is prepended to sys.path only when it is not already listed,
# so rerunning this cell does not add duplicate entries.
import sys
from pathlib import Path
src_path = str(Path('/content/lovli/src'))
if src_path not in sys.path:
    sys.path.insert(0, src_path)

print('Setup complete')

In [None]:
import torch

# Report whether a CUDA device is visible; when one is, show the name and
# total memory of device 0.
cuda_ready = torch.cuda.is_available()
print('CUDA available:', cuda_ready)
if cuda_ready:
    device_name = torch.cuda.get_device_name(0)
    device_props = torch.cuda.get_device_properties(0)
    vram_gib = device_props.total_memory / (1024**3)
    print(f'GPU: {device_name}')
    print(f'VRAM: {vram_gib:.1f} GB')

## 2. Environment Configuration (v3 + routing/coherence)

In [None]:
import os
import getpass


def _require_secret(name: str, prompt: str) -> None:
    """Make sure ``os.environ[name]`` holds a non-empty secret.

    A value that is already present in the environment is kept, so rerunning
    this cell does not prompt again. Otherwise the value is read with
    ``getpass`` (input is not echoed) and stored only in the process
    environment, never in the notebook source or its outputs.

    Raises:
        ValueError: if the user submits an empty value.
    """
    if os.environ.get(name, '').strip():
        return
    value = getpass.getpass(prompt).strip()
    if not value:
        raise ValueError(f'{name} is required but no value was provided.')
    os.environ[name] = value


# SECURITY: credentials must never be stored in notebook source. Any key that was
# previously committed in this cell should be treated as leaked and rotated.

# Required Qdrant settings
os.environ['QDRANT_URL'] = 'https://acc5c492-7d2c-4b95-b0c5-2931ff2ecebd.eu-west-1-0.aws.cloud.qdrant.io'
_require_secret('QDRANT_API_KEY', 'Qdrant API key: ')
os.environ['QDRANT_COLLECTION_NAME'] = 'lovli_laws_v3'

# Required by Settings model even for retrieval/eval scripts.
_require_secret('OPENROUTER_API_KEY', 'OpenRouter API key: ')

# Keep traces off for speed/clean logs.
os.environ['LANGCHAIN_TRACING_V2'] = 'false'
os.environ['LANGSMITH_TRACING'] = 'false'
os.environ['SWEEP_SKIP_INDEX_SCAN'] = 'true'

# Versioned trust profiles: switch TRUST_PROFILE between balanced_v1 and strict_v1.
os.environ['TRUST_PROFILE_VERSION'] = '2026-02-16'
profiles = {
    'balanced_v1': {
        'RETRIEVAL_K_INITIAL': '20',
        'RERANKER_CONFIDENCE_THRESHOLD': '0.35',
        'RERANKER_MIN_DOC_SCORE': '0.35',
        'RERANKER_AMBIGUITY_MIN_GAP': '0.05',
        'RERANKER_AMBIGUITY_TOP_SCORE_CEILING': '0.7',
        'LAW_ROUTING_FALLBACK_UNFILTERED': 'true',
    },
    'strict_v1': {
        'RETRIEVAL_K_INITIAL': '15',
        'RERANKER_CONFIDENCE_THRESHOLD': '0.45',
        'RERANKER_MIN_DOC_SCORE': '0.55',
        'RERANKER_AMBIGUITY_MIN_GAP': '0.10',
        'RERANKER_AMBIGUITY_TOP_SCORE_CEILING': '0.7',
        'LAW_ROUTING_FALLBACK_UNFILTERED': 'false',
    },
}
profile_name = os.environ.get('TRUST_PROFILE', 'balanced_v1')
if profile_name not in profiles:
    # Make the fallback visible instead of silently running a different profile
    # than the one that was requested.
    print(f"WARNING: unknown TRUST_PROFILE {profile_name!r}; falling back to 'balanced_v1'.")
    profile_name = 'balanced_v1'
profile = profiles[profile_name]
os.environ['TRUST_PROFILE'] = profile_name

# Shared law routing and coherence settings.
os.environ['LAW_ROUTING_ENABLED'] = 'true'
os.environ['LAW_CATALOG_PATH'] = 'data/law_catalog.json'
os.environ['LAW_ROUTING_PREFILTER_K'] = '80'
os.environ['LAW_ROUTING_RERANK_TOP_K'] = '6'
os.environ['LAW_ROUTING_MIN_CONFIDENCE'] = '0.30'
os.environ['LAW_ROUTING_UNCERTAINTY_TOP_SCORE_CEILING'] = '0.55'
os.environ['LAW_ROUTING_UNCERTAINTY_MIN_GAP'] = '0.04'
os.environ['LAW_ROUTING_FALLBACK_MAX_LAWS'] = '12'
os.environ['LAW_COHERENCE_FILTER_ENABLED'] = 'true'
os.environ['LAW_COHERENCE_MIN_LAW_COUNT'] = '2'
os.environ['LAW_COHERENCE_SCORE_GAP'] = '0.15'
os.environ['LAW_COHERENCE_RELATIVE_GAP'] = '0.05'
os.environ['LAW_COHERENCE_MAX_SCORE_WEIGHT'] = '0.6'
os.environ['LAW_COHERENCE_MIN_KEEP'] = '1'
os.environ['LAW_COHERENCE_DOMINANT_CONCENTRATION_THRESHOLD'] = '0.60'

# Apply selected profile values.
for key, value in profile.items():
    os.environ[key] = value

# Guard against accidental string values like 'None'.
raw = os.environ.get('SWEEP_SAMPLE_SIZE')
if raw is not None and raw.strip().lower() in {'', 'none', 'null'}:
    os.environ.pop('SWEEP_SAMPLE_SIZE', None)

# Echo the effective non-secret configuration; API keys are deliberately not printed.
print('TRUST_PROFILE          =', os.environ['TRUST_PROFILE'])
print('TRUST_PROFILE_VERSION  =', os.environ['TRUST_PROFILE_VERSION'])
print('QDRANT_COLLECTION_NAME =', os.environ['QDRANT_COLLECTION_NAME'])
print('LAW_ROUTING_ENABLED    =', os.environ['LAW_ROUTING_ENABLED'])
print('LAW_CATALOG_PATH       =', os.environ['LAW_CATALOG_PATH'])
print('LAW_ROUTING_PREFILTER  =', os.environ['LAW_ROUTING_PREFILTER_K'])
print('LAW_ROUTING_RERANK_K   =', os.environ['LAW_ROUTING_RERANK_TOP_K'])
print('LAW_ROUTING_CONF_MIN   =', os.environ['LAW_ROUTING_MIN_CONFIDENCE'])
print('LAW_ROUTE_UNCERT_CEIL  =', os.environ['LAW_ROUTING_UNCERTAINTY_TOP_SCORE_CEILING'])
print('LAW_ROUTE_UNCERT_GAP   =', os.environ['LAW_ROUTING_UNCERTAINTY_MIN_GAP'])
print('LAW_ROUTE_FALLBACK     =', os.environ['LAW_ROUTING_FALLBACK_UNFILTERED'])
print('LAW_COHERENCE_FILTER   =', os.environ['LAW_COHERENCE_FILTER_ENABLED'])
print('LAW_COHERENCE_CONC_THR =', os.environ['LAW_COHERENCE_DOMINANT_CONCENTRATION_THRESHOLD'])
print('SWEEP_SAMPLE_SIZE      =', os.environ.get('SWEEP_SAMPLE_SIZE'))

In [None]:
# Optional quick mode before full run.
# Set QUICK_SAMPLE_SIZE to a positive integer (e.g. 100) to run on a small sample first;
# leave it as None for the full run. The value is applied here in one place, so enabling
# quick mode cannot be undone by a later statement in this cell.
import os

QUICK_SAMPLE_SIZE = None

if QUICK_SAMPLE_SIZE is None:
    # Ensure full run by default.
    os.environ.pop('SWEEP_SAMPLE_SIZE', None)
else:
    sample_size = int(QUICK_SAMPLE_SIZE)
    if sample_size <= 0:
        raise ValueError(f'QUICK_SAMPLE_SIZE must be a positive integer, got {QUICK_SAMPLE_SIZE!r}')
    os.environ['SWEEP_SAMPLE_SIZE'] = str(sample_size)
print('SWEEP_SAMPLE_SIZE now:', os.environ.get('SWEEP_SAMPLE_SIZE'))

## 3. Mount Drive, Extract Data, Build Catalog, Validate Reindex

In [None]:
%cd /content/lovli

from pathlib import Path
from google.colab import drive
import json
import subprocess


def run_to_log(cmd: str, log_path: Path, cwd: str = '/content/lovli') -> int:
    """Run a shell command and write its combined stdout/stderr to ``log_path``.

    Args:
        cmd: Command line executed through the shell, so environment-variable
            references such as ``"$TRUST_PROFILE"`` are expanded by the shell.
        log_path: File that receives the output. Parent directories are created
            as needed and an existing file is overwritten.
        cwd: Working directory for the command. Defaults to the repository
            checkout used throughout this notebook.

    Returns:
        The command's exit code. A non-zero code is returned, not raised, so the
        caller can print a log excerpt before asserting on it.
    """
    log_path.parent.mkdir(parents=True, exist_ok=True)
    with open(log_path, 'w', encoding='utf-8') as log_file:
        proc = subprocess.run(
            cmd,
            shell=True,
            cwd=cwd,
            stdout=log_file,
            # Interleave stderr with stdout so the log reads in execution order.
            stderr=subprocess.STDOUT,
            text=True,
        )
    return proc.returncode


def print_log_matches(log_path: Path, patterns: list[str], limit: int = 50) -> None:
    """Print the last ``limit`` log lines that contain any of ``patterns``.

    Args:
        log_path: Log file to scan. If it does not exist, a ``log missing``
            notice is printed and nothing else happens.
        patterns: Plain substrings (not regular expressions); a line is kept
            when it contains at least one of them.
        limit: Maximum number of matching lines to print, counted from the end
            of the file. A value of zero or less prints only the header.
    """
    if not log_path.exists():
        print(f'log missing: {log_path}')
        return
    lines = log_path.read_text(encoding='utf-8', errors='ignore').splitlines()
    kept = [line for line in lines if any(p in line for p in patterns)]
    print(f'--- {log_path.name} (key lines) ---')
    # kept[-0:] is the whole list and a negative limit would slice from the front,
    # so non-positive limits are handled explicitly.
    tail = kept[-limit:] if limit > 0 else []
    for line in tail:
        print(line)


# Mount Drive for access to the compressed dataset.
drive.mount('/content/drive')

# Update this path if your tar is moved. The extraction step below reads the
# archive location from this variable, so this is the only place to change.
tar_path = Path('/content/drive/MyDrive/Colab Notebooks/Lovli/data/lovli-data.tar.bz2')
assert tar_path.exists(), f'Data tar not found: {tar_path}'

# Extract into the repo checkout (safe to rerun).
# NOTE(review): the archive is presumably rooted at data/ (data/nl, data/sf), since it is
# unpacked into the repo root and those folders are checked right after — confirm.
repo_root = Path('/content/lovli')
(repo_root / 'data').mkdir(parents=True, exist_ok=True)
subprocess.run(
    # Argument list instead of a shell string: the Drive path contains spaces and
    # needs no manual quoting this way. '._*' skips macOS resource-fork entries.
    ['tar', '-xjf', str(tar_path), '-C', str(repo_root), '--exclude=._*'],
    check=True,
)

nl_count = len(list(Path('/content/lovli/data/nl').glob('*.xml')))
sf_count = len(list(Path('/content/lovli/data/sf').glob('*.xml')))
print({'nl_xml_files': nl_count, 'sf_xml_files': sf_count})
assert nl_count > 0 and sf_count > 0, 'Expected both data/nl and data/sf to contain XML files.'

# Build merged catalog used by law routing (no summaries for speed).
build_log = Path('/content/lovli/eval/logs/build_catalog.log')
rc = run_to_log(
    'python scripts/build_catalog.py data/nl data/sf --no-summaries --output data/law_catalog.json',
    build_log,
)
print('build_catalog exit_code =', rc)
print_log_matches(
    build_log,
    patterns=['Catalog build complete', 'Laws cataloged', 'With summaries', 'Missing summaries', 'Output:', 'Time:'],
)
# Assert only after the log excerpt is printed so a failure is easier to diagnose.
assert rc == 0, 'build_catalog failed; inspect eval/logs/build_catalog.log'

# Validate metadata + retrieval smoke checks on v3 collection.
validate_log = Path('/content/lovli/eval/logs/validate_reindex.log')
rc = run_to_log(
    'python scripts/validate_reindex.py --collection lovli_laws_v3 --with-smoke',
    validate_log,
)
print('validate_reindex exit_code =', rc)
print_log_matches(
    validate_log,
    patterns=['Collection:', 'total_points=', 'missing_doc_type=', 'smoke query=', 'Validation completed.'],
)
assert rc == 0, 'validate_reindex failed; inspect eval/logs/validate_reindex.log'

## 4. Law Contamination Analysis

In [None]:
%cd /content/lovli
from pathlib import Path
import json

# Run the contamination analysis and capture all of its output in a log file.
contam_log = Path('/content/lovli/eval/logs/analyze_law_contamination.log')
contam_cmd = 'python -u scripts/analyze_law_contamination.py --output eval/law_contamination_report.json'
rc = run_to_log(contam_cmd, contam_log)
print('analyze_law_contamination exit_code =', rc)
print_log_matches(contam_log, patterns=['Processed', 'Saved contamination report', 'Contamination rate='])

# Show the headline aggregate values when a report file is present.
report_path = Path('/content/lovli/eval/law_contamination_report.json')
aggregate_keys = (
    'total_questions',
    'contamination_rate',
    'singleton_foreign_rate',
    'unexpected_citation_rate',
    'mean_foreign_score_gap',
)
if report_path.exists():
    report = json.loads(report_path.read_text(encoding='utf-8'))
    agg = report.get('aggregate', {})
    print('--- contamination aggregate ---')
    print('\n'.join(f'{key}: {agg.get(key)}' for key in aggregate_keys))

# Fail the cell only after the log excerpt and any report summary have been shown.
assert rc == 0, 'analyze_law_contamination failed; inspect eval/logs/analyze_law_contamination.log'

In [None]:
# Optional quick check before full sweep.
# Use a small sample to verify config quickly.
%cd /content/lovli
import os
import json
from pathlib import Path

# Run the sweep with a sample size of 10. The override is removed in `finally` so that
# an interrupted or crashed quick run cannot leave SWEEP_SAMPLE_SIZE set in the kernel,
# which would make the later "full" sweep silently run on the small sample.
quick_sweep_log = Path('/content/lovli/eval/logs/retrieval_sweep_quick.log')
os.environ['SWEEP_SAMPLE_SIZE'] = '10'
try:
    rc = run_to_log('python -u scripts/sweep_retrieval_thresholds.py', quick_sweep_log)
finally:
    os.environ.pop('SWEEP_SAMPLE_SIZE', None)

print('quick sweep exit_code =', rc)
print_log_matches(
    quick_sweep_log,
    patterns=['Using sample size', 'Saved results:', 'Top 5 configurations:'],
)

# Summarise the first row of the results file when it exists.
# NOTE(review): rows[0] is presumably the best-ranked configuration — confirm the
# ordering written by sweep_retrieval_thresholds.py.
sweep_path = Path('/content/lovli/eval/retrieval_sweep_results.json')
if sweep_path.exists():
    rows = json.loads(sweep_path.read_text(encoding='utf-8'))
    if rows:
        top = rows[0]
        print('--- quick sweep top row ---')
        for key in [
            'is_profile_default_row',
            'recall_at_k',
            'citation_precision',
            'unexpected_citation_rate',
            'law_contamination_rate',
            'balanced_score',
        ]:
            print(f'{key}: {top.get(key)}')

# Fail the cell only after the diagnostics above have been printed.
assert rc == 0, 'quick sweep failed; inspect eval/logs/retrieval_sweep_quick.log'

## 5. Full Retrieval Sweep (Colab run)

In [None]:
%cd /content/lovli
import json
from pathlib import Path

# Run the threshold sweep and keep its complete output in a log file.
full_sweep_log = Path('/content/lovli/eval/logs/retrieval_sweep_full.log')
rc = run_to_log('python -u scripts/sweep_retrieval_thresholds.py', full_sweep_log)
print('full sweep exit_code =', rc)

print_log_matches(full_sweep_log, patterns=['Saved results:', 'Top 5 configurations:'])

# Print selected metrics from the first row of the results file, if there is one.
sweep_path = Path('/content/lovli/eval/retrieval_sweep_results.json')
top_row_keys = (
    'is_profile_default_row',
    'recall_at_k',
    'citation_precision',
    'unexpected_citation_rate',
    'law_contamination_rate',
    'law_coherence_filtered_count',
    'balanced_score',
)
if sweep_path.exists():
    rows = json.loads(sweep_path.read_text(encoding='utf-8'))
    if rows:
        top = rows[0]
        print('--- full sweep top row ---')
        print('\n'.join(f'{key}: {top.get(key)}' for key in top_row_keys))

# The exit code is checked last so the diagnostics above are always shown.
assert rc == 0, 'full sweep failed; inspect eval/logs/retrieval_sweep_full.log'

In [None]:
# Must-pass regression gates against versioned baseline.
%cd /content/lovli
from pathlib import Path

# Assemble the gate-check command from its parts; joined with single spaces.
# "$TRUST_PROFILE" is left for the shell to expand from the environment.
gates_log = Path('/content/lovli/eval/logs/regression_gates.log')
gate_command_parts = [
    'python -u scripts/check_regression_gates.py',
    '--contamination-report eval/law_contamination_report.json',
    '--sweep-results eval/retrieval_sweep_results.json',
    '--baseline eval/baselines/production_trust_baseline_v1.json',
    '--profile "$TRUST_PROFILE"',
]
rc = run_to_log(' '.join(gate_command_parts), gates_log)
print('regression gates exit_code =', rc)

# Show up to 120 of the most recent gate-related log lines before asserting.
gate_patterns = ['Gate sweep row selected', '[PASS]', '[FAIL]', 'Gate checks passed', 'All regression gates passed']
print_log_matches(gates_log, patterns=gate_patterns, limit=120)
assert rc == 0, 'regression gates failed; inspect eval/logs/regression_gates.log'

## 6. Artifact Overview and Quick Metric Check

Acceptance targets for this run (balanced objective):
- `recall_at_k` should improve materially vs previous baseline (~0.146)
- `citation_precision` should increase from previous baseline (~0.073)
- `unexpected_citation_rate` should decrease
- `law_coherence_filtered_count` should be non-zero on full sweep
- `missing_doc_type` must remain `0`

In [None]:
%cd /content/lovli
!ls -lah eval

import json
from pathlib import Path

# Report which of the expected output files are present (paths are relative to the repo root).
artifacts = [
    Path(name)
    for name in (
        'data/law_catalog.json',
        'eval/law_contamination_report.json',
        'eval/retrieval_sweep_results.json',
    )
]
for artifact in artifacts:
    status = 'exists' if artifact.exists() else 'missing'
    print(f'{artifact}:', status)

# Aggregate contamination values, shown only when the report file exists.
report_path = Path('eval/law_contamination_report.json')
aggregate_keys = (
    'total_questions',
    'contamination_rate',
    'singleton_foreign_rate',
    'unexpected_citation_rate',
    'mean_foreign_score_gap',
)
if report_path.exists():
    report = json.loads(report_path.read_text(encoding='utf-8'))
    agg = report.get('aggregate', {})
    print('\nContamination aggregate:')
    print('\n'.join(f'  {k}: {agg.get(k)}' for k in aggregate_keys))

# Selected metrics from the first row of the sweep results, when available.
sweep_path = Path('eval/retrieval_sweep_results.json')
top_row_keys = (
    'recall_at_k',
    'citation_precision',
    'unexpected_citation_rate',
    'law_contamination_rate',
    'law_coherence_filtered_count',
)
if sweep_path.exists():
    rows = json.loads(sweep_path.read_text(encoding='utf-8'))
    if rows:
        top = rows[0]
        print('\nTop sweep row:')
        print('\n'.join(f'  {k}: {top.get(k)}' for k in top_row_keys))