# Nyaya Staging Review Notebook
A compact UI to pretty‑print and review Nyaya‑style argument records, approve/disapprove/edit them, persist decisions, and prepare approved clean JSONL snapshots per round.

In [None]:
# 1) Setup: Install and Import Libraries
import sys, json, uuid, copy
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Any, Optional

# Optional installs (skip if already available)
try:
    import pandas as pd
except Exception as e:
    raise RuntimeError("pandas is required for this notebook")

try:
    from ipywidgets import Button, Textarea, Output, HBox, VBox, IntProgress, Label
except Exception as e:
    raise RuntimeError("ipywidgets is required. In VS Code, enable the Jupyter extension and ipywidgets support.")

try:
    from rich.console import Console
    from rich.panel import Panel
    from rich.text import Text
except Exception as e:
    raise RuntimeError("rich is required for pretty printing")

# Local import: `timezone` is only needed here, to build a timezone-aware "now".
from datetime import timezone

# Shared rich console used by the pretty printer in this notebook.
console = Console(record=True)

# Paths (relative to the current working directory; created on first run)
WORKDIR = Path('nyaya')
RUNS_DIR = WORKDIR / 'runs'
RUNS_DIR.mkdir(parents=True, exist_ok=True)
SOURCES_DIR = WORKDIR / 'Datasets' / 'sources'
ROUND_DIR = WORKDIR / 'Datasets' / 'rounds' / 'staging_round_0001'
ROUND_DIR.mkdir(parents=True, exist_ok=True)

# One session directory per run, keyed by the UTC start time.
# datetime.utcnow() is deprecated since Python 3.12; an aware now() formats to
# exactly the same '%Y%m%d_%H%M%S' string.
TIMESTAMP = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')
SESSION_DIR = RUNS_DIR / f'review_{TIMESTAMP}'
SESSION_DIR.mkdir(parents=True, exist_ok=True)

# Per-session outputs: decision ledger and CSV snapshot of all records.
LEDGER_PATH = SESSION_DIR / 'ledger.jsonl'
SNAPSHOT_CSV = SESSION_DIR / 'records_snapshot.csv'
# Per-round output: the cleaned records accepted in review.
APPROVED_OUT = ROUND_DIR / 'approved_snapshot_clean.jsonl'

In [None]:
# 2) Schema and Validation
# Keys every record must carry with a non-empty value.
REQUIRED_FIELDS = [
    'domain',
    'pratijna',
    'hetu',
    'udaharana',
    'upanaya',
    'nigamana',
    'grounding_authority',
]

# Review states a record can be in; anything else is treated as 'queued'.
STATUSES = {
    'queued',
    'approved',
    'disapproved',
    'edited',
}

def normalize_text(s: Optional[str]) -> str:
    """Trim *s* and collapse every run of whitespace into a single space.

    ``None`` yields the empty string; any other value goes through ``str()``
    before being cleaned.
    """
    if s is None:
        return ''
    # split() with no separator already drops leading/trailing whitespace.
    words = str(s).split()
    return ' '.join(words)

def validate_record(r: Dict[str, Any]) -> Dict[str, Any]:
    """Build a normalized review record from a raw dict.

    Required fields are whitespace-normalized (absent ones become ''), an id
    is generated when none is given, and an unknown or absent status falls
    back to 'queued'. When any required field is empty the record is forced
    to 'queued' and a " | Missing fields: [...]" marker is appended to notes.

    Args:
        r: Raw record. Only ``.get`` is called on it; the dict itself is not
            modified.

    Returns:
        A new dict holding the required fields plus 'source', 'notes', 'id',
        'status' and 'history'. Any other key of *r* is dropped.
    """
    rec: Dict[str, Any] = {k: normalize_text(r.get(k)) for k in REQUIRED_FIELDS}
    # carry optional fields
    rec['source'] = r.get('source')
    # An explicit ``"notes": null`` (or a non-string value) must not survive
    # as-is: the marker below is appended with ``+=`` and would raise TypeError.
    notes = r.get('notes')
    if notes is None:
        notes = ''
    elif not isinstance(notes, str):
        notes = str(notes)
    rec['notes'] = notes
    # id and status
    rec['id'] = r.get('id') or str(uuid.uuid4())
    status = r.get('status') or 'queued'
    # isinstance guard: an unhashable status (e.g. a list) would make the set
    # membership test itself raise.
    rec['status'] = status if isinstance(status, str) and status in STATUSES else 'queued'
    rec['history'] = r.get('history') or []
    # flag incomplete records instead of rejecting them
    missing = [k for k in REQUIRED_FIELDS if not rec[k]]
    if missing:
        rec['status'] = 'queued'
        marker = f" | Missing fields: {missing}"
        # Re-validating an already flagged record must not stack the same marker.
        if marker not in rec['notes']:
            rec['notes'] += marker
    return rec

In [None]:
# 3) Load Seed Data (paste JSON array) and optional source notes
# Seed batch for this review round: a list of dicts, each carrying the seven
# REQUIRED_FIELDS keys (domain, the five Nyaya members, grounding_authority).
# Replace the list contents to review a different batch.
# NOTE(review): the claims inside these strings are the material under review,
# not vetted facts.
SEED_JSON = [
  {
    "domain": "Literary History / Indology",
    "pratijna": "The Hanuman Chalisa was composed by Tulsidas in the 16th century CE.",
    "hetu": "Because this attribution is firmly established by a continuous literary tradition and the scholarly consensus.",
    "udaharana": "The authorship and period of a major literary work are determined by historical records, manuscript traditions, and scholarly consensus, just as the plays of Kālidāsa are attributed to him based on the historical and literary record of the Gupta period.",
    "upanaya": "The Hanuman Chalisa is such a work, consistently attributed to Tulsidas within the Bhakti literary tradition and by modern historical scholarship, placing him in the 16th century.",
    "nigamana": "Therefore, the Hanuman Chalisa was composed by Tulsidas in the 16th century CE.",
    "grounding_authority": "Historical Record & Literary Scholarly Consensus"
  },
  {
    "domain": "Historical Linguistics",
    "pratijna": "The Hanuman Chalisa is written in the Awadhi language.",
    "hetu": "Because its grammar, vocabulary, and phonology are characteristic of Awadhi.",
    "udaharana": "The language of any text is identified by its specific linguistic features, just as the Canterbury Tales is identified as being written in Middle English due to its distinct grammar, vocabulary, and phonology.",
    "upanaya": "The Hanuman Chalisa exhibits these specific Awadhi features, such as its verb endings (e.g., 'baranauṁ') and nominal forms, which are consistent with the language of Tulsidas's other major work, the Ramcharitmanas.",
    "nigamana": "Therefore, the Hanuman Chalisa is written in the Awadhi language.",
    "grounding_authority": "Philological Analysis & Comparative Linguistics"
  },
  {
    "domain": "Comparative Philology / Grammar",
    "pratijna": "The Awadhi of the Hanuman Chalisa is amenable to Pāṇinian grammatical analysis.",
    "hetu": "Because Awadhi, as a Middle Indo-Aryan language, retains sufficient structural and lexical roots from Sanskrit.",
    "udaharana": "Any descendant language that preserves the core grammatical categories and lexical roots of its parent language can be analyzed using the grammatical framework of the parent, just as Italian can be analyzed using the principles of Latin grammar.",
    "upanaya": "The Awadhi text of the Hanuman Chalisa demonstrates this preservation, allowing for a systematic reconstruction of its verses into grammatically correct Classical Sanskrit by mapping its forms back to their Sanskrit origins (e.g., Awadhi 'jānike' to Sanskrit absolutive 'jñātvā').",
    "nigamana": "Therefore, the Awadhi of the Hanuman Chalisa is amenable to Pāṇinian grammatical analysis.",
    "grounding_authority": "The Comparative Method of Historical Linguistics / Pāṇinian Grammar"
  },
  {
    "domain": "Comparative Philosophy / Epistemology",
    "pratijna": "The opening verses of the Hanuman Chalisa employ a philosophical method of doubt.",
    "hetu": "Because the speaker begins by declaring their own mind as impure and their intellect as deficient ('buddhihīna'), thereby clearing the ground for receiving higher knowledge.",
    "udaharana": "Any philosophical method that begins by systematically doubting or negating the author's own certainty in order to establish a more secure foundation for truth is a method of doubt, as exemplified by Descartes' method in his 'Meditations,' which begins by doubting all sensory and rational knowledge.",
    "upanaya": "The first two dohas of the Hanuman Chalisa exhibit this structure, with the speaker first needing to purify the 'mind-mirror' and then acknowledging being 'buddhihīna' (devoid of intelligence) before proceeding to narrate the pure glory of the divine.",
    "nigamana": "Therefore, the opening verses of the Hanuman Chalisa employ a philosophical method of doubt.",
    "grounding_authority": "Comparative Philosophy & Literary Analysis"
  },
  {
    "domain": "Cultural History / Comparative Literature",
    "pratijna": "The Hanuman Chalisa is a product of Hindu-Muslim cultural syncretism.",
    "hetu": "Because it employs the Dohā meter, a poetic form of Persian origin, to convey Hindu devotional themes.",
    "udaharana": "Whenever a cultural artifact combines formal elements from one tradition (like a poetic meter) with thematic content from another, it is an example of syncretism, as seen in the architecture of the Taj Mahal, which blends Persian and Mughal forms with Indian design elements.",
    "upanaya": "The Hanuman Chalisa is such an artifact, using the Dohā couplet form, which was popularized in India through the influence of Persianate courtly literature, as the primary vehicle for a central Bhakti devotional text.",
    "nigamana": "Therefore, the Hanuman Chalisa is a product of Hindu-Muslim cultural syncretism.",
    "grounding_authority": "Comparative Literature & Cultural History"
  }
]

# Normalize every seed record (adds an id, status, notes and history).
records: List[Dict[str, Any]] = [validate_record(r) for r in SEED_JSON]

# Optional: attach lightweight source context from a local note if present
src_note = SOURCES_DIR / 'iep' / 'al-ghazali_iep_20250815.txt'
if src_note.exists():
    try:
        txt = src_note.read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError) as exc:
        # Best-effort: the note is optional, so report the problem and carry
        # on rather than failing the cell or hiding the error.
        print(f"Skipped source note {src_note}: {exc}")
    else:
        # Only the first ten lines are attached, to keep notes short.
        header = '\n'.join(txt.splitlines()[:10])
        for r in records:
            r['source'] = str(src_note)
            r['notes'] = (r.get('notes') or '') + f" | Context: {header}"

print(f"Loaded {len(records)} records for review.")

In [None]:
# 4) Pretty Printer for Nyaya Arguments
from rich.markdown import Markdown

def show_record(r: Dict[str, Any], expand: bool=False) -> None:
    """Print one record as a titled rich panel on the shared console.

    Args:
        r: Record dict. Absent or ``None`` fields are shown as empty text, and
            an absent source is shown as '-'.
        expand: When true, print every field in full; otherwise each field is
            cut after 400 characters and suffixed with '…'.
    """
    # Local import keeps this cell self-contained; rich is already required above.
    from rich.markup import escape

    status = r.get('status') or 'queued'
    color = {
        'approved': 'green',
        'edited': 'yellow',
        'disapproved': 'red',
        'queued': 'cyan'
    }.get(status, 'cyan')

    def trunc(s: Any, n: int = 400) -> str:
        text = '' if s is None else str(s)
        if not expand and len(text) > n:
            text = text[:n] + '…'
        # Escape after truncating: the panel body is parsed as console markup
        # (it relies on [bold]...[/bold]), so square brackets inside record
        # text must be printed literally rather than read as style tags.
        return escape(text)

    # Text() does not parse markup, so the title needs no escaping.
    title = Text(f"{r.get('domain') or '(no domain)'}  [{status}]", style=f"bold {color}")
    body = (
        f"[bold]Pratijna:[/bold] {trunc(r.get('pratijna'))}\n"
        f"[bold]Hetu:[/bold] {trunc(r.get('hetu'))}\n"
        f"[bold]Udaharana:[/bold] {trunc(r.get('udaharana'))}\n"
        f"[bold]Upanaya:[/bold] {trunc(r.get('upanaya'))}\n"
        f"[bold]Nigamana:[/bold] {trunc(r.get('nigamana'))}\n"
        f"[bold]Authority:[/bold] {trunc(r.get('grounding_authority'))}\n"
    )
    # validate_record stores source=None when none was given; show '-' then.
    source = escape(str(r.get('source') or '-'))
    footer = f"Source: {source}\nNotes: {trunc(r.get('notes'))}"
    console.print(Panel.fit(body + "\n" + footer, title=title))

In [None]:
# 5) Interactive Review Widget (Approve / Disapprove / Edit)
# Review-session state: index of the record on screen, and whether the
# edit hints are printed under it.
cur_idx = 0
editor_visible = False

# Display surface and position indicator.
out = Output()
status_label = Label()

# Navigation buttons.
btn_prev = Button(description='Prev', button_style='')
btn_next = Button(description='Next', button_style='')

# Decision buttons.
btn_approve = Button(description='Approve', button_style='success')
btn_disapprove = Button(description='Disapprove', button_style='danger')
btn_edit = Button(description='Edit', button_style='warning')

# 6) Edit Flow (Copy-Edit-Paste)
edit_area = Textarea(
    placeholder='Record JSON here...',
    layout=dict(width='100%', height='200px'),
)
btn_apply = Button(description='Apply Edit', button_style='warning')
btn_cancel = Button(description='Cancel Edit', button_style='')

# Bar counting records whose status is no longer 'queued'.
progress = IntProgress(
    min=0,
    max=len(records),
    value=0,
    description='Reviewed',
)


def refresh_view():
    """Redraw the position label and the current record inside ``out``.

    Reads the module-level ``records``, ``cur_idx`` and ``editor_visible``.
    With no records loaded it shows a placeholder instead of raising
    IndexError on ``records[cur_idx]``.
    """
    if not records:
        status_label.value = "Item 0/0"
        out.clear_output()
        with out:
            print('No records to review.')
        return
    status_label.value = f"Item {cur_idx+1}/{len(records)}"
    out.clear_output()
    with out:
        show_record(records[cur_idx])
        if editor_visible:
            print('\n--- Edit JSON below (keep keys). ---')
            print('Tip: Preserve the id; required fields must be present.')

# Initial draw so the widget shows the first record as soon as it is displayed.
refresh_view()


def on_prev(_):
    """Click handler: step back one record, stopping at the first."""
    global cur_idx
    cur_idx = cur_idx - 1 if cur_idx > 0 else 0
    refresh_view()

def on_next(_):
    """Click handler: step forward one record, stopping at the last."""
    global cur_idx
    last = len(records) - 1
    step = cur_idx + 1
    cur_idx = step if step < last else last
    refresh_view()


def persist(decision: str, before: Dict[str,Any], after: Optional[Dict[str,Any]]=None):
    """Append one decision to the session ledger and rewrite the CSV snapshot.

    Args:
        decision: Label stored in the ledger row's 'decision' field.
        before: Record state to store under 'before'.
        after: Record state to store under 'after'; when omitted (or falsy),
            *before* is used for both.
    """
    current = after or before
    entry = {
        'id': current['id'],
        'decision': decision,
        'timestamp': datetime.utcnow().isoformat(),
        'before': before,
        'after': current,
        'user': 'reviewer'
    }
    line = json.dumps(entry, ensure_ascii=False) + '\n'
    with open(LEDGER_PATH, 'a', encoding='utf-8') as ledger:
        ledger.write(line)
    # snapshot: the CSV always mirrors the full in-memory record list
    pd.DataFrame(records).to_csv(SNAPSHOT_CSV, index=False)


def on_approve(_):
    """Click handler: mark the record on screen approved, log it, redraw."""
    if not records:
        return  # nothing on screen to decide on
    r = records[cur_idx]
    # Snapshot first: the ledger's 'before' must hold the pre-decision state,
    # not the record that is about to be mutated in place.
    before = copy.deepcopy(r)
    r['status'] = 'approved'
    persist('approved', before, r)
    progress.value = sum(1 for x in records if x['status'] != 'queued')
    refresh_view()

def on_disapprove(_):
    """Click handler: mark the record on screen disapproved, log it, redraw."""
    if not records:
        return  # nothing on screen to decide on
    r = records[cur_idx]
    # Snapshot first: the ledger's 'before' must hold the pre-decision state,
    # not the record that is about to be mutated in place.
    before = copy.deepcopy(r)
    r['status'] = 'disapproved'
    persist('disapproved', before, r)
    progress.value = sum(1 for x in records if x['status'] != 'queued')
    refresh_view()

def on_edit(_):
    """Click handler: load the current record into the editor as indented JSON."""
    global editor_visible
    editor_visible = True
    current = records[cur_idx]
    as_json = json.dumps(current, indent=2, ensure_ascii=False)
    edit_area.value = as_json
    refresh_view()


def on_apply(_):
    """Click handler: parse the editor JSON and replace the current record.

    The edit is applied only when it is a JSON object whose required fields
    are all non-empty after validation. Otherwise an "Edit error" line is
    written to the output area and the record, ledger and editor text are
    left untouched so the reviewer can correct the JSON.
    """
    global editor_visible
    try:
        edited = json.loads(edit_area.value)
        if not isinstance(edited, dict):
            raise ValueError('edited record must be a JSON object')
        before = copy.deepcopy(records[cur_idx])
        # Preserve id
        edited['id'] = before['id']
        edited = validate_record(edited)
        # Forcing status to 'edited' below would otherwise hide the fact that
        # validation found empty required fields, and 'edited' records are
        # exported by write_approved_snapshot.
        missing = [k for k in REQUIRED_FIELDS if not edited[k]]
        if missing:
            raise ValueError(f"required fields are empty: {missing}")
        edited['status'] = 'edited'
        records[cur_idx] = edited
        persist('edited', before, edited)
        editor_visible = False
        progress.value = sum(1 for x in records if x['status'] != 'queued')
        refresh_view()
    except Exception as e:
        # UI boundary: report any failure to the reviewer instead of raising
        # inside the widget callback.
        out.append_stdout(f"Edit error: {e}\n")


def on_cancel(_):
    """Click handler: hide the edit hints and redraw without applying anything."""
    global editor_visible
    editor_visible = False
    refresh_view()

# Connect every button to its click handler.
for _btn, _handler in (
    (btn_prev, on_prev),
    (btn_next, on_next),
    (btn_approve, on_approve),
    (btn_disapprove, on_disapprove),
    (btn_edit, on_edit),
    (btn_apply, on_apply),
    (btn_cancel, on_cancel),
):
    _btn.on_click(_handler)

# Layout, top to bottom: position + progress, record view, action buttons,
# JSON editor, editor buttons.
ui = VBox(
    [
        HBox([status_label, progress]),
        out,
        HBox([btn_prev, btn_next, btn_approve, btn_disapprove, btn_edit]),
        edit_area,
        HBox([btn_apply, btn_cancel]),
    ]
)

# Last expression of the cell, so the notebook displays the widget.
ui

In [None]:
# 7) Decision Logging and Persistence (JSONL/CSV)
print(f"Ledger: {LEDGER_PATH}")
print(f"Snapshot CSV: {SNAPSHOT_CSV}")

# Helper: write approved snapshot for this session

def write_approved_snapshot(out_path: Path):
    """Write accepted records to *out_path* as JSONL, one record per line.

    A record is accepted when its status is 'approved' or 'edited'. Only the
    REQUIRED_FIELDS keys are written; ids, notes and other bookkeeping are
    left out. The file is overwritten on every call.
    """
    accepted = ('approved', 'edited')
    with out_path.open('w', encoding='utf-8') as f:
        for r in records:
            if r['status'] not in accepted:
                continue
            clean = {k: r[k] for k in REQUIRED_FIELDS}
            f.write(json.dumps(clean, ensure_ascii=False) + '\n')
    print(f"Wrote approved snapshot: {out_path}")

In [None]:
# 8) Batch Processing Loop and Progress Display
# For this simple UI, the buttons control the loop manually.
# Provide a one-click finalize step to write approved snapshot.
from ipywidgets import Button as _Button
btn_finalize = _Button(
    description='Finalize & Write Approved Snapshot',
    button_style='success',
)


def on_finalize(_):
    """Click handler: write the approved snapshot to APPROVED_OUT."""
    write_approved_snapshot(APPROVED_OUT)


btn_finalize.on_click(on_finalize)
# Last expression of the cell, so the notebook displays the button.
btn_finalize

In [None]:
# 9) Hook to External Evaluator (Stub) and Iterative Improvement Queue
# Prompts for records that need another improvement pass.
improvement_queue: List[Dict[str, Any]] = []

# Records scoring below this value are sent to the improvement queue.
IMPROVEMENT_THRESHOLD = 0.5

def evaluate(record: Dict[str, Any]) -> Dict[str, Any]:
    """Score a record with a placeholder length heuristic.

    The score is the total character count of the required fields divided by
    2000, capped at 1.0. Scores below IMPROVEMENT_THRESHOLD get a
    "needs work" feedback string, the rest get 'OK'.

    Returns:
        A dict with 'score' (float) and 'feedback' (str).
    """
    # Placeholder: could call an external CLI/API instead of counting characters.
    total_chars = 0
    for field in REQUIRED_FIELDS:
        total_chars += len(str(record.get(field, '')))
    score = total_chars / 2000.0
    if score > 1.0:
        score = 1.0
    if score < IMPROVEMENT_THRESHOLD:
        feedback = 'Needs elaboration or authority specificity.'
    else:
        feedback = 'OK'
    return {'score': score, 'feedback': feedback}

# Evaluate current records and update improvement queue
for r in records:
    res = evaluate(r)
    # Keep the evaluation on the record so it is visible in later snapshots.
    r['eval'] = res
    rejected = r.get('status') in {'disapproved'}
    weak = res['score'] < IMPROVEMENT_THRESHOLD
    if not (rejected or weak):
        continue
    # Disapproved or low-scoring: queue a prompt for another improvement pass.
    prompt = {
        'instruction': 'Improve the argument; add specificity to grounding_authority; strengthen hetu and examples.',
        'record': {k: r.get(k) for k in REQUIRED_FIELDS},
        'feedback': res['feedback'],
        'context_hint': r.get('notes','')
    }
    improvement_queue.append(prompt)

print(f"Improvement queue size: {len(improvement_queue)}")

In [None]:
# 10) Resume From Saved State and Re-run + Smoke Tests
from collections import defaultdict

def load_state(ledger_path: Path, into: Optional[List[Dict[str,Any]]]=None) -> List[Dict[str,Any]]:
    """Replay a decision ledger onto a record list, in place.

    For every record id found in the ledger, the record's slot in the list is
    replaced by the 'after' state of that id's last ledger row. Ledger ids
    that match no record are ignored, as are unreadable rows.

    Args:
        ledger_path: JSONL ledger whose rows carry 'id' and 'after'.
        into: List to update; defaults to the module-level ``records``.

    Returns:
        The same list object that was updated (unchanged when the ledger file
        does not exist).
    """
    target = records if into is None else into
    if not ledger_path.exists():
        return target
    latest: Dict[str, Dict[str,Any]] = {}
    with ledger_path.open('r', encoding='utf-8') as f:
        for line in f:
            try:
                row = json.loads(line)
                rid = row['id']
                after = row['after']
            except (json.JSONDecodeError, KeyError, TypeError):
                # Blank, truncated or malformed row: skip it, keep replaying.
                continue
            if isinstance(after, dict):
                try:
                    latest[rid] = after  # later rows win
                except TypeError:
                    continue  # unhashable id cannot match any record
    # Key the index by each record's own id. The earlier version read a stale
    # outer variable here, so at most one id was ever mapped.
    id_to_idx = {rec.get('id'): i for i, rec in enumerate(target)}
    for rid, rec in latest.items():
        if rid in id_to_idx:
            target[id_to_idx[rid]] = rec
    return target

# Smoke tests
# The first record must expose every required key.
first_record = records[0]
absent_keys = [k for k in REQUIRED_FIELDS if k not in first_record]
assert not absent_keys, 'required fields missing'

# The export and the pretty printer must both run without raising.
smoke_path = SESSION_DIR / 'approved_smoketest.jsonl'
write_approved_snapshot(smoke_path)
show_record(first_record)
print('Smoke tests passed. Resume ready.')