# Pramana Explorer: Interactive Navya-Nyaya Reasoning Demo

This notebook provides an interactive exploration of the Pramana epistemic reasoning engine, demonstrating the 6-phase Navya-Nyaya methodology for systematic logical problem-solving.

## 1. Setup & Configuration

In [None]:
# Install dependencies (with Colab detection)
import sys
import subprocess
from pathlib import Path

# Detect Google Colab by probing for its runtime package.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

# Requirements installed up front regardless of the chosen backend.
core_packages = [
    "ipywidgets>=8.0.0",
    "ipython>=8.0.0",
    "pydantic>=2.0.0",
    "pyyaml>=6.0",
    "requests",
]

# Backend-specific packages (install on demand)
# - Ollama: pip install ollama
# - llama.cpp: pip install llama-cpp-python
# - HF Inference: pip install huggingface_hub
# - Transformers: pip install transformers torch accelerate peft
# - OpenWebUI: pip install openai

def install_packages(pkgs, quiet=True):
    """Install packages with pip into the running interpreter's environment.

    Args:
        pkgs: A single requirement string or an iterable of requirement
            strings (e.g. ``["requests", "pyyaml>=6.0"]``).
        quiet: When true, pass ``-q`` to pip.

    Returns:
        None. Nothing is executed when ``pkgs`` is empty.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    # A bare string would otherwise be extended one character at a time.
    requirements = [pkgs] if isinstance(pkgs, str) else list(pkgs)
    if not requirements:
        # "pip install" without any requirement exits with an error.
        return

    cmd = [sys.executable, "-m", "pip", "install"]
    if quiet:
        cmd.append("-q")
    cmd.extend(requirements)
    subprocess.check_call(cmd)

install_packages(core_packages)

# The custom widget manager only exists inside Colab, so enable it there only.
if IN_COLAB:
    from google.colab import output
    output.enable_custom_widget_manager()

_environment_label = "Google Colab" if IN_COLAB else "Local"

# One print call; the lines (and the blank separator line) are unchanged.
print(
    "✓ Core dependencies installed",
    f"✓ Environment: {_environment_label}",
    "",
    "Backend-specific packages (install as needed):",
    "  Ollama:       pip install ollama",
    "  llama.cpp:    pip install llama-cpp-python",
    "  HF Inference: pip install huggingface_hub",
    "  Transformers: pip install transformers torch accelerate peft",
    "  OpenWebUI:    pip install openai",
    sep="\n",
)

In [None]:
# Import core modules
import os
import re
import json
from getpass import getpass
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from IPython.display import display, HTML, Markdown
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, HBox, VBox, Output

# Import Pramana backend (self-contained, no external dependencies)
from pramana_backend import (
    create_backend,
    STAGE_CONFIGS,
    build_user_prompt,
    is_colab,
    EXAMPLE_PROBLEMS,
    load_test_problems,
    parse_nyaya_phases,
    validate_structure as backend_validate_structure,
    score_content_quality as backend_score_content_quality,
    extract_final_answer,
    score_answers,
    setup_ollama,
    download_gguf,
)

print("✓ Modules imported (self-contained, no project dependencies)")

In [None]:
# Configuration: Backend selection and credentials
def _labelled_dropdown(label, choices, selected):
    """Create a Dropdown with the description styling used by the config panel."""
    return widgets.Dropdown(
        options=choices,
        value=selected,
        description=label,
        style={"description_width": "initial"},
    )


# Inference backend: (label shown to the user, key passed to create_backend).
backend_dropdown = _labelled_dropdown(
    "Backend:",
    [
        ("Transformers (Local GPU/CPU)", "transformers"),
        ("HuggingFace Inference API", "hf_inference"),
        ("Ollama (Local)", "ollama"),
        ("llama.cpp (GGUF)", "llamacpp"),
        ("OpenWebUI API", "openwebui"),
    ],
    "transformers",
)

# Training stage; the names come straight from STAGE_CONFIGS.
stage_dropdown = _labelled_dropdown(
    "Stage:",
    list(STAGE_CONFIGS.keys()),
    "Stage 0",
)

# Which model variant(s) to instantiate when the configuration is applied.
model_variant_dropdown = _labelled_dropdown(
    "Model:",
    [("Tuned model", "tuned"), ("Base model", "base"), ("Both (comparison)", "both")],
    "both",
)

def _wide_field(widget_cls, label, **extra):
    """Create a 95%-wide input widget of ``widget_cls`` with the shared label styling."""
    return widget_cls(
        description=label,
        style={"description_width": "initial"},
        layout=widgets.Layout(width="95%"),
        **extra,
    )


# Credential entry
hf_token_input = _wide_field(
    widgets.Password,
    "HF Token:",
    value="",
    placeholder="Enter HF_TOKEN (optional for Transformers/HF Inference)",
)

# Backend-specific settings: Ollama
ollama_model_input = _wide_field(
    widgets.Text,
    "Ollama model:",
    value="nyaya-llama-3b-stage0",
    placeholder="e.g. nyaya-llama-3b-stage0 or llama3.2:3b",
)
ollama_url_input = _wide_field(
    widgets.Text,
    "Ollama URL:",
    value="http://localhost:11434",
)

# Backend-specific settings: llama.cpp (local file or running server)
gguf_path_input = _wide_field(
    widgets.Text,
    "GGUF path/URL:",
    value="",
    placeholder="Path to .gguf file (or llama.cpp server URL like http://localhost:8080)",
)

# Backend-specific settings: OpenWebUI
openwebui_url_input = _wide_field(
    widgets.Text,
    "OpenWebUI URL:",
    value="http://localhost:3000/api",
)
openwebui_key_input = _wide_field(
    widgets.Password,
    "API Key:",
    value="",
    placeholder="OpenWebUI API key",
)
openwebui_model_input = _wide_field(
    widgets.Text,
    "Model name:",
    value="nyaya-llama-3b-stage0",
    placeholder="Model name in OpenWebUI",
)

# Output area that hosts whichever backend-specific panel is currently shown.
backend_settings_output = Output()

def _show_ollama_settings():
    """Display the Ollama inputs together with an auto-setup button."""
    setup_btn = widgets.Button(description="Auto-Setup Ollama", button_style="info", icon="magic")
    setup_log = Output()

    def _run_setup(_clicked):
        with setup_log:
            setup_log.clear_output()
            try:
                install_packages(["ollama"])
                ready_model = setup_ollama(model_name=ollama_model_input.value.strip())
                print(f"✓ Ollama ready with model: {ready_model}")
            except Exception as exc:
                print(f"✗ Setup failed: {exc}")

    setup_btn.on_click(_run_setup)
    display(VBox([
        widgets.HTML("<b>Ollama Settings</b> — Click Auto-Setup to install Ollama + download model, or configure manually"),
        ollama_model_input,
        ollama_url_input,
        setup_btn,
        setup_log,
    ]))


def _show_llamacpp_settings():
    """Display the GGUF path input together with an auto-download button."""
    download_btn = widgets.Button(description="Auto-Download GGUF", button_style="info", icon="download")
    download_log = Output()

    def _run_download(_clicked):
        with download_log:
            download_log.clear_output()
            try:
                install_packages(["llama-cpp-python"])
                gguf_file = download_gguf()
                # Feed the downloaded path back into the input used by setup_config().
                gguf_path_input.value = gguf_file
                print(f"✓ GGUF downloaded: {gguf_file}")
            except Exception as exc:
                print(f"✗ Download failed: {exc}")

    download_btn.on_click(_run_download)
    display(VBox([
        widgets.HTML("<b>llama.cpp Settings</b> — Click Auto-Download or provide path to GGUF / llama.cpp server URL"),
        gguf_path_input,
        download_btn,
        download_log,
    ]))


def _show_openwebui_settings():
    """Display the OpenWebUI URL, API key and model name inputs."""
    display(VBox([
        widgets.HTML("<b>OpenWebUI Settings</b>"),
        openwebui_url_input,
        openwebui_key_input,
        openwebui_model_input,
    ]))


def update_backend_settings(change=None):
    """Re-render the backend-specific settings for the selected backend.

    Clears ``backend_settings_output`` and displays either a one-line hint
    (Transformers / HF Inference) or the matching settings panel. ``change``
    is the observer payload from ipywidgets and is not used.
    """
    hints = {
        "transformers": "<i>Uses HuggingFace model IDs from stage config. Set HF_TOKEN above for gated models.</i>",
        "hf_inference": "<i>Uses HuggingFace Inference API. HF_TOKEN required.</i>",
    }
    panels = {
        "ollama": _show_ollama_settings,
        "llamacpp": _show_llamacpp_settings,
        "openwebui": _show_openwebui_settings,
    }

    with backend_settings_output:
        backend_settings_output.clear_output()
        selected = backend_dropdown.value
        if selected in hints:
            display(widgets.HTML(hints[selected]))
        elif selected in panels:
            panels[selected]()

# Re-render the backend-specific settings whenever the dropdown's value changes.
backend_dropdown.observe(update_backend_settings, names='value')

def setup_config():
    """Build the backend(s) from the widget values and publish them as ``config``.

    Reads the backend, stage and model-variant dropdowns plus the
    backend-specific inputs, creates the requested backend(s) through
    ``create_backend`` and stores the result in the module-level ``config``.
    A Hugging Face token (widget value, else the ``HF_TOKEN`` environment
    variable) is exported to ``os.environ["HF_TOKEN"]`` when present.

    Returns:
        The new configuration dict with keys ``backend_type``,
        ``stage_config``, ``base_backend`` and ``tuned_backend`` (a backend
        is ``None`` when its variant was not selected), or ``None`` when
        setup failed. On failure the error and traceback are printed and the
        previous ``config`` is left unchanged.
    """
    global config
    backend_type = backend_dropdown.value
    stage_name = stage_dropdown.value
    variant = model_variant_dropdown.value
    hf_token = hf_token_input.value.strip() or os.getenv("HF_TOKEN")

    if hf_token:
        os.environ["HF_TOKEN"] = hf_token

    stage_config = STAGE_CONFIGS[stage_name]

    def _make_backend(model_id):
        """Create one backend of the selected type.

        NOTE(review): only the transformers and hf_inference branches use
        ``model_id``; the other backends take their model from the widgets,
        so with "both" the base and tuned backends point at the same model.
        """
        if backend_type == "transformers":
            return create_backend("transformers", model_id=model_id, hf_token=hf_token)
        if backend_type == "hf_inference":
            return create_backend("hf_inference", model_id=model_id, hf_token=hf_token)
        if backend_type == "ollama":
            return create_backend("ollama", model_name=ollama_model_input.value.strip(),
                                  base_url=ollama_url_input.value.strip())
        if backend_type == "llamacpp":
            path_or_url = gguf_path_input.value.strip()
            if not path_or_url:
                # The input defaults to "", which would otherwise be passed on as a model path.
                raise ValueError(
                    "GGUF path/URL is empty: provide a .gguf file path or a llama.cpp server URL"
                )
            if path_or_url.startswith("http"):
                return create_backend("llamacpp", server_url=path_or_url)
            return create_backend("llamacpp", model_path=path_or_url)
        if backend_type == "openwebui":
            return create_backend("openwebui",
                                  base_url=openwebui_url_input.value.strip(),
                                  api_key=openwebui_key_input.value.strip() or None,
                                  model_name=openwebui_model_input.value.strip())
        # Fail loudly instead of returning None and reporting success.
        raise ValueError(f"Unsupported backend type: {backend_type!r}")

    try:
        base_backend = _make_backend(stage_config.base_model_id) if variant in ("base", "both") else None
        tuned_backend = _make_backend(stage_config.tuned_model_id) if variant in ("tuned", "both") else None

        config = {
            "backend_type": backend_type,
            "stage_config": stage_config,
            "base_backend": base_backend,
            "tuned_backend": tuned_backend,
        }
        print(f"\u2713 Configuration set: {stage_name} with {backend_type} backend")
        if base_backend:
            print(f"  Base model ready: {stage_config.base_model_id}")
        if tuned_backend:
            print(f"  Tuned model ready: {stage_config.tuned_model_id}")
        return config
    except Exception as e:
        # Notebook boundary: report the problem in the output area instead of raising.
        print(f"\u2717 Configuration failed: {e}")
        import traceback
        traceback.print_exc()
        return None

config = None  # Will be set after user clicks Apply

config_output = Output()

setup_button = widgets.Button(
    icon="check",
    button_style="primary",
    description="Apply Configuration",
)


def on_setup_click(button):
    """Run setup_config() with its printed output captured in config_output."""
    with config_output:
        config_output.clear_output()
        setup_config()


setup_button.on_click(on_setup_click)

# Initial render of backend settings
update_backend_settings()

_config_panel = VBox([
    widgets.HTML("<h3>Configuration</h3>"),
    backend_dropdown,
    stage_dropdown,
    model_variant_dropdown,
    hf_token_input,
    backend_settings_output,
    setup_button,
    config_output,
])
display(_config_panel)

## 2. Introduction to Navya-Nyaya Reasoning

### What is Navya-Nyaya?

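Navya-Nyaya ("New Logic") is the later, more formalized phase of the Nyaya school of Indian epistemology. The Nyaya tradition itself is roughly two millennia old, while Navya-Nyaya developed from about the 13th century CE. It provides a structured methodology for systematic reasoning. Unlike Western formal logic, Navya-Nyaya integrates logic and epistemology, requiring explicit grounding in concrete examples and universal rules.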

### The 6-Phase Framework

Pramana enforces a structured 6-phase Nyaya methodology:

1. **Samshaya (Doubt Analysis)** - Classify the type of uncertainty/ambiguity
2. **Pramana (Evidence Sources)** - Identify valid knowledge sources:
   - Pratyaksha (Direct Perception)
   - Anumana (Inference)
   - Upamana (Comparison)
   - Shabda (Testimony)
3. **Pancha Avayava (5-Member Syllogism)** - Construct formal argument with:
   - Pratijna (Thesis)
   - Hetu (Reason)
   - Udaharana (Universal Example)
   - Upanaya (Application)
   - Nigamana (Conclusion)
4. **Tarka (Counterfactual Testing)** - Use reductio ad absurdum to verify conclusions
5. **Hetvabhasa (Fallacy Detection)** - Check for reasoning errors
6. **Nirnaya (Ascertainment)** - Reach definitive conclusion or explicitly state insufficient evidence

In [None]:
# Take the first embedded problem (pramana-001: constraint satisfaction); no files are read.
example = EXAMPLE_PROBLEMS[0]


def _show_markdown(markdown_text):
    """Render one Markdown snippet in the cell output."""
    display(Markdown(markdown_text))


_show_markdown(f"## Example Problem: {example['id']}\n\n{example['problem']}")
_show_markdown(f"**Ground Truth:** {example['ground_truth']}")
_show_markdown(f"**Problem Type:** {example['problem_type']} | **Difficulty:** {example['difficulty']}")

### Phase 1: Samshaya (Doubt Analysis)

The example demonstrates **Samana Dharma Upapatti** (Multiple possibilities share similar properties):

- There are three people and three pets, creating multiple possible assignments
- Without systematic reasoning, we cannot determine which person has which pet
- The doubt arises because multiple arrangements are conceivable

### Phase 2: Pramana (Sources of Knowledge)

The example identifies:

- **Pratyaksha**: Directly stated constraints ("Alice does not have the cat", "Bob has the dog", etc.)
- **Anumana**: Inferences like "If Bob has the dog, then neither Alice nor Carol has the dog"
- **Upamana**: Comparison to constraint satisfaction problems with mutual exclusivity
- **Shabda**: Logical principles (Law of Excluded Middle, Non-Contradiction, etc.)

### Phase 3: Pancha Avayava (5-Member Syllogism)

The example constructs three syllogisms:

1. **Establishing Bob's Pet**: Pratijna="Bob has the dog", supported by direct constraint
2. **Establishing Alice's Pet**: Pratijna="Alice has the fish", via elimination (cannot have cat or dog)
3. **Establishing Carol's Pet**: Pratijna="Carol has the cat", via completeness principle

Each syllogism includes all 5 members with explicit Udaharana (universal rules) and Upanaya (application).

### Phase 4: Tarka (Counterfactual Testing)

The example uses reductio ad absurdum:

- **Hypothesis**: "Suppose Carol does not have the cat"
- **Consequence**: This leads to Carol having no pet, violating completeness
- **Analysis**: This is absurd given the problem constraints
- **Resolution**: Therefore, Carol must have the cat

### Phase 5: Hetvabhasa (Fallacy Check)

The example checks for:

- **Savyabhichara** (Erratic reasoning): None detected
- **Viruddha** (Contradictory reasoning): None detected
- **Prakaranasama** (Circular reasoning): None detected
- **Sadhyasama** (Begging the question): None detected

### Phase 6: Nirnaya (Ascertainment)

**Status**: Definitive Knowledge

**Final Answer**: Alice has the fish, Bob has the dog, and Carol has the cat.

**Confidence**: High - The solution is logically necessary given the constraints.

## 3. Interactive Comparison: Base vs Tuned

In [None]:
# Fetch the problems bundled with the backend (no external files needed).
examples = load_test_problems("embedded")
print(f"✓ Loaded {len(examples)} embedded examples")

# Numbered overview: id, problem type and difficulty of each example.
for position, problem in enumerate(examples, start=1):
    print(f"  {position}. [{problem['id']}] {problem['problem_type']} ({problem['difficulty']})")

In [None]:
# Example selector and generation controls
def _wide_slider(slider_cls, label, default, lower, upper, increment):
    """Create a 95%-wide slider of ``slider_cls`` with the shared label styling."""
    return slider_cls(
        value=default,
        min=lower,
        max=upper,
        step=increment,
        description=label,
        style={"description_width": "initial"},
        layout=widgets.Layout(width="95%"),
    )


# Each option maps an example id (label) to its index in ``examples`` (value).
example_selector = widgets.Dropdown(
    options=[(problem["id"], position) for position, problem in enumerate(examples)],
    description="Example:",
    style={"description_width": "initial"},
)

# --- Hyperparameter controls (matching HF Space app) ---
max_tokens_slider = _wide_slider(widgets.IntSlider, "Max new tokens:", 2048, 64, 4096, 32)
temperature_slider = _wide_slider(widgets.FloatSlider, "Temperature:", 0.0, 0.0, 1.5, 0.05)
top_p_slider = _wide_slider(widgets.FloatSlider, "Top-p:", 1.0, 0.0, 1.0, 0.05)
top_k_slider = _wide_slider(widgets.IntSlider, "Top-k:", 0, 0, 200, 5)

generate_button = widgets.Button(
    icon="play",
    button_style="primary",
    description="Generate",
)

# Receives everything printed or displayed by the generate handler.
output_area = Output()

def generate_comparison(button):
    """Generate outputs for the selected example and display them side by side.

    Click handler for ``generate_button``. Uses the module-level ``config``
    (set by ``setup_config``), builds the prompt with ``build_user_prompt``
    and calls ``generate`` on each configured backend with the slider values.
    A model whose backend is ``None`` (its variant was not selected) is
    skipped and shown with a placeholder rather than an ``AttributeError``
    message. Model text and model IDs are HTML-escaped before being embedded,
    so markup-like characters in the output are displayed literally.

    Side effects:
        Writes to ``output_area`` and stores the two result strings in the
        globals ``last_base_output`` and ``last_tuned_output`` for later cells.

    Args:
        button: The clicked widget, supplied by ``on_click`` (unused).
    """
    import html as _html  # function-scope import keeps the cell self-contained

    global last_base_output, last_tuned_output

    with output_area:
        # Clear first so repeated clicks do not stack stale messages.
        output_area.clear_output()

        if config is None:
            print("⚠ Please configure backend and stage first")
            return

        idx = example_selector.value
        example = examples[idx]

        print(f"Generating for: {example['id']}")
        print("=" * 80)

        # Build prompt
        user_prompt = build_user_prompt(example["problem"])
        stage_config = config["stage_config"]

        # Generation parameters
        gen_kwargs = dict(
            system_prompt=stage_config.system_prompt,
            max_new_tokens=max_tokens_slider.value,
            temperature=temperature_slider.value,
            top_p=top_p_slider.value,
            top_k=top_k_slider.value,
        )

        not_configured = "(not generated: this model variant is not selected in the configuration)"

        def _generate(backend, announcement):
            """Run one backend, returning its text, an error string, or the placeholder."""
            if backend is None:
                return not_configured
            print(announcement)
            try:
                return backend.generate(user_prompt, **gen_kwargs)
            except Exception as e:
                # Best effort: show the failure in the panel and keep the other model usable.
                return f"Error: {e}"

        base_output = _generate(
            config["base_backend"],
            f"\n[Base Model] Generating (max_tokens={gen_kwargs['max_new_tokens']}, "
            f"temp={gen_kwargs['temperature']}, top_p={gen_kwargs['top_p']}, top_k={gen_kwargs['top_k']})...",
        )
        tuned_output = _generate(config["tuned_backend"], "[Tuned Model] Generating...")

        # Display side-by-side
        display(HTML("""
        <style>
        .comparison-container { display: flex; gap: 20px; }
        .model-output { flex: 1; border: 1px solid #ccc; padding: 10px; border-radius: 5px; }
        .base-model { background-color: #fff3cd; }
        .tuned-model { background-color: #d1ecf1; }
        </style>
        """))

        # Escape everything that originates outside this notebook's own markup.
        base_title = _html.escape(str(stage_config.base_model_id))
        tuned_title = _html.escape(str(stage_config.tuned_model_id))
        base_html = _html.escape(str(base_output))
        tuned_html = _html.escape(str(tuned_output))

        display(HTML(f"""
        <div class="comparison-container">
            <div class="model-output base-model">
                <h3>Base Model ({base_title})</h3>
                <pre style="white-space: pre-wrap;">{base_html}</pre>
            </div>
            <div class="model-output tuned-model">
                <h3>Tuned Model ({tuned_title})</h3>
                <pre style="white-space: pre-wrap;">{tuned_html}</pre>
            </div>
        </div>
        """))

        # Store the raw (unescaped) outputs for the analysis cells.
        last_base_output = base_output
        last_tuned_output = tuned_output

# Run generate_comparison whenever the Generate button is clicked.
generate_button.on_click(generate_comparison)

_generation_panel = VBox([
    widgets.HTML("<h3>Generation Controls</h3>"),
    example_selector,
    widgets.HTML("<b>Hyperparameters</b> (adjust for your hardware — larger tokens = longer output):"),
    max_tokens_slider,
    temperature_slider,
    top_p_slider,
    top_k_slider,
    generate_button,
    output_area,
])
display(_generation_panel)

In [None]:
# Phase highlighting function
def highlight_phases(text: str) -> str:
    """Return ``text`` as HTML-safe markup with Nyaya phase headings highlighted.

    The input is HTML-escaped first, so characters such as ``<`` or ``&`` in
    model output are shown literally instead of being parsed as markup.
    Each level-2 heading (``## <Phase> ...``) of the six phases is then
    wrapped in a coloured ``<span>``. Headings are matched case-insensitively
    at the start of a line, which means:

    * ``### Pramana ...`` (a sub-heading) is not mistaken for the phase heading;
    * a heading on the last line is highlighted even without a trailing newline;
    * the line break after a heading stays outside the ``<span>``.

    Args:
        text: Raw model output.

    Returns:
        The escaped text with highlighted phase headings, suitable for
        placing inside a ``<pre>`` element.
    """
    import html as _html  # function-scope import keeps the cell self-contained

    phase_colors = {
        "samshaya": "#ffeb3b",
        "pramana": "#4caf50",
        "pancha avayava": "#2196f3",
        "tarka": "#ff9800",
        "hetvabhasa": "#9c27b0",
        "nirnaya": "#f44336",
    }
    heading = re.compile(
        r"^[ \t]*##[ \t]+(samshaya|pramana|pancha avayava|tarka|hetvabhasa|nirnaya)[^\n]*",
        re.IGNORECASE | re.MULTILINE,
    )

    def _wrap(match):
        color = phase_colors[match.group(1).lower()]
        return (
            f'<span style="background-color: {color}; padding: 2px 4px; '
            f'border-radius: 3px; font-weight: bold;">{match.group(0)}</span>'
        )

    # Escaping leaves "#" and the phase names untouched, so matching afterwards is safe.
    return heading.sub(_wrap, _html.escape(text, quote=False))

# Render whichever stored model outputs exist, with phase headings highlighted.
for _stored_name, _heading in (
    ("last_base_output", "Base Model Output (Highlighted)"),
    ("last_tuned_output", "Tuned Model Output (Highlighted)"),
):
    if _stored_name in globals():
        _highlighted = highlight_phases(globals()[_stored_name])
        display(HTML(f"<h3>{_heading}</h3><pre>{_highlighted}</pre>"))

## 4. Phase-by-Phase Output Analysis

In [None]:
# Phase parsing with regex
def parse_phases(text: str) -> Dict[str, Optional[str]]:
    """Split model output into the six Nyaya phases.

    A phase starts at a level-2 heading ``## <Phase name> ...`` (matched
    case-insensitively at the start of a line, followed by a newline) and
    runs until the next level-2 heading or the end of the text.

    The section boundary is anchored to the start of a line. Without the
    anchor, ``##`` followed by whitespace also matches inside a ``###``
    sub-heading, which cuts a phase off at its first sub-heading (a Pramana
    section starting with ``### Pratyaksha`` was reduced to ``"#"``).

    Args:
        text: Raw model output.

    Returns:
        A dict keyed by ``samshaya``, ``pramana``, ``pancha_avayava``,
        ``tarka``, ``hetvabhasa`` and ``nirnaya``. Each value is the stripped
        section body, or ``None`` when the heading was not found.
    """
    headings = {
        "samshaya": "Samshaya",
        "pramana": "Pramana",
        "pancha_avayava": "Pancha Avayava",
        "tarka": "Tarka",
        "hetvabhasa": "Hetvabhasa",
        "nirnaya": "Nirnaya",
    }
    flags = re.DOTALL | re.IGNORECASE | re.MULTILINE

    phases = {}
    for phase_name, heading in headings.items():
        pattern = (
            rf"^[ \t]*##[ \t]+{re.escape(heading)}[^\n]*\n"
            r"(.*?)(?=^[ \t]*##[ \t]+|\Z)"
        )
        match = re.search(pattern, text, flags)
        phases[phase_name] = match.group(1).strip() if match else None

    return phases

# Quick check: report which phases were found in the tuned output, and their size.
if "last_tuned_output" in globals():
    parsed = parse_phases(last_tuned_output)
    print("Parsed Phases:")
    for phase_key, body in parsed.items():
        mark, size = ("✓", len(body)) if body else ("✗", 0)
        print(f"{mark} {phase_key}: {size} chars")

In [None]:
# Scorecard table
def create_scorecard(base_output: str, tuned_output: str) -> HTML:
    """Build an HTML table showing which phases each model produced.

    Both outputs are parsed with ``parse_phases``. A phase counts as present
    (green ✓) when its parsed body is non-empty, otherwise absent (red ✗).
    """
    parsed_by_model = (parse_phases(base_output), parse_phases(tuned_output))

    def _cell(parsed, key):
        # (symbol, colour) for one model/phase combination.
        return ("✓", "green") if parsed.get(key) else ("✗", "red")

    table_rows = []
    for label in ("Samshaya", "Pramana", "Pancha Avayava", "Tarka", "Hetvabhasa", "Nirnaya"):
        # parse_phases keys are the lower-cased label with underscores.
        key = label.lower().replace(" ", "_")
        (base_mark, base_hue), (tuned_mark, tuned_hue) = (
            _cell(parsed, key) for parsed in parsed_by_model
        )
        table_rows.append(f"""
        <tr>
            <td><strong>{label}</strong></td>
            <td style="color: {base_hue};">{base_mark}</td>
            <td style="color: {tuned_hue};">{tuned_mark}</td>
        </tr>
        """)

    body = ''.join(table_rows)
    return HTML(f"""
    <table border="1" style="border-collapse: collapse; width: 100%;">
        <thead>
            <tr style="background-color: #f0f0f0;">
                <th>Phase</th>
                <th>Base Model</th>
                <th>Tuned Model</th>
            </tr>
        </thead>
        <tbody>
            {body}
        </tbody>
    </table>
    """)

# The scorecard compares both models, so it needs both stored outputs.
if all(name in globals() for name in ("last_base_output", "last_tuned_output")):
    display(create_scorecard(last_base_output, last_tuned_output))

In [None]:
# Color-coded visualization
def visualize_phases(output: str, title: str = "Output") -> HTML:
    """Create a colour-coded bar per phase showing whether it is present.

    A phase counts as present when ``parse_phases`` returns a non-empty body
    for it. This is the same truthiness rule used by ``create_scorecard`` and
    the parse printout, so a heading with an empty body is reported as
    missing in all three views.

    Args:
        output: Raw model output.
        title: Heading shown above the bars.

    Returns:
        An ``HTML`` display object with one row per phase.
    """
    phases = parse_phases(output)

    colors = {
        "samshaya": "#ffeb3b",
        "pramana": "#4caf50",
        "pancha_avayava": "#2196f3",
        "tarka": "#ff9800",
        "hetvabhasa": "#9c27b0",
        "nirnaya": "#f44336",
    }

    bars = []
    for phase_key, color in colors.items():
        # Non-empty body required; an empty section is treated as missing.
        present = bool(phases.get(phase_key))
        width = 100 if present else 0
        bg_color = color if present else "#cccccc"

        phase_name = phase_key.replace("_", " ").title()
        bars.append(f"""
        <div style="margin: 5px 0;">
            <div style="display: inline-block; width: 150px;">{phase_name}</div>
            <div style="display: inline-block; width: {width}%; height: 20px; background-color: {bg_color}; border-radius: 3px;"></div>
            <span style="margin-left: 10px;">{'✓' if present else '✗'}</span>
        </div>
        """)

    html = f"""
    <div style="border: 1px solid #ccc; padding: 15px; border-radius: 5px; margin: 10px 0;">
        <h4>{title}</h4>
        {''.join(bars)}
    </div>
    """

    return HTML(html)

# Show a phase-presence chart for each stored model output.
for _stored_name, _model_label in (
    ("last_base_output", "Base Model"),
    ("last_tuned_output", "Tuned Model"),
):
    if _stored_name in globals():
        display(visualize_phases(globals()[_stored_name], _model_label))

## 5. Structural Validation

In [None]:
# Tier 1 validation (adapted from structure.py)
def validate_structure(text: str) -> Tuple[bool, List[str]]:
    """Validate the structure of a model output (Tier 1 checks).

    Checks performed:

    1. All six phase headings (``## <Phase> ...``) are present.
    2. The Pramana section has at least one ``### <source>`` sub-heading
       (Pratyaksha, Anumana, Upamana or Shabda).
    3. The Pancha Avayava section has at least one ``### Syllogism``
       sub-heading, and each syllogism contains all five members written as
       ``**<Member> ...**:``.

    Section boundaries are anchored to the start of a line. Without the
    anchor, ``##`` followed by whitespace also matches inside ``###``, which
    cut the Pramana and Pancha Avayava bodies off before their sub-headings
    and made checks 2 and 3 fail on well-formed output.

    Args:
        text: Raw model output.

    Returns:
        ``(is_valid, errors)`` where ``errors`` lists every failed check and
        ``is_valid`` is true when the list is empty.
    """
    flags = re.DOTALL | re.IGNORECASE | re.MULTILINE
    # A level-2 heading at the start of a line; "###" does not qualify.
    next_phase = r"^[ \t]*##[ \t]+"

    def _phase_body(phase):
        """Return the raw body of a phase section, or None if its heading is absent."""
        found = re.search(
            rf"^[ \t]*##[ \t]+{re.escape(phase)}[^\n]*\n(.*?)(?={next_phase}|\Z)",
            text,
            flags,
        )
        return found.group(1) if found else None

    errors = []

    # Check all 6 phases exist
    required_phases = [
        "Samshaya",
        "Pramana",
        "Pancha Avayava",
        "Tarka",
        "Hetvabhasa",
        "Nirnaya",
    ]
    bodies = {phase: _phase_body(phase) for phase in required_phases}
    errors.extend(f"Missing phase: {phase}" for phase in required_phases if bodies[phase] is None)

    # Check Pramana has at least one knowledge source
    pramana_text = bodies["Pramana"]
    if pramana_text is not None:
        sources = ["Pratyaksha", "Anumana", "Upamana", "Shabda"]
        has_source = any(
            re.search(rf"###\s+{re.escape(s)}", pramana_text, re.IGNORECASE) for s in sources
        )
        if not has_source:
            errors.append("Pramana must have at least one knowledge source")

    # Check Pancha Avayava has at least one syllogism
    pancha_text = bodies["Pancha Avayava"]
    if pancha_text is not None:
        if not re.search(r"###\s+Syllogism", pancha_text, re.IGNORECASE):
            errors.append("Pancha Avayava must contain at least one syllogism")
        else:
            # Check each syllogism has all 5 members
            required_members = ["Pratijna", "Hetu", "Udaharana", "Upanaya", "Nigamana"]
            syllogisms = re.finditer(
                rf"^[ \t]*###[ \t]+Syllogism[^\n]*\n(.*?)(?=^[ \t]*###[ \t]+Syllogism|{next_phase}|\Z)",
                pancha_text,
                flags,
            )
            for idx, syll_match in enumerate(syllogisms, 1):
                syll_text = syll_match.group(1)
                missing = [
                    member
                    for member in required_members
                    if not re.search(rf"\*\*{re.escape(member)}.*?\*\*:", syll_text, re.IGNORECASE)
                ]
                if missing:
                    errors.append(f"Syllogism {idx} missing members: {', '.join(missing)}")

    return len(errors) == 0, errors

# Validate whichever stored outputs exist and print a PASS/FAIL report for each.
for _stored_name, _report_heading in (
    ("last_base_output", "Base Model Validation"),
    ("last_tuned_output", "\nTuned Model Validation"),
):
    if _stored_name not in globals():
        continue
    is_valid, errors = validate_structure(globals()[_stored_name])
    _verdict = "✓ PASS" if is_valid else "✗ FAIL"
    print(f"{_report_heading}: {_verdict}")
    if errors:
        print("Errors:")
        for err in errors:
            print(f"  - {err}")

In [None]:
# Display validation results with formatting
def display_validation_results(output: str, model_name: str):
    """Return an HTML panel summarising ``validate_structure(output)``.

    The panel is framed in green with a PASS status when validation
    succeeds, or in red with a FAIL status followed by a bullet list of the
    reported errors.
    """
    passed, problems = validate_structure(output)

    if passed:
        accent, icon, verdict = "green", "✓", "PASS"
    else:
        accent, icon, verdict = "red", "✗", "FAIL"

    parts = [f"""
    <div style="border: 2px solid {accent}; padding: 15px; border-radius: 5px; margin: 10px 0;">
        <h3 style="color: {accent};">{icon} {model_name} Validation</h3>
        <p><strong>Status:</strong> {verdict}</p>
    """]

    if problems:
        parts.append("<ul>")
        parts.extend(f"<li style='color: red;'>{problem}</li>" for problem in problems)
        parts.append("</ul>")
    else:
        parts.append("<p style='color: green;'>All structural checks passed!</p>")

    parts.append("</div>")

    return HTML("".join(parts))

# Show the formatted validation panel for each stored model output.
for _stored_name, _model_label in (
    ("last_base_output", "Base Model"),
    ("last_tuned_output", "Tuned Model"),
):
    if _stored_name in globals():
        display(display_validation_results(globals()[_stored_name], _model_label))

## 6. Content Quality Scoring

In [None]:
# Content quality scoring (adapted from content_quality.py)
def score_content_quality(text: str, problem: str) -> Dict[str, float]:
    """Score the content quality of a model output using simple heuristics.

    Metrics (each in the range 0.0-1.0):

    * ``pratyaksha_grounding``: share of bullet items under
      ``### Pratyaksha`` whose first five words include at least one word
      that also occurs in ``problem``.
    * ``udaharana_valid``: 1.0 if the Pancha Avayava section contains a
      universal-rule phrasing ("wherever ...", "whenever ...", ...).
    * ``tarka_meaningful``: 1.0 if the Tarka hypothesis contains a negation
      marker and the section contains a contradiction marker.
    * ``hetvabhasa_completeness``: share of the seven listed fallacy names
      mentioned in the Hetvabhasa section.
    * ``overall``: mean of the four metrics above.

    The grounding check compares whole words and ignores the leading bullet
    marker. Matching raw whitespace tokens as substrings counted the ``-``
    bullet itself (and one-letter words) as evidence, which made almost
    every claim appear grounded.

    Args:
        text: Raw model output.
        problem: The problem statement the output should be grounded in.

    Returns:
        A dict mapping metric name to score.
    """
    scores = {}

    # Parse phases
    phases = parse_phases(text)

    # 1. Pratyaksha grounding
    pratyaksha_score = 0.0
    if phases.get("pramana"):
        pratyaksha_match = re.search(
            r"###\s+Pratyaksha.*?\n(.*?)(?=###\s+|##\s+|\Z)",
            phases["pramana"],
            re.DOTALL | re.IGNORECASE,
        )
        if pratyaksha_match:
            # Claims are the bullet-list lines of the sub-section.
            claims = [
                line.strip()
                for line in pratyaksha_match.group(1).split("\n")
                if line.strip().startswith("-")
            ]
            if claims:
                problem_words = set(re.findall(r"\w+", problem.lower()))
                grounded = sum(
                    1
                    for claim in claims
                    if any(word in problem_words for word in re.findall(r"\w+", claim.lower())[:5])
                )
                pratyaksha_score = grounded / len(claims)
    scores["pratyaksha_grounding"] = pratyaksha_score

    # 2. Udaharana patterns
    udaharana_valid = False
    if phases.get("pancha_avayava"):
        udaharana_patterns = [
            r"\bwherever\b.+,\s*.+",
            r"\bwhenever\b.+",
            r"\bin all cases where\b.+",
            r"\bfor any\b.+\bif\b.+\bthen\b.+",
        ]
        udaharana_valid = any(
            re.search(pattern, phases["pancha_avayava"], re.IGNORECASE)
            for pattern in udaharana_patterns
        )
    scores["udaharana_valid"] = 1.0 if udaharana_valid else 0.0

    # 3. Tarka meaningfulness
    tarka_meaningful = False
    if phases.get("tarka"):
        negation_markers = ["not", "no", "never", "suppose", "assume", "contrary", "opposite"]
        contradiction_markers = ["contradiction", "contradicts", "impossible", "cannot", "absurd", "violates"]

        tarka_lower = phases["tarka"].lower()
        hypothesis_match = re.search(
            r"\*\*Hypothesis\*\*:\s*(.+?)(?=\*\*|\Z)",
            phases["tarka"],
            re.DOTALL | re.IGNORECASE,
        )

        has_negation = False
        if hypothesis_match:
            hypothesis = hypothesis_match.group(1).lower()
            # NOTE(review): markers are matched as substrings, so "no" also
            # matches inside words such as "know" - confirm this is intended.
            has_negation = any(marker in hypothesis for marker in negation_markers)

        has_contradiction = any(marker in tarka_lower for marker in contradiction_markers)
        tarka_meaningful = has_negation and has_contradiction
    scores["tarka_meaningful"] = 1.0 if tarka_meaningful else 0.0

    # 4. Hetvabhasa completeness
    hetvabhasa_completeness = 0.0
    if phases.get("hetvabhasa"):
        fallacy_types = ["savyabhichara", "viruddha", "prakaranasama", "sadhyasama", "asiddha", "satpratipaksha", "badhita"]
        hetvabhasa_lower = phases["hetvabhasa"].lower()
        found_fallacies = sum(1 for ft in fallacy_types if ft in hetvabhasa_lower)
        hetvabhasa_completeness = found_fallacies / len(fallacy_types)
    scores["hetvabhasa_completeness"] = hetvabhasa_completeness

    # Overall score: computed before "overall" is inserted, so it averages the four metrics.
    scores["overall"] = sum(scores.values()) / len(scores)

    return scores

# Score whichever stored outputs exist against the currently selected example.
def _current_example():
    """Return the example chosen in the selector, or the first one if no selector exists."""
    chosen = example_selector.value if 'example_selector' in globals() else 0
    return examples[chosen]


def _print_scores(heading, metric_scores):
    """Print a heading followed by one indented line per metric."""
    print(heading)
    for metric_name, metric_value in metric_scores.items():
        print(f"  {metric_name}: {metric_value:.2f}")


if 'last_base_output' in globals() and examples:
    example = _current_example()
    base_scores = score_content_quality(last_base_output, example["problem"])
    _print_scores("Base Model Content Quality Scores:", base_scores)

if 'last_tuned_output' in globals() and examples:
    example = _current_example()
    tuned_scores = score_content_quality(last_tuned_output, example["problem"])
    _print_scores("\nTuned Model Content Quality Scores:", tuned_scores)

In [None]:
# Visualize content quality scores
def visualize_quality_scores(scores: Dict[str, float], model_name: str) -> HTML:
    """Render quality scores as an HTML bar chart.

    Every entry of ``scores`` except ``"overall"`` becomes one horizontal bar
    whose fill width is the score multiplied by 100 (as a CSS percentage).
    The ``"overall"`` entry (0.0 when absent) is shown separately below the
    bars.

    Args:
        scores: Mapping of metric name to score.
        model_name: Label placed in the chart heading.

    Returns:
        An ``HTML`` display object wrapping the generated markup.
    """

    def _band_color(value):
        # Green from 0.7 upward, orange from 0.4 upward, red otherwise.
        if value >= 0.7:
            return "#4caf50"
        if value >= 0.4:
            return "#ff9800"
        return "#f44336"

    # One markup fragment per metric, in the mapping's own order.
    rows = [
        f"""
        <div style="margin: 8px 0;">
            <div style="display: inline-block; width: 200px;">{name.replace('_', ' ').title()}</div>
            <div style="display: inline-block; width: 300px; background-color: #e0e0e0; border-radius: 3px; position: relative;">
                <div style="width: {value * 100}%; height: 20px; background-color: {_band_color(value)}; border-radius: 3px;"></div>
            </div>
            <span style="margin-left: 10px; font-weight: bold;">{value:.2f}</span>
        </div>
        """
        for name, value in scores.items()
        if name != "overall"
    ]

    overall = scores.get("overall", 0.0)

    return HTML(f"""
    <div style="border: 1px solid #ccc; padding: 15px; border-radius: 5px; margin: 10px 0;">
        <h4>{model_name} Content Quality</h4>
        {''.join(rows)}
        <hr>
        <div style="margin-top: 10px;">
            <strong>Overall Score: <span style="color: {_band_color(overall)}; font-size: 1.2em;">{overall:.2f}</span></strong>
        </div>
    </div>
    """)

# Display quality scores
# Each chart is rendered only when the corresponding output variable already
# exists in the notebook globals and `examples` is truthy.
if 'last_base_output' in globals() and examples:
    # Index `examples` with the selector widget's current value when that
    # widget exists; otherwise fall back to the first example.
    example = examples[example_selector.value if 'example_selector' in globals() else 0]
    # Scores are recomputed here rather than reused from an earlier cell.
    base_scores = score_content_quality(last_base_output, example["problem"])
    display(visualize_quality_scores(base_scores, "Base Model"))

# Same chart for the tuned model's output, guarded the same way.
if 'last_tuned_output' in globals() and examples:
    example = examples[example_selector.value if 'example_selector' in globals() else 0]
    tuned_scores = score_content_quality(last_tuned_output, example["problem"])
    display(visualize_quality_scores(tuned_scores, "Tuned Model"))

## 7. Interactive Learning Exercises

In [None]:
# Exercise 1: Identify the doubt type
# Problem statement shown to the learner; it is substituted into the question
# markup below.
exercise1_problem = """
Problem: If it rains, the ground gets wet. The ground is wet. Did it rain?
"""

# Question panel: heading, prompt, and the problem text inside a <pre> block.
exercise1_question = widgets.HTML(
    value="""
    <h4>Exercise 1: Doubt Type Identification</h4>
    <p>What type of doubt (Samshaya) is present in this problem?</p>
    <pre>{exercise1_problem}</pre>
    """.format(exercise1_problem=exercise1_problem)
)

# Answer choices as (label, value) pairs; the "Select..." entry carries an
# empty-string value, which check_exercise1 treats as "no answer".
exercise1_answer = widgets.Dropdown(
    options=[
        ("Select...", ""),
        ("Samana Dharma Upapatti", "samana_dharma_upapatti"),
        ("Vipratipatti", "vipratipatti"),
        ("Anadhyavasaya", "anadhyavasaya"),
    ],
    description="Answer:",
)

# Output area that check_exercise1 writes its feedback into.
exercise1_feedback = Output()

def check_exercise1(change):
    """Show feedback for the currently selected Exercise 1 answer.

    Clears the feedback area, then renders a green message when the selected
    value is "vipratipatti", a red hint for any other non-empty value, and
    nothing for the empty placeholder value.

    Args:
        change: Change notification passed by the widget observer (unused).
    """
    with exercise1_feedback:
        exercise1_feedback.clear_output()

        selected = exercise1_answer.value
        if not selected:
            # Placeholder entry chosen: leave the feedback area empty.
            return

        if selected == "vipratipatti":
            message = "<p style='color: green;'>✓ Correct! This is Vipratipatti (conflicting possibilities) - rain could cause wet ground, but so could other things.</p>"
        else:
            message = "<p style='color: red;'>✗ Not quite. Think about whether there are conflicting possible explanations for the wet ground.</p>"
        display(HTML(message))

# Re-run the check whenever the dropdown's `value` trait changes.
exercise1_answer.observe(check_exercise1, names='value')

# Show the question, the dropdown and the feedback area together in one VBox.
display(VBox([exercise1_question, exercise1_answer, exercise1_feedback]))

In [None]:
# Exercise 2: Identify Pramana sources
# Scenario and question text shown to the learner.
exercise2_text = """
In a logic puzzle: "Alice says she has the red ball. Bob says Alice is lying."

What type of Pramana is "Alice says she has the red ball"?
"""

# The f-string already interpolates exercise2_text. Chaining .format() onto the
# rendered result would re-parse it and raise if the text ever contained
# literal braces, so the template is formatted exactly once.
exercise2_question = widgets.HTML(
    value=f"""
    <h4>Exercise 2: Pramana Source Identification</h4>
    <p>{exercise2_text}</p>
    """
)

# Answer choices as (label, value) pairs; the "Select..." entry carries an
# empty-string value, which check_exercise2 treats as "no answer".
exercise2_answer = widgets.Dropdown(
    options=[
        ("Select...", ""),
        ("Pratyaksha (Direct Perception)", "pratyaksha"),
        ("Anumana (Inference)", "anumana"),
        ("Upamana (Comparison)", "upamana"),
        ("Shabda (Testimony)", "shabda"),
    ],
    description="Answer:",
)

# Output area that check_exercise2 writes its feedback into.
exercise2_feedback = Output()

def check_exercise2(change):
    """Show feedback for the currently selected Exercise 2 answer.

    Clears the feedback area, then renders a green message when the selected
    value is "shabda", a red hint for any other non-empty value, and nothing
    for the empty placeholder value.

    Args:
        change: Change notification passed by the widget observer (unused).
    """
    with exercise2_feedback:
        exercise2_feedback.clear_output()

        selected = exercise2_answer.value
        if not selected:
            # Placeholder entry chosen: leave the feedback area empty.
            return

        if selected == "shabda":
            message = "<p style='color: green;'>✓ Correct! This is Shabda (testimony) - we are told what Alice said, not what we directly observed.</p>"
        else:
            message = "<p style='color: red;'>✗ Not quite. Think about whether this is something we directly observed or something we were told.</p>"
        display(HTML(message))

# Re-run the check whenever the dropdown's `value` trait changes.
exercise2_answer.observe(check_exercise2, names='value')

# Show the question, the dropdown and the feedback area together in one VBox.
display(VBox([exercise2_question, exercise2_answer, exercise2_feedback]))

In [None]:
# Exercise 3: Complete the syllogism
# Partially filled five-member syllogism shown to the learner.
exercise3_text = """
Complete this Pancha Avayava syllogism:

Pratijna (Thesis): All birds can fly.
Hetu (Reason): Because they have wings.
Udaharana (Universal Example): ?
Upanaya (Application): ?
Nigamana (Conclusion): ?
"""

# The f-string already interpolates exercise3_text. Chaining .format() onto the
# rendered result would re-parse it and raise if the text ever contained
# literal braces, so the template is formatted exactly once.
exercise3_question = widgets.HTML(
    value=f"""
    <h4>Exercise 3: Complete the Syllogism</h4>
    <pre>{exercise3_text}</pre>
    """
)

# Free-text field for the learner's Udaharana; starts empty with a placeholder
# hint. check_exercise3 reads its value.
exercise3_udaharana = widgets.Textarea(
    value="",
    placeholder="Enter Udaharana (Universal Example)...",
    description="Udaharana:",
    layout=widgets.Layout(width="100%", height="80px"),
)

# Output area that check_exercise3 writes its feedback into.
exercise3_feedback = Output()

def check_exercise3(change):
    """Give heuristic feedback on the learner's Udaharana text.

    Clears the feedback area, lower-cases the textarea content and looks for
    two things: a universal quantifier word and a mention of a concrete
    example. Shows a green message when both are present, an orange one when
    only the quantifier is present, and a red one otherwise (including when
    the text is empty).

    Args:
        change: Change notification passed by the widget observer (unused).
    """
    with exercise3_feedback:
        exercise3_feedback.clear_output()
        answer = exercise3_udaharana.value.lower()

        # Check for universal rule pattern.
        # Match whole words only: a plain substring test would accept "small",
        # "fall" or "many" as the quantifiers "all" / "any". Common compounds
        # such as "everywhere" or "anything" are still accepted explicitly.
        has_universal = re.search(
            r"\b(?:wherever|whenever|all|(?:any|every)(?:where|thing|one|body)?)\b",
            answer,
        ) is not None
        # Example keywords stay substring matches so plurals and compounds
        # ("eagles", "birds", "songbird") keep counting as examples.
        has_example = any(word in answer for word in ["eagle", "sparrow", "bird", "example", "instance"])

        if has_universal and has_example:
            display(HTML("<p style='color: green;'>✓ Good! Your Udaharana includes both a universal rule and a concrete example. Example: 'Wherever there is a bird with wings, it can fly. For example, an eagle has wings and can fly.'</p>"))
        elif has_universal:
            display(HTML("<p style='color: orange;'>⚠ You have a universal rule, but try to include a concrete example too (e.g., 'like an eagle').</p>"))
        else:
            display(HTML("<p style='color: red;'>✗ Try to include both a universal rule (using words like 'wherever', 'whenever', 'all') and a concrete example.</p>"))

# Re-run the check whenever the textarea's `value` trait changes.
exercise3_udaharana.observe(check_exercise3, names='value')

# Show the question, the textarea and the feedback area together in one VBox.
display(VBox([exercise3_question, exercise3_udaharana, exercise3_feedback]))

## 8. Try Your Own Problem

In [None]:
# Free-form problem input
# Textarea where the user types their own problem; generate_custom reads it.
problem_input = widgets.Textarea(
    value="",
    placeholder="Enter your logical problem here...",
    description="Problem:",
    layout=widgets.Layout(width="100%", height="150px"),
)

# Button that triggers generation (its click handler is attached further down).
custom_generate_button = widgets.Button(
    description="Generate Nyaya Reasoning",
    button_style="success",
    icon="rocket",
)

# Output area that generate_custom prints and renders its results into.
custom_output_area = Output()

def generate_custom(button):
    """Generate and display Nyaya reasoning for the user's own problem.

    Click handler for the custom-problem button. Reads the text in
    ``problem_input``, builds a prompt with ``build_user_prompt``, calls the
    tuned backend stored in the notebook-level ``config`` and renders the
    results inside ``custom_output_area``: the generated text as Markdown, the
    ``validate_structure`` verdict, and the content-quality chart.

    A warning is printed instead when ``config`` is ``None`` or the problem
    text is empty. Any exception raised during generation, validation or
    scoring is caught and shown as a red error message rather than propagated.

    Args:
        button: The clicked button widget (unused).
    """
    # Local import: keeps this cell self-contained without touching the
    # notebook's import cell.
    from html import escape

    if config is None:
        with custom_output_area:
            print("⚠ Please configure backend and stage first")
        return

    problem = problem_input.value.strip()
    if not problem:
        with custom_output_area:
            print("⚠ Please enter a problem")
        return

    with custom_output_area:
        custom_output_area.clear_output()

        print("Generating reasoning for your problem...")
        print("=" * 80)

        # Build prompt
        user_prompt = build_user_prompt(problem)
        stage_config = config["stage_config"]

        # Generate from tuned model
        try:
            # Sampling controls come from the slider widgets when those exist
            # in the notebook globals; otherwise fixed defaults are used.
            output = config["tuned_backend"].generate(
                user_prompt,
                system_prompt=stage_config.system_prompt,
                max_new_tokens=max_tokens_slider.value if 'max_tokens_slider' in globals() else 2048,
                temperature=temperature_slider.value if 'temperature_slider' in globals() else 0.0,
                top_p=top_p_slider.value if 'top_p_slider' in globals() else 1.0,
                top_k=top_k_slider.value if 'top_k_slider' in globals() else 0,
            )

            # Display output with highlighting
            display(Markdown(f"## Generated Reasoning\n\n{output}"))

            # Validate structure
            is_valid, errors = validate_structure(output)
            if is_valid:
                display(HTML("<p style='color: green;'>✓ Structural validation passed!</p>"))
            else:
                # Escape each message so characters such as "<" or "&" are
                # shown literally instead of being parsed as markup.
                # NOTE(review): assumes validate_structure returns plain-text
                # messages, not pre-built HTML — confirm.
                error_items = ''.join(f'<li>{escape(str(err))}</li>' for err in errors)
                display(HTML(f"<p style='color: orange;'>⚠ Structural issues found:</p><ul>{error_items}</ul>"))

            # Score content quality
            scores = score_content_quality(output, problem)
            display(visualize_quality_scores(scores, "Your Problem"))

        except Exception as e:
            # Exception text often contains angle brackets (e.g. "<class ...>");
            # escape it so the message is not swallowed by HTML rendering.
            display(HTML(f"<p style='color: red;'>Error: {escape(str(e))}</p>"))

# Run generate_custom each time the button is clicked.
custom_generate_button.on_click(generate_custom)

# Show the heading, problem input, button and output area together in one VBox.
display(VBox([
    widgets.HTML("<h3>Try Your Own Problem</h3>"),
    problem_input,
    custom_generate_button,
    custom_output_area,
]))

---

## Summary

This notebook demonstrates:

1. **Structured Reasoning**: The 6-phase Navya-Nyaya methodology
2. **Model Comparison**: Side-by-side comparison of base vs tuned models
3. **Validation**: Structural and content quality validation
4. **Interactive Learning**: Exercises to understand Nyaya concepts
5. **Custom Problems**: Generate reasoning for your own logical problems

For more information, see the [Pramana documentation](docs/) and [CLAUDE.md](CLAUDE.md).