<a href="https://colab.research.google.com/github/N23-17/1theka/blob/main/FT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import argparse
import subprocess
import sys
from pathlib import Path


# ==============================
# OLLAMA LLM CLIENT
# ==============================

class OllamaClient:
    """Minimal client that sends prompts to a model through the ``ollama`` CLI."""

    def __init__(self, model="llama3"):
        # Model name passed as the argument to ``ollama run``.
        self.model = model

    def generate(self, prompt: str) -> str:
        """Run ``ollama run <model>`` with *prompt* on stdin and return the reply.

        Args:
            prompt: Full prompt text, written to the subprocess's standard input.

        Returns:
            The subprocess's standard output with surrounding whitespace stripped.

        Raises:
            SystemExit: With status 1 when the ``ollama`` executable cannot be
                started or exits with a non-zero status. The reason is printed
                to standard error before exiting.
        """
        try:
            result = subprocess.run(
                ["ollama", "run", self.model],
                input=prompt,
                text=True,
                # Pin the pipe encoding instead of relying on the locale
                # default, which may be unable to encode the prompt (e.g.
                # cp1252 on Windows). Assumes the CLI speaks UTF-8 -- TODO confirm.
                encoding="utf-8",
                errors="replace",
                capture_output=True,
                check=True,
            )
        except OSError as e:
            # Raised when the executable is missing or cannot be launched;
            # report it the same way as a failed run instead of a traceback.
            print("Error calling Ollama:", file=sys.stderr)
            print(e, file=sys.stderr)
            sys.exit(1)
        except subprocess.CalledProcessError as e:
            print("Error calling Ollama:", file=sys.stderr)
            print(e.stderr, file=sys.stderr)
            sys.exit(1)
        return result.stdout.strip()


# ==============================
# STAGE 1 — FACT EXTRACTION
# ==============================

def extract_facts(text: str, mode: str, llm: OllamaClient) -> str:
    """Run Stage 1: ask the model to list categorized factual atoms from *text*.

    Args:
        text: Source material, placed verbatim after the ``TEXT:`` marker.
        mode: Requested output format; upper-cased before it is written into
            the prompt's ``OUTPUT MODE`` line.
        llm: Client whose ``generate`` method receives the assembled prompt.

    Returns:
        Whatever ``llm.generate`` returns for the assembled prompt.
    """
    output_mode = mode.upper()

    # One entry per prompt line; the empty first and last entries produce the
    # leading and trailing newlines of the final prompt.
    prompt_lines = (
        "",
        "You are a forensic fact analyst.",
        "",
        "Your task is to extract explicit, verifiable factual atoms from the text below",
        "and organize them into structured categories.",
        "",
        "STRICT RULES:",
        "- One fact per item.",
        "- Each fact must be a short, self-contained sentence.",
        "- Do NOT interpret, explain, justify, or speculate.",
        "- Do NOT merge multiple facts.",
        "- If a fact is implied but not clearly stated, place it under Assumption.",
        "- If a category has no valid entries, omit it entirely.",
        "",
        "CATEGORIES:",
        "- Event",
        "- Decision",
        "- Timeline",
        "- Person / Organization",
        "- Number / Statistic",
        "- Claim / Argument",
        "- Risk",
        "- Assumption",
        "- Open Question / Uncertainty",
        "",
        f"OUTPUT MODE: {output_mode}",
        "",
        "If OUTPUT MODE is TEXT:",
        "- Use clear section headers",
        "- Use bullet points",
        "",
        "If OUTPUT MODE is JSON:",
        "- Return a valid JSON object",
        "- Keys must be snake_case",
        "- Values must be arrays of strings",
        "- Do not include empty arrays",
        "- Do not include commentary",
        "",
        "TEXT:",
        f"{text}",
        "",
    )
    return llm.generate("\n".join(prompt_lines))


# ==============================
# STAGE 2 — INSIGHT GENERATION
# ==============================

def generate_insights(facts: str, llm: OllamaClient) -> str:
    """Run Stage 2: ask the model for implications and teaching points.

    Args:
        facts: Stage 1 output, placed verbatim after the ``FACTS:`` marker.
        llm: Client whose ``generate`` method receives the assembled prompt.

    Returns:
        Whatever ``llm.generate`` returns for the assembled prompt.
    """
    # Fixed instruction text; the caller's facts are appended right after it.
    instructions = (
        "\n"
        "You are a second-order thinker.\n"
        "\n"
        "You are given structured facts extracted from a text.\n"
        "Do NOT add new facts.\n"
        "\n"
        "Pay special attention to:\n"
        "- Decisions\n"
        "- Assumptions\n"
        "- Risks\n"
        "- Open Questions\n"
        "\n"
        "TASKS:\n"
        "\n"
        "1) Generate THREE non-obvious implications that logically follow\n"
        "   but are not explicitly stated.\n"
        "\n"
        "2) Convert the facts into reusable TEACHING POINTS.\n"
        "   - Express them as general principles.\n"
        "   - Reference the relevant fact category.\n"
        "\n"
        "FORMAT:\n"
        "\n"
        "Non-Obvious Implications:\n"
        "1.\n"
        "2.\n"
        "3.\n"
        "\n"
        "Teaching Points:\n"
        "- Lesson:\n"
        "  Based on:\n"
        "\n"
        "FACTS:\n"
    )
    full_prompt = f"{instructions}{facts}\n"
    return llm.generate(full_prompt)


# ==============================
# MAIN CLI
# ==============================

def main():
    """Command-line entry point: read a text file and run both LLM stages on it.

    Parses the positional ``input`` path, ``--mode`` (``text`` or ``json``,
    default ``text``) and ``--model`` (default ``llama3``), then prints the
    Stage 1 facts followed by the Stage 2 insights.

    Raises:
        SystemExit: With status 1 when the input file is missing or cannot be
            read as UTF-8 text. argparse also exits on invalid arguments.
    """
    parser = argparse.ArgumentParser(description="Insight Distiller (Ollama Edition)")
    parser.add_argument("input", help="Path to input text file")
    parser.add_argument(
        "--mode",
        choices=["text", "json"],
        default="text",
        help="Stage 1 output format"
    )
    parser.add_argument(
        "--model",
        default="llama3",
        help="Ollama model name (default: llama3)"
    )

    args = parser.parse_args()

    input_path = Path(args.input)

    # Read first and handle the failure, rather than testing ``exists()``:
    # a directory or unreadable file passes that check and would then crash
    # with a traceback, and the file can vanish between check and open.
    try:
        raw_text = input_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        print("Input file not found.", file=sys.stderr)
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as err:
        print(f"Could not read input file: {err}", file=sys.stderr)
        sys.exit(1)

    llm = OllamaClient(model=args.model)

    print("\nRunning Stage 1 — Fact Extraction...\n")
    facts = extract_facts(raw_text, args.mode, llm)

    print("=== STAGE 1 OUTPUT ===\n")
    print(facts)

    # Stage 2 consumes the Stage 1 output as-is, whichever mode produced it.
    print("\nRunning Stage 2 — Insight Generation...\n")
    insights = generate_insights(facts, llm)

    print("=== STAGE 2 OUTPUT ===\n")
    print(insights)


# Run the CLI only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()