In [None]:
#| default_exp ai

# 🧠 AI-Powered Text Operations

Multi-provider AI operations using the `litellm` library for flexible text editing and analysis.

**Features:**
- Multi-model support (Gemini, Claude, OpenAI, etc.)
- Natural language instructions for text editing
- Web search capabilities for real-time information
- Transcript summarization and improvement
- Change explanation and analysis
- Efficient conversation management (single assistant message + cumulative user instructions)
- Natural language support for commands like "change it back" or "undo that"

In [None]:
#| export
from typing import List, Dict, Literal, Union
from pydantic import BaseModel, ConfigDict, Field, model_validator
from dotenv import load_dotenv
import re
import json

load_dotenv()

True

## 📦 Core AI Functions

AI-powered operations using litellm's flexible multi-provider interface.

**Main Functions:**
- `JSONChat()` — General-purpose AI chat with multi-model support
- `summarize_transcript()` — Generate concise summaries
- `explain_edits()` — Natural language explanation of changes
- `improve_transcript()` — Flexible text improvement with custom instructions

In [None]:
#| export

# --- Replace all ------------------------------------------------------------

class ReplaceAllOp(BaseModel):
    """Represents a 'replace all' text operation."""
    # Tag value that identifies this operation type inside an EditPlan.
    op: Literal["replace_all"]
    # Literal text to look for; must be non-empty. _apply_plan passes it to
    # str.replace, so it is never interpreted as a regex.
    find: str = Field(..., min_length=1)
    # Text substituted for every occurrence; an empty string is allowed.
    replace: str = Field(..., min_length=0)
    # Reject any key that is not declared above.
    model_config = ConfigDict(extra="forbid")


# --- Regex replace ------------------------------------------------------------

class RegexReplaceOp(BaseModel):
    """Represents a regex-based find/replace operation."""
    # Tag value that identifies this operation type inside an EditPlan.
    op: Literal["regex_replace"]
    # Regular expression to search for; must be non-empty and compilable.
    pattern: str = Field(..., min_length=1)
    # re.sub replacement template (may reference groups as \1, \2, ...).
    replacement: str = Field(..., min_length=0)
    # Reject any key that is not declared above.
    model_config = ConfigDict(extra="forbid")

    @model_validator(mode="after")
    def _validate_regex(self) -> "RegexReplaceOp":
        """Reject patterns or replacement templates that `re` cannot use.

        Raises:
            ValueError: if the pattern does not compile, or the replacement
                template is malformed or references a group the pattern
                does not define.
        """
        # "after" validators run on the constructed instance, so this is an
        # instance method that must return the instance itself.
        try:
            compiled = re.compile(self.pattern)
        except re.error as e:
            raise ValueError(f"Invalid regex pattern: {e}") from e

        # Substituting into an empty string parses the replacement template
        # without touching real text, so a bad group reference is reported
        # at validation time instead of when the plan is applied.
        try:
            compiled.sub(self.replacement, "")
        except (re.error, IndexError) as e:
            raise ValueError(f"Invalid regex replacement: {e}") from e
        return self

# --- Insert at absolute position ---------------------------------------------

class InsertAtOp(BaseModel):
    """Insert text at an absolute character position (0-indexed)."""
    # Tag value that identifies this operation type inside an EditPlan.
    op: Literal["insert_at"]
    # Text to insert; must be non-empty.
    text: str = Field(..., min_length=1)
    # Character index to insert at; must be >= 0. _apply_plan clamps values
    # beyond the end of the text to the text length.
    position: int = Field(..., ge=0)
    # Reject any key that is not declared above.
    model_config = ConfigDict(extra="forbid")

# --- Insert after marker ------------------------------------------------------

class InsertAfterOp(BaseModel):
    """Insert text after the first occurrence of a marker string."""
    # Tag value that identifies this operation type inside an EditPlan.
    op: Literal["insert_after"]
    # Text to insert; must be non-empty.
    text: str = Field(..., min_length=1)
    # Marker string to search for; must be non-empty. _apply_plan leaves the
    # text unchanged when the marker is not found.
    after: str = Field(..., min_length=1)
    # Reject any key that is not declared above.
    model_config = ConfigDict(extra="forbid")

# --- Delete -------------------------------------------------------------------

class DeleteOp(BaseModel):
    """Delete exact text (first or all occurrences)."""
    # Tag value that identifies this operation type inside an EditPlan.
    op: Literal["delete"]
    # Exact text to remove; must be non-empty.
    text: str = Field(..., min_length=1)
    # True removes every occurrence; False (the default) removes only the first.
    all_occurrences: bool = False
    # Reject any key that is not declared above.
    model_config = ConfigDict(extra="forbid")


# --- Edit plan container ------------------------------------------------------

class EditPlan(BaseModel):
    """Represents a list of text operations to apply sequentially."""
    # Ordered operations. Each item must validate as one of the op models
    # listed here; the models are told apart by their literal `op` value.
    ops: List[
        Union[
            ReplaceAllOp,
            RegexReplaceOp,
            InsertAtOp,
            InsertAfterOp,
            DeleteOp,
        ]
    ]
    # Reject any top-level key other than `ops`.
    model_config = ConfigDict(extra="forbid")

## 🧰 Conversation Management

The AI conversation uses a hybrid context pattern for efficiency.

**Session State:**
- `_messages` — conversation history
- `_current` — current transcript after applied edits

**Structure:**
- **System message:** defines AI role and available operations
- **Assistant message:** contains current transcript (updated after each edit)
- **User messages:** cumulative instruction history

**Example after 2 edits:**
```json
[
  {"role": "system", "content": "You are a precise text editor..."},
  {"role": "assistant", "content": "Current text to edit:\nI met oscar on Monday."},
  {"role": "user", "content": "Instruction: Change him to oscar"},
  {"role": "user", "content": "Instruction: Change yesterday to on Monday"}
]
```

**Functions:**
- `_new_conversation(transcript)` — initializes conversation with system and assistant messages
- `_set_current_transcript(new_transcript)` — updates assistant message with latest transcript

In [None]:
# | export
# --- session state (module-level) ---
# Conversation history sent to the model on each call. None until
# start_session() assigns it; reset_session() sets it back to None.
_messages: List[Dict[str, str]] | None = None
# Latest transcript after applied edits. None until start_session() assigns
# it; reset_session() sets it back to None.
_current: str | None = None

def _new_conversation(transcript: str) -> List[Dict[str, str]]:
    """Build the two seed messages for a fresh edit conversation.

    Args:
        transcript: Text the model will be asked to edit.

    Returns:
        A new list holding the system prompt (which describes the available
        edit operations) followed by one assistant message that carries
        the transcript.
    """
    # Kept as separate fragments so each operation's description stays easy
    # to scan; joined with no separator because every fragment already ends
    # with its own newline(s).
    prompt_fragments = [
        "You are a precise text editor that outputs ONLY valid JSON matching the EditPlan schema.\n\n",
        "Available operations:\n",
        "1. replace_all ‚Äî exact literal text only (no regex)\n",
        "   fields:\n",
        "       - find: the exact text to replace\n",
        "       - replace: replacement text for every occurrence\n\n",
        "2. regex_replace - pattern-based replacements (e.g., dates)\n",
        "   fields:\n",
        "       - pattern: regex pattern to match (e.g., (\\d{4})-(\\d{2})-(\\d{2}) for dates)\n",
        "       - replacement: replacement string using \\1, \\2 for capture groups\n\n",
        "3. insert_at ‚Äî insert text at an absolute index (0 = start)\n",
        "   fields:\n",
        "       - text: text to insert\n",
        "       - position: integer index to insert at\n\n",
        "4. insert_after ‚Äî insert text after a marker\n",
        "   fields:\n",
        "       - text: text to insert\n",
        "       - after: insert after the first occurrence of this string\n",
        "       (ALWAYS provide a space in the string if needed when doing insert)\n\n",
        "5. delete ‚Äî remove exact text\n",
        "   fields:\n",
        "       - text: the exact text to remove\n",
        "       - all_occurrences: true = remove all, false = only first (default false)\n\n",
        "RULES:\n",
        "- If you see regex patterns or date formats, you MUST use regex_replace, NOT replace_all!\n",
        "- When interpreting natural or spoken language, infer the user's intent precisely and map it to the correct fields.\n",
        "- ALWAYS provide a space in text to insert if needed.\n",
        "- Respond ONLY with valid JSON following the EditPlan schema.",
    ]
    system_message = {"role": "system", "content": "".join(prompt_fragments)}
    transcript_message = {
        "role": "assistant",
        "content": f"Current text to edit:\n{transcript}",
    }
    return [system_message, transcript_message]


def _set_current_transcript(new_transcript: str) -> None:
    """Point the conversation's assistant message at the latest transcript.

    Rewrites the content of the first assistant message in ``_messages``.
    If the history has no assistant message, one is inserted at index 1.

    Args:
        new_transcript: Transcript text to embed in the assistant message.
    """
    global _messages
    refreshed = f"Current text to edit:\n{new_transcript}"
    holder = next(
        (msg for msg in _messages if msg.get("role") == "assistant"),
        None,
    )
    if holder is None:
        # No transcript message yet: add one at index 1.
        _messages.insert(1, {"role": "assistant", "content": refreshed})
    else:
        holder["content"] = refreshed

## 🤖 Core Functions

**`_plan_edits(instruction, model)`**
- Appends user instruction to conversation
- Calls the LLM with `response_format={"type": "json_object"}` and validates the returned JSON against `EditPlan`
- Returns parsed `EditPlan` object

**`_apply_plan(transcript, plan)`**
- Applies all operations in `EditPlan` sequentially to the transcript
- Supports: `replace_all`, `regex_replace`, `insert_at`, `insert_after`, `delete`
- Returns updated transcript

In [None]:
# | export
from litellm import completion


def _plan_edits(instruction: str, model: str = "gemini/gemini-2.0-flash-exp") -> EditPlan:
    """Ask the model for an EditPlan that carries out ``instruction``.

    The instruction is sent together with the existing conversation history.
    It is recorded in ``_messages`` only once the reply has been parsed and
    validated, so a failed call does not leave an instruction in the history
    that was never applied to the transcript.

    Args:
        instruction: Natural-language edit request.
        model: litellm model identifier.

    Returns:
        The validated EditPlan parsed from the model's JSON reply.

    Raises:
        RuntimeError: if no conversation has been initialized, or if the
            reply is not valid JSON or does not validate as an EditPlan.
    """
    if _messages is None:
        raise RuntimeError("No session. Call start_session() first.")

    user_message = {"role": "user", "content": f"Instruction: {instruction}"}

    # Use litellm.completion to get a JSON-object response.
    response = completion(
        model=model,
        messages=[*_messages, user_message],
        response_format={"type": "json_object"},
        temperature=0,
    )

    # Extract the JSON content from the response.
    content = response.choices[0].message.content

    # Parse JSON and validate with Pydantic. Both json.JSONDecodeError and
    # pydantic's ValidationError derive from ValueError; TypeError covers a
    # reply whose content is None.
    try:
        data = json.loads(content)
        # Handle case where LLM returns array directly instead of {"ops": [...]}
        if isinstance(data, list):
            data = {"ops": data}
        plan = EditPlan.model_validate(data)
    except (ValueError, TypeError) as e:
        raise RuntimeError(
            f"Failed to parse model response as EditPlan: {e}\nResponse: {content}"
        ) from e

    # Only instructions that produced a usable plan become part of history.
    _messages.append(user_message)
    return plan


def _apply_plan(transcript: str, plan: EditPlan) -> str:
    """
    Apply all operations from the EditPlan to the given transcript.
    """
    updated = transcript
    for op in plan.ops:
        if op.op == "replace_all":
            updated = updated.replace(op.find, op.replace)
        elif op.op == "regex_replace":
            updated = re.sub(op.pattern, op.replacement, updated)
        elif op.op == "insert_at":
            pos = max(0, min(op.position, len(updated)))
            updated = updated[:pos] + op.text + updated[pos:]
        elif op.op == "insert_after":
            idx = updated.find(op.after)
            if idx != -1:
                insert_pos = idx + len(op.after)
                updated = updated[:insert_pos] + op.text + updated[insert_pos:]
        elif op.op == "delete":
            if op.all_occurrences:
                updated = updated.replace(op.text, "")
            else:
                # Delete first occurrence only
                idx = updated.find(op.text)
                if idx != -1:
                    updated = updated[:idx] + updated[idx + len(op.text):]
    return updated

class JSONChat:
    """Small multi-turn chat wrapper that requests a JSON-object reply.

    Attributes:
        model: litellm model identifier passed to ``completion``.
        history: Message dicts (role/content) accumulated across calls and
            sent in full on every call.
    """

    def __init__(self, model="claude-sonnet-4-20250514"):
        self.model = model
        self.history = []

    def __call__(self, prompt: str):
        """Send ``prompt`` with the accumulated history and return the parsed reply.

        Both the prompt and the raw reply are appended to ``history``.

        Returns:
            The decoded JSON value, or ``{"error": "Invalid JSON", "raw": ...}``
            when the reply content is missing or is not valid JSON.
        """
        self.history.append({"role": "user", "content": prompt})
        resp = completion(
            model=self.model,
            messages=self.history,
            response_format={"type": "json_object"},
        )
        # Attribute access, matching how _plan_edits reads the reply.
        content = resp.choices[0].message.content
        self.history.append({"role": "assistant", "content": content})
        # Try to parse JSON safely; TypeError covers a reply with no content
        # (json.loads(None)), which would otherwise escape as an exception.
        try:
            return json.loads(content)
        except (json.JSONDecodeError, TypeError):
            return {"error": "Invalid JSON", "raw": content}


## 🔌 Public API

Functions for managing edit sessions and applying instructions.

In [None]:
# | export
def has_session() -> bool:
    """Tell whether both the message history and the transcript are set."""
    missing_state = _messages is None or _current is None
    return not missing_state

def start_session(initial_transcript: str) -> str:
    """Begin a new edit session from ``initial_transcript``.

    Stores the transcript as the current text and builds a fresh
    conversation around it, replacing any previous session state.

    Returns:
        The transcript that was passed in.
    """
    global _messages, _current
    _current = initial_transcript
    _messages = _new_conversation(_current)
    return initial_transcript

def apply_instruction(instruction: str, model: str = "gemini/gemini-2.5-flash") -> str:
    """Plan and apply one edit instruction against the current transcript.

    Args:
        instruction: Natural-language edit request.
        model: litellm model identifier forwarded to ``_plan_edits``.

    Returns:
        The transcript after the planned operations were applied.

    Raises:
        RuntimeError: if no session has been started.
    """
    global _current
    if has_session():
        edit_plan = _plan_edits(instruction, model)
        edited = _apply_plan(_current, edit_plan)
        _current = edited
        # Keep the assistant message in the history in step with the new text.
        _set_current_transcript(edited)
        return edited
    raise RuntimeError("No session. Call start_session() first.")

def current_transcript() -> str:
    """Return the latest edited transcript, or '' when there is none."""
    if _current:
        return _current
    return ""

def reset_session() -> None:
    """Drop the conversation history and the current transcript."""
    global _messages, _current
    _messages = None
    _current = None

## 🧩 Inspecting the Conversation

Print the `_messages` list to see what the model sees on each call.

**Key observations:**
- One assistant message with the current transcript
- Multiple user instructions recording session history

In [None]:
#| eval: false
from pprint import pprint

# Dump the module-level conversation history exactly as it would be sent to
# the model on the next call (prints None when no session has been started).
print("üß© Message history:")
pprint(_messages)


üß© Message history:
[{'content': 'You are a precise text editor that outputs ONLY valid JSON '
             'matching the EditPlan schema.\n'
             '\n'
             'Available operations:\n'
             '1. replace_all ‚Äî exact literal text only (no regex)\n'
             '   fields:\n'
             '       - find: the exact text to replace\n'
             '       - replace: replacement text for every occurrence\n'
             '\n'
             '2. regex_replace - pattern-based replacements (e.g., dates)\n'
             '   fields:\n'
             '       - pattern: regex pattern to match (e.g., '
             '(\\d{4})-(\\d{2})-(\\d{2}) for dates)\n'
             '       - replacement: replacement string using \\1, \\2 for '
             'capture groups\n'
             '\n'
             '3. insert_at ‚Äî insert text at an absolute index (0 = start)\n'
             '   fields:\n'
             '       - text: text to insert\n'
             '       - position: integer index 

## ✅ Summary

**Architecture:**
- Hybrid context: single assistant message (current state) + cumulative user instructions (history)
- Efficient for long transcripts with complex edit sequences
- Supports natural, conversational editing patterns

**Supported Operations:**
1. `replace_all` — exact text replacement
2. `regex_replace` — pattern-based with capture groups (\1, \2, etc.)
3. `insert_at` — insert at character position (0-indexed)
4. `insert_after` — insert after marker string
5. `delete` — remove first or all occurrences

**Future Enhancements:**
- Token usage tracking
- Operation history with undo/redo
- UI integration (TUI/web)

## 🧪 Testing Different Edit Types with Litellm

Let's test various text editing operations using litellm's natural language interface.

In [None]:
#| eval: false
### Test 1: Regex Replace - Format dates
initial_transcript = "Meeting on 2025-10-07 and another on 2025-12-25."
# Seed the module-level session state directly (the same two values that
# start_session() assigns).
_messages = _new_conversation(initial_transcript)
_current = initial_transcript
# Use regex to convert dates from YYYY-MM-DD to MM/DD/YYYY
# (no model argument is passed, so _plan_edits uses its default model).
plan = _plan_edits("Convert all dates from YYYY-MM-DD format to MM/DD/YYYY format")
# Apply the plan locally; the assistant message inside _messages is not
# refreshed here because _set_current_transcript() is not called.
_current = _apply_plan(_current, plan)
print(f"Original: {initial_transcript}")
print(f"Result:   {_current}")
print(f"Plan: {plan.model_dump_json(indent=2)}")

Original: Meeting on 2025-10-07 and another on 2025-12-25.
Result:   Meeting on 10/07/2025 and another on 12/25/2025.
Plan: {
  "ops": [
    {
      "op": "regex_replace",
      "pattern": "(\\d{4})-(\\d{2})-(\\d{2})",
      "replacement": "\\2/\\3/\\1"
    }
  ]
}


In [None]:
#| eval: false
### Test 2: Insert Space
# NOTE: this cell goes through JSONChat (free-form JSON reply), not through
# the EditPlan pipeline (_plan_edits / _apply_plan).

test_text = "HelloWorld"
print(f"Original: {test_text}")

instruction = "Add a space between Hello and World. Return only the edited text."
# A fresh JSONChat starts with an empty history and its default model.
chat = JSONChat()
# The reply is parsed as JSON; its keys are chosen by the model (the recorded
# run returned a single 'edited_text' key).
result = chat(instruction + f"\n\nText: {test_text}")
print(f"Result:   {result}")

Original: HelloWorld
Result:   {'edited_text': 'Hello World'}
Result:   {'edited_text': 'Hello World'}


In [None]:
#| eval: false
### Test 3: Insert After Marker
# NOTE: this cell goes through JSONChat (free-form JSON reply), not through
# the EditPlan insert_after operation.

test_text = "Hello, my name is John. I love coding."
print(f"Original: {test_text}")

instruction = "Add ' Smith' right after 'John'. Return only the edited text."
# A fresh JSONChat starts with an empty history and its default model.
chat = JSONChat()
# The reply is parsed as JSON; its keys are chosen by the model (the recorded
# run returned a single 'edited_text' key).
result = chat(instruction + f"\n\nText: {test_text}")
print(f"Result:   {result}")

Original: Hello, my name is John. I love coding.
Result:   {'edited_text': 'Hello, my name is John Smith. I love coding.'}
Result:   {'edited_text': 'Hello, my name is John Smith. I love coding.'}


In [None]:

#| eval: false
### Test 5: Delete - All occurrences

test_text = "I like apples and apples are great!"
# Seed the module-level session state directly (the same two values that
# start_session() assigns).
_messages = _new_conversation(test_text)
_current = test_text

# No model argument is passed, so _plan_edits uses its default model.
plan = _plan_edits("Delete all occurrences of 'apples' and make sure spacing is correct.")
# Operations run in order, so a later op sees the text left by an earlier one
# (in the recorded run: the delete first, then a replace_all fixing the spaces).
_current = _apply_plan(_current, plan)
print(f"Original: {test_text}")
print(f"Result:   {_current}")
print(f"Plan: {plan.model_dump_json(indent=2)}")

Original: I like apples and apples are great!
Result:   I like and are great!
Plan: {
  "ops": [
    {
      "op": "delete",
      "text": "apples",
      "all_occurrences": true
    },
    {
      "op": "replace_all",
      "find": "I like  and  are great!",
      "replace": "I like and are great!"
    }
  ]
}


In [None]:
#| eval: false
### Test 6: Complex Multi-Operation Edit (using JSONChat with EditPlan)

test_text = "The meeting is on 2025-10-07 at the office. Please confirm."
print(f"Original: {test_text}")

# Get the schema as JSON for the prompt
schema_json = EditPlan.model_json_schema()

# The schema is embedded right after the sentence that refers to it, so the
# model actually sees the structure it is asked to match.
instruction = f"""Return ONLY valid JSON matching this exact schema:

{json.dumps(schema_json, indent=2)}

Create operations to edit this text: "{test_text}"

Tasks:
1. Change date from YYYY-MM-DD to MM/DD/YYYY format
2. Change 'office' to 'conference room' (use "op": "replace_all", "find": "office", "replace": "conference room")
3. Add ' (urgent)' right before the period at the end (use "op": "insert_after", "text": " (urgent)", "after": "confirm")

Return ONLY the JSON object with an "ops" array containing these 3 operations."""

# JSONChat returns the decoded JSON reply (or an error dict if it was not JSON).
chat = JSONChat()
plan_json = chat(instruction)
print(f"Plan JSON: {json.dumps(plan_json, indent=2)}")

# Parse and apply the plan using EditPlan
plan = EditPlan.model_validate(plan_json)
result = _apply_plan(test_text, plan)
print(f"Result:   {result}")

Original: The meeting is on 2025-10-07 at the office. Please confirm.
Plan JSON: {
  "ops": [
    {
      "op": "replace_all",
      "find": "2025-10-07",
      "replace": "10/07/2025"
    },
    {
      "op": "replace_all",
      "find": "office",
      "replace": "conference room"
    },
    {
      "op": "insert_after",
      "text": " (urgent)",
      "after": "confirm"
    }
  ]
}
Result:   The meeting is on 10/07/2025 at the conference room. Please confirm (urgent).
Plan JSON: {
  "ops": [
    {
      "op": "replace_all",
      "find": "2025-10-07",
      "replace": "10/07/2025"
    },
    {
      "op": "replace_all",
      "find": "office",
      "replace": "conference room"
    },
    {
      "op": "insert_after",
      "text": " (urgent)",
      "after": "confirm"
    }
  ]
}
Result:   The meeting is on 10/07/2025 at the conference room. Please confirm (urgent).


## 🔍 Inspecting Full Response Structure

Let's examine the complete `ModelResponse` object that litellm returns, to understand all available fields and metadata.

In [None]:
#| eval: false
### Example: Complete Response Structure

import json

# Make a simple query through litellm's completion() (imported above); no
# `Chat` helper is defined or imported in this notebook.
response = completion(
    model="gemini/gemini-2.5-flash",
    messages=[{"role": "user", "content": "What is 2 + 2?"}],
)

print("=" * 70)
print("üì¶ FULL RESPONSE OBJECT")
print("=" * 70)
print(f"\nType: {type(response)}")
print(f"\nResponse object: {response}")

print("\n" + "=" * 70)
print("üìã RESPONSE ATTRIBUTES")
print("=" * 70)
print(f"\nAll attributes: {dir(response)}")

print("\n" + "=" * 70)
print("üéØ KEY FIELDS")
print("=" * 70)
print(f"\nresponse.choices: {response.choices}")
print(f"\nType of choices: {type(response.choices)}")
print(f"\nFirst choice: {response.choices[0]}")

print("\n" + "=" * 70)
print("üí¨ MESSAGE CONTENT")
print("=" * 70)
print(f"\nMessage object: {response.choices[0].message}")
print(f"\nContent: {response.choices[0].message.content}")
print(f"\nRole: {response.choices[0].message.role}")

print("\n" + "=" * 70)
print("üìä COMPLETE STRUCTURE (JSON formatted)")
print("=" * 70)
# Convert to dict if possible for better inspection; default=str keeps the
# dump from failing on values json cannot serialize natively.
if hasattr(response, 'model_dump'):
    print(json.dumps(response.model_dump(), indent=2, default=str))
elif hasattr(response, '__dict__'):
    print(json.dumps(response.__dict__, indent=2, default=str))
else:
    # No dict view available (vars() would raise here), so show the repr.
    print(repr(response))

üì¶ FULL RESPONSE OBJECT

Type: <class 'litellm.types.utils.ModelResponse'>

Response object: ModelResponse(id='Hlj3aNnODLHwnsEPr7DCsQY', created=1761040412, model='gemini-2.5-flash', object='chat.completion', system_fingerprint=None, choices=[Choices(finish_reason='stop', index=0, message=Message(content='2 + 2 = 4', role='assistant', tool_calls=None, function_call=None, provider_specific_fields=None))], usage=Usage(completion_tokens=29, prompt_tokens=9, total_tokens=38, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=None, audio_tokens=None, reasoning_tokens=22, rejected_prediction_tokens=None, text_tokens=7), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=None, text_tokens=9, image_tokens=None)), vertex_ai_grounding_metadata=[], vertex_ai_url_context_metadata=[], vertex_ai_safety_results=[], vertex_ai_citation_metadata=[])

üìã RESPONSE ATTRIBUTES

All attributes: ['__abstractmethods__', '__annotations__', '__c

In [None]:
#from smolagents import CodeAgent, InferenceClientModel, WebSearchTool
#https://huggingface.co/docs/smolagents/index


# Connect to running vLLM server using OpenAI-compatible API
# model = OpenAIServerModel(
#     model_id="Qwen/Qwen3-4B-Instruct-2507",
#     api_base="http://localhost:8000/v1",
#     api_key="dummy",  # vLLM doesn't require a real API key
#     temperature=0.1,  # Lower temperature for more consistent output
# )

# model = InferenceClientModel()
# agent = CodeAgent(
#     tools=[WebSearchTool()],
#     model = model 
# )

# # Test with a simple question first
# print("Testing simple question...")
# response = agent.run("What is the square root of 75?")
# print(f"Response: {response}")