In [None]:
#| default_exp ai

In [3]:
#| export
from dotenv import load_dotenv
from lisette import *

load_dotenv()

True

In [10]:
#| export
class TranscriptEditor:
    """Manages live transcription with AI-assisted editing capabilities."""
    
    def __init__(self, model: str, temperature: float = 0.1):
        self.chat = Chat(
            model,
            sp="""You are helping with live transcription. As the user speaks, you'll receive each transcribed chunk (Each chunk being a line of text).
Your job is to:
1. Detect when the user wants to edit previous text (e.g., "change that to...", "delete the last part", "replace hamburgers with pizza")
2. When an edit is requested, return ONLY the complete corrected/edited transcript
3. When it's just new text, return ONLY the word "APPEND"
4. Keep the conversation history to understand context

Format your responses as:
- For edits: Return the full corrected transcript, each sentence on its own line
- For new text: APPEND""",
            temp=temperature
        )
        self.full_transcript = ""
        self.total_tokens = 0
    
    def process_chunk(self, chunk: str) -> dict:
        """Process a transcription chunk and determine if it's new text or an edit."""
        
        # Send the chunk with context about current transcript
        response = self.chat(chunk)
        
        result = response.choices[0].message.content.strip()
        tokens = response.usage.total_tokens if hasattr(response, "usage") else 0
        self.total_tokens += tokens
        
        # Determine if it's an append or edit
        if result.startswith("APPEND"):
            action = "append"
        else:
            # It's an edit - replace full transcript
            self.full_transcript = result
            action = "edit"
        
        return {
            "transcript": self.full_transcript,
            "action": action,
            "tokens_used": tokens,
            "total_tokens": self.total_tokens
        }
    
    def get_transcript(self) -> str:
        """Get the current full transcript."""
        return self.full_transcript
    
    def reset(self):
        """Reset the transcript and chat history."""
        self.full_transcript = ""
        self.total_tokens = 0
        self.chat = Chat(
            self.chat.model,
            sp=self.chat.sp,
            temp=self.chat.temp
        )

In [12]:
#| eval: false

# Test the TranscriptEditor
editor = TranscriptEditor("openai/gpt-4o-mini")

# Simulate transcription chunks
chunks = [
    "My name is Batman.\n",
    "I love pizza.\n",
    "This transcriber is working quite well.",
    "Actually, change pizza to hamburgers.",
    "Maybe even delete that first sentence about my name."
]

for chunk in chunks:
    result = editor.process_chunk(chunk)
    print(f"\n--- Chunk: {chunk}")
    print(f"Action: {result['action']}")
    print(f"Current transcript:\n{result['transcript']}")
    print(f"Tokens used: {result['tokens_used']}")


--- Chunk: My name is Batman.

Action: append
Current transcript:

Tokens used: 155

--- Chunk: I love pizza.

Action: append
Current transcript:

Tokens used: 169

--- Chunk: This transcriber is working quite well.
Action: append
Current transcript:

Tokens used: 187

--- Chunk: Actually, change pizza to hamburgers.
Action: edit
Current transcript:
My name is Batman.  
I love hamburgers.  
This transcriber is working quite well.
Tokens used: 223

--- Chunk: Maybe even delete that first sentence about my name.
Action: edit
Current transcript:
I love hamburgers.  
This transcriber is working quite well.
Tokens used: 255
