From 646fd074709693c674e4b99c2153e95b2e0b5ae8 Mon Sep 17 00:00:00 2001
From: Tony Giorgio <tonygiorgio@protonmail.com>
Date: Fri, 12 Sep 2025 18:44:08 -0500
Subject: [PATCH 1/6] feat: add TTS capabilities to chat messages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Added speaker icon button next to copy button for assistant messages
- Integrated TTS via the OpenAI-compatible speech API using the kokoro model and af_sky+af_bella voice
- Audio plays directly in browser without downloading
- Click speaker icon to play/stop audio
- Made copy and TTS buttons always visible (removed hover-only display)
- Fixed button hover boundaries to prevent overlap

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

Bump @opensecret/react to 1.4.3

feat: add microphone recording with Whisper transcription

- Add RecordRTC library for proper WAV audio recording
- Implement mic button with visual states (orange when recording)
- Configure audio with echo cancellation and noise suppression
- Optimize for speech with 16kHz mono WAV format
- Integrate with OpenSecret SDK's transcribeAudio API
- Append transcribed text to message input field

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 frontend/bun.lock                          |  10 +-
 frontend/package.json                      |   4 +-
 frontend/src/components/ChatBox.tsx        | 106 +++++++++++++++++++-
 frontend/src/routes/_auth.chat.$chatId.tsx | 108 ++++++++++++++++++---
 4 files changed, 213 insertions(+), 15 deletions(-)

diff --git a/frontend/bun.lock b/frontend/bun.lock
index 3ab60275..c6f3e148 100644
--- a/frontend/bun.lock
+++ b/frontend/bun.lock
@@ -4,7 +4,7 @@
     "": {
       "name": "maple",
       "dependencies": {
-        "@opensecret/react": "1.4.0",
+        "@opensecret/react": "1.4.3",
         "@radix-ui/react-alert-dialog": "^1.1.1",
         "@radix-ui/react-avatar": "^1.1.0",
         "@radix-ui/react-dialog": "^1.1.1",
@@ -30,6 +30,7 @@
         "react": "^18.3.1",
         "react-dom": "^18.3.1",
         "react-markdown": "^9.0.1",
+        "recordrtc": "^5.6.2",
         "rehype-highlight": "^7.0.0",
         "rehype-katex": "^7.0.1",
         "rehype-sanitize": "^6.0.0",
@@ -51,6 +52,7 @@
         "@types/node": "^22.3.0",
         "@types/react": "^18.3.3",
         "@types/react-dom": "^18.3.0",
+        "@types/recordrtc": "^5.6.14",
         "@types/uuid": "^10.0.0",
         "@vitejs/plugin-react": "^4.3.1",
         "autoprefixer": "^10.4.20",
@@ -217,7 +219,7 @@
 
     "@nodelib/fs.walk": ["@nodelib/fs.walk@1.2.8", "", { "dependencies": { "@nodelib/fs.scandir": "2.1.5", "fastq": "^1.6.0" } }, "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg=="],
 
-    "@opensecret/react": ["@opensecret/react@1.4.0", "", { "dependencies": { "@peculiar/x509": "^1.12.2", "@stablelib/base64": "^2.0.0", "@stablelib/chacha20poly1305": "^2.0.0", "@stablelib/random": "^2.0.0", "cbor2": "^1.7.0", "tweetnacl": "^1.0.3", "zod": "^3.23.8" }, "peerDependencies": { "react": "^18.0.0 || ^19.0.0", "react-dom": "^18.0.0 || ^19.0.0" } }, "sha512-21L4V1AWoKTzcMKwe4OrM+sj3BKl5ATMODdFwsh+wdRvmYrG/OXf9AXmO7cP9LfsN7Gb1nj3fTmnwU8Gbx//Kw=="],
+    "@opensecret/react": ["@opensecret/react@1.4.3", "", { "dependencies": { "@peculiar/x509": "^1.12.2", "@stablelib/base64": "^2.0.0", "@stablelib/chacha20poly1305": "^2.0.0", "@stablelib/random": "^2.0.0", "cbor2": "^1.7.0", "tweetnacl": "^1.0.3", "zod": "^3.23.8" }, "peerDependencies": { "react": "^18.0.0 || ^19.0.0", "react-dom": "^18.0.0 || ^19.0.0" } }, "sha512-lsBsPRM9tsY9C8y7hHxLe8MOKlvNVI2B3X0XLWUqn/Prm51iAl5anWsRbEhKBvvNvWjlW/gfgHBfx+i2B0tvAw=="],
 
     "@peculiar/asn1-cms": ["@peculiar/asn1-cms@2.3.15", "", { "dependencies": { "@peculiar/asn1-schema": "^2.3.15", "@peculiar/asn1-x509": "^2.3.15", "@peculiar/asn1-x509-attr": "^2.3.15", "asn1js": "^3.0.5", "tslib": "^2.8.1" } }, "sha512-B+DoudF+TCrxoJSTjjcY8Mmu+lbv8e7pXGWrhNp2/EGJp9EEcpzjBCar7puU57sGifyzaRVM03oD5L7t7PghQg=="],
 
@@ -469,6 +471,8 @@
 
     "@types/react-dom": ["@types/react-dom@18.3.5", "", { "peerDependencies": { "@types/react": "^18.0.0" } }, "sha512-P4t6saawp+b/dFrUr2cvkVsfvPguwsxtH6dNIYRllMsefqFzkZk5UIjzyDOv5g1dXIPdG4Sp1yCR4Z6RCUsG/Q=="],
 
+    "@types/recordrtc": ["@types/recordrtc@5.6.14", "", {}, "sha512-Reiy1sl11xP0r6w8DW3iQjc1BgXFyNC7aDuutysIjpFoqyftbQps9xPA2FoBkfVXpJM61betgYPNt+v65zvMhA=="],
+
     "@types/unist": ["@types/unist@3.0.3", "", {}, "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q=="],
 
     "@types/uuid": ["@types/uuid@10.0.0", "", {}, "sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ=="],
@@ -1037,6 +1041,8 @@
 
     "readdirp": ["readdirp@3.6.0", "", { "dependencies": { "picomatch": "^2.2.1" } }, "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA=="],
 
+    "recordrtc": ["recordrtc@5.6.2", "", {}, "sha512-1QNKKNtl7+KcwD1lyOgP3ZlbiJ1d0HtXnypUy7yq49xEERxk31PHvE9RCciDrulPCY7WJ+oz0R9hpNxgsIurGQ=="],
+
     "reflect-metadata": ["reflect-metadata@0.2.2", "", {}, "sha512-urBwgfrvVP/eAyXx4hluJivBKzuEbSQs9rKWCrCkbSxNv8mxPcUZKeuoF3Uy4mJl3Lwprp6yy5/39VWigZ4K6Q=="],
 
     "rehype-highlight": ["rehype-highlight@7.0.2", "", { "dependencies": { "@types/hast": "^3.0.0", "hast-util-to-text": "^4.0.0", "lowlight": "^3.0.0", "unist-util-visit": "^5.0.0", "vfile": "^6.0.0" } }, "sha512-k158pK7wdC2qL3M5NcZROZ2tR/l7zOzjxXd5VGdcfIyoijjQqpHd3JKtYSBDpDZ38UI2WJWuFAtkMDxmx5kstA=="],
diff --git a/frontend/package.json b/frontend/package.json
index b2d1880c..1a687360 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -16,7 +16,7 @@
     "mdast-util-gfm-autolink-literal": "2.0.0"
   },
   "dependencies": {
-    "@opensecret/react": "1.4.0",
+    "@opensecret/react": "1.4.3",
     "@radix-ui/react-alert-dialog": "^1.1.1",
     "@radix-ui/react-avatar": "^1.1.0",
     "@radix-ui/react-dialog": "^1.1.1",
@@ -42,6 +42,7 @@
     "react": "^18.3.1",
     "react-dom": "^18.3.1",
     "react-markdown": "^9.0.1",
+    "recordrtc": "^5.6.2",
     "rehype-highlight": "^7.0.0",
     "rehype-katex": "^7.0.1",
     "rehype-sanitize": "^6.0.0",
@@ -63,6 +64,7 @@
     "@types/node": "^22.3.0",
     "@types/react": "^18.3.3",
     "@types/react-dom": "^18.3.0",
+    "@types/recordrtc": "^5.6.14",
     "@types/uuid": "^10.0.0",
     "@vitejs/plugin-react": "^4.3.1",
     "autoprefixer": "^10.4.20",
diff --git a/frontend/src/components/ChatBox.tsx b/frontend/src/components/ChatBox.tsx
index aa1eb17a..e4f4ff5e 100644
--- a/frontend/src/components/ChatBox.tsx
+++ b/frontend/src/components/ChatBox.tsx
@@ -1,4 +1,5 @@
-import { CornerRightUp, Bot, Image, X, FileText, Loader2, Plus } from "lucide-react";
+import { CornerRightUp, Bot, Image, X, FileText, Loader2, Plus, Mic } from "lucide-react";
+import RecordRTC from "recordrtc";
 import { Button } from "@/components/ui/button";
 import { Label } from "@/components/ui/label";
 import {
@@ -244,6 +245,12 @@ export default function Component({
   const os = useOpenSecret();
   const navigate = useNavigate();
 
+  // Audio recording state
+  const [isRecording, setIsRecording] = useState(false);
+  const [isTranscribing, setIsTranscribing] = useState(false);
+  const recorderRef = useRef<RecordRTC | null>(null);
+  const streamRef = useRef<MediaStream | null>(null);
+
   // Find the first vision-capable model the user has access to
   const findFirstVisionModel = () => {
     // Check if user has Pro/Team access
@@ -471,6 +478,83 @@ export default function Component({
     setUploadedDocument(null);
     setDocumentError(null);
   };
+
+  // Audio recording functions
+  const startRecording = async () => {
+    try {
+      const stream = await navigator.mediaDevices.getUserMedia({
+        audio: {
+          echoCancellation: true,
+          noiseSuppression: true,
+          sampleRate: 44100
+        }
+      });
+
+      streamRef.current = stream;
+
+      // Create RecordRTC instance configured for WAV
+      const recorder = new RecordRTC(stream, {
+        type: "audio",
+        mimeType: "audio/wav",
+        recorderType: RecordRTC.StereoAudioRecorder,
+        numberOfAudioChannels: 1, // Mono audio for smaller file size
+        desiredSampRate: 16000, // 16kHz is good for speech
+        timeSlice: 1000 // Get data every second (optional)
+      });
+
+      recorderRef.current = recorder;
+      recorder.startRecording();
+      setIsRecording(true);
+    } catch (error) {
+      console.error("Failed to start recording:", error);
+      alert("Failed to access microphone. Please check your permissions.");
+    }
+  };
+
+  const stopRecording = () => {
+    if (recorderRef.current && isRecording) {
+      recorderRef.current.stopRecording(async () => {
+        const blob = recorderRef.current!.getBlob();
+
+        // Create a proper WAV file
+        const audioFile = new File([blob], "recording.wav", {
+          type: "audio/wav"
+        });
+
+        setIsTranscribing(true);
+        try {
+          const result = await os.transcribeAudio(audioFile, "whisper-large-v3");
+
+          // Append transcribed text to existing input
+          setInputValue((prev) => {
+            const newValue = prev ? `${prev} ${result.text}` : result.text;
+            return newValue;
+          });
+        } catch (error) {
+          console.error("Transcription failed:", error);
+        } finally {
+          setIsTranscribing(false);
+        }
+
+        // Clean up
+        if (streamRef.current) {
+          streamRef.current.getTracks().forEach((track) => track.stop());
+          streamRef.current = null;
+        }
+        recorderRef.current = null;
+      });
+
+      setIsRecording(false);
+    }
+  };
+
+  const toggleRecording = () => {
+    if (isRecording) {
+      stopRecording();
+    } else {
+      startRecording();
+    }
+  };
   const [isFocused, setIsFocused] = useState(false);
   const inputRef = useRef<HTMLTextAreaElement>(null);
   const systemPromptRef = useRef<HTMLTextAreaElement>(null);
@@ -901,6 +985,26 @@ export default function Component({
             className="hidden"
           />
 
+          {/* Microphone button */}
+          <Button
+            type="button"
+            size="sm"
+            variant="ghost"
+            className="ml-2"
+            onClick={toggleRecording}
+            disabled={isTranscribing || isInputDisabled}
+            aria-label={isRecording ? "Stop recording" : "Start recording"}
+            data-testid="mic-button"
+          >
+            {isTranscribing ? (
+              <Loader2 className="h-4 w-4 animate-spin" />
+            ) : isRecording ? (
+              <Mic className="h-4 w-4 text-orange-500" />
+            ) : (
+              <Mic className="h-4 w-4" />
+            )}
+          </Button>
+
           {/* Consolidated upload button - show for all users */}
           {!uploadedDocument && (
             <DropdownMenu>
diff --git a/frontend/src/routes/_auth.chat.$chatId.tsx b/frontend/src/routes/_auth.chat.$chatId.tsx
index 582bb11f..3977d6e6 100644
--- a/frontend/src/routes/_auth.chat.$chatId.tsx
+++ b/frontend/src/routes/_auth.chat.$chatId.tsx
@@ -1,6 +1,16 @@
 import { useEffect, useRef, useState, useCallback } from "react";
 import { createFileRoute } from "@tanstack/react-router";
-import { AsteriskIcon, Check, Copy, UserIcon, ChevronDown, Bot, SquarePenIcon } from "lucide-react";
+import {
+  AsteriskIcon,
+  Check,
+  Copy,
+  UserIcon,
+  ChevronDown,
+  Bot,
+  SquarePenIcon,
+  Volume2,
+  Square
+} from "lucide-react";
 import ChatBox from "@/components/ChatBox";
 import { useOpenAI } from "@/ai/useOpenAi";
 import { useLocalState } from "@/state/useLocalState";
@@ -71,6 +81,71 @@ function SystemMessage({
 }) {
   const textWithoutThinking = stripThinkingTags(text);
   const { isCopied, handleCopy } = useCopyToClipboard(textWithoutThinking);
+  const [isPlaying, setIsPlaying] = useState(false);
+  const audioRef = useRef<HTMLAudioElement | null>(null);
+  const openai = useOpenAI();
+
+  const handleTTS = useCallback(async () => {
+    if (isPlaying) {
+      // Stop playing
+      if (audioRef.current) {
+        audioRef.current.pause();
+        audioRef.current = null;
+      }
+      setIsPlaying(false);
+      return;
+    }
+
+    try {
+      setIsPlaying(true);
+
+      // Generate speech using OpenAI TTS
+      const response = await openai.audio.speech.create({
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        model: "kokoro" as any,
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        voice: "af_sky+af_bella" as any,
+        input: textWithoutThinking,
+        response_format: "mp3"
+      });
+
+      // Convert response to blob and create audio URL
+      const blob = new Blob([await response.arrayBuffer()], { type: "audio/mp3" });
+      const audioUrl = URL.createObjectURL(blob);
+
+      // Create and play audio
+      const audio = new Audio(audioUrl);
+      audioRef.current = audio;
+
+      audio.onended = () => {
+        setIsPlaying(false);
+        URL.revokeObjectURL(audioUrl);
+        audioRef.current = null;
+      };
+
+      audio.onerror = () => {
+        console.error("Error playing audio");
+        setIsPlaying(false);
+        URL.revokeObjectURL(audioUrl);
+        audioRef.current = null;
+      };
+
+      await audio.play();
+    } catch (error) {
+      console.error("TTS error:", error);
+      setIsPlaying(false);
+    }
+  }, [textWithoutThinking, isPlaying, openai]);
+
+  // Cleanup on unmount
+  useEffect(() => {
+    return () => {
+      if (audioRef.current) {
+        audioRef.current.pause();
+        audioRef.current = null;
+      }
+    };
+  }, []);
 
   return (
     <div className="group flex flex-col p-4">
@@ -80,15 +155,26 @@ function SystemMessage({
         </div>
         <div className="flex flex-col gap-2">
           <Markdown content={text} loading={loading} chatId={chatId} />
-          <Button
-            variant="ghost"
-            size="sm"
-            className="self-start -mx-2 -mb-2 group-hover:opacity-100 opacity-0 transition-opacity"
-            onClick={handleCopy}
-            aria-label={isCopied ? "Copied" : "Copy to clipboard"}
-          >
-            {isCopied ? <Check className="h-4 w-4" /> : <Copy className="h-4 w-4" />}
-          </Button>
+          <div className="flex gap-2 items-center">
+            <Button
+              variant="ghost"
+              size="sm"
+              className="h-8 w-8 p-0"
+              onClick={handleCopy}
+              aria-label={isCopied ? "Copied" : "Copy to clipboard"}
+            >
+              {isCopied ? <Check className="h-4 w-4" /> : <Copy className="h-4 w-4" />}
+            </Button>
+            <Button
+              variant="ghost"
+              size="sm"
+              className="h-8 w-8 p-0"
+              onClick={handleTTS}
+              aria-label={isPlaying ? "Stop audio" : "Play audio"}
+            >
+              {isPlaying ? <Square className="h-4 w-4" /> : <Volume2 className="h-4 w-4" />}
+            </Button>
+          </div>
         </div>
       </div>
     </div>
@@ -145,7 +231,7 @@ function SystemPromptMessage({ text }: { text: string }) {
           <Button
             variant="ghost"
             size="sm"
-            className="self-start -mx-2 -mb-2 group-hover:opacity-100 opacity-0 transition-opacity"
+            className="h-8 w-8 p-0 self-start"
             onClick={handleCopy}
             aria-label={isCopied ? "Copied" : "Copy to clipboard"}
           >

From fdccbc6a47b744c318f8f4d1e805e27e428e6d44 Mon Sep 17 00:00:00 2001
From: Tony Giorgio <tonygiorgio@protonmail.com>
Date: Sat, 13 Sep 2025 15:12:22 -0500
Subject: [PATCH 2/6] feat: update model names and improve model filtering
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Change Llama model canonical name from llama3-3-70b to llama-3.3-70b
- Add backward compatibility mapping for old model names
- Filter out whisper models from selector but detect availability for mic button
- Hide qwen3-coder-30b-a3b from model selector
- Conditionally show mic button only when whisper-large-v3 is available

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 frontend/src/components/ChatBox.tsx           | 43 ++++++++++---------
 frontend/src/components/ModelSelector.tsx     | 32 ++++++++++----
 .../components/apikeys/ProxyConfigSection.tsx |  4 +-
 frontend/src/state/LocalStateContext.tsx      |  9 +++-
 frontend/src/state/LocalStateContextDef.ts    |  5 +++
 frontend/src/utils/utils.ts                   |  9 ++--
 6 files changed, 68 insertions(+), 34 deletions(-)

diff --git a/frontend/src/components/ChatBox.tsx b/frontend/src/components/ChatBox.tsx
index e4f4ff5e..161c3a27 100644
--- a/frontend/src/components/ChatBox.tsx
+++ b/frontend/src/components/ChatBox.tsx
@@ -226,7 +226,8 @@ export default function Component({
     clearDraftMessage,
     model,
     setModel,
-    availableModels
+    availableModels,
+    hasWhisperModel
   } = useLocalState();
 
   const supportsVision = MODEL_CONFIG[model]?.supportsVision || false;
@@ -985,25 +986,27 @@ export default function Component({
             className="hidden"
           />
 
-          {/* Microphone button */}
-          <Button
-            type="button"
-            size="sm"
-            variant="ghost"
-            className="ml-2"
-            onClick={toggleRecording}
-            disabled={isTranscribing || isInputDisabled}
-            aria-label={isRecording ? "Stop recording" : "Start recording"}
-            data-testid="mic-button"
-          >
-            {isTranscribing ? (
-              <Loader2 className="h-4 w-4 animate-spin" />
-            ) : isRecording ? (
-              <Mic className="h-4 w-4 text-orange-500" />
-            ) : (
-              <Mic className="h-4 w-4" />
-            )}
-          </Button>
+          {/* Microphone button - only show if whisper model is available */}
+          {hasWhisperModel && (
+            <Button
+              type="button"
+              size="sm"
+              variant="ghost"
+              className="ml-2"
+              onClick={toggleRecording}
+              disabled={isTranscribing || isInputDisabled}
+              aria-label={isRecording ? "Stop recording" : "Start recording"}
+              data-testid="mic-button"
+            >
+              {isTranscribing ? (
+                <Loader2 className="h-4 w-4 animate-spin" />
+              ) : isRecording ? (
+                <Mic className="h-4 w-4 text-orange-500" />
+              ) : (
+                <Mic className="h-4 w-4" />
+              )}
+            </Button>
+          )}
 
           {/* Consolidated upload button - show for all users */}
           {!uploadedDocument && (
diff --git a/frontend/src/components/ModelSelector.tsx b/frontend/src/components/ModelSelector.tsx
index 4d3843e6..9e58644e 100644
--- a/frontend/src/components/ModelSelector.tsx
+++ b/frontend/src/components/ModelSelector.tsx
@@ -28,7 +28,7 @@ export const MODEL_CONFIG: Record<string, ModelCfg> = {
     displayName: "Llama 3.3 70B",
     tokenLimit: 70000
   },
-  "llama3-3-70b": {
+  "llama-3.3-70b": {
     displayName: "Llama 3.3 70B",
     tokenLimit: 70000
   },
@@ -89,7 +89,14 @@ export function ModelSelector({
   messages?: ChatMessage[];
   draftImages?: File[];
 }) {
-  const { model, setModel, availableModels, setAvailableModels, billingStatus } = useLocalState();
+  const {
+    model,
+    setModel,
+    availableModels,
+    setAvailableModels,
+    billingStatus,
+    setHasWhisperModel
+  } = useLocalState();
   const os = useOpenSecret();
   const navigate = useNavigate();
   const isFetching = useRef(false);
@@ -116,6 +123,10 @@ export function ModelSelector({
       isFetching.current = true;
       os.fetchModels()
         .then((models) => {
+          // Check if whisper-large-v3 is available before filtering
+          const hasWhisper = models.some((model) => model.id === "whisper-large-v3");
+          setHasWhisperModel(hasWhisper);
+
           // Filter out embedding models and "latest"
           interface ModelWithTasks extends Model {
             tasks?: string[];
@@ -123,6 +134,16 @@ export function ModelSelector({
           const filteredModels = models.filter((model) => {
             if (model.id === "latest") return false;
 
+            // Filter out whisper models (transcription)
+            if (model.id.toLowerCase().includes("whisper")) {
+              return false;
+            }
+
+            // Filter out qwen3-coder-30b-a3b
+            if (model.id === "qwen3-coder-30b-a3b") {
+              return false;
+            }
+
             // Filter out models with lowercase 'instruct' or 'embed' in their ID
             if (model.id.includes("instruct") || model.id.includes("embed")) {
               return false;
@@ -140,11 +161,6 @@ export function ModelSelector({
               return false;
             }
 
-            // Filter out transcription models like Whisper
-            if (modelWithTasks.tasks.includes("transcribe")) {
-              return false;
-            }
-
             return true;
           });
 
@@ -167,7 +183,7 @@ export function ModelSelector({
           isFetching.current = false;
         });
     }
-  }, [os, setAvailableModels]);
+  }, [os, setAvailableModels, setHasWhisperModel]);
 
   // Check if user has access to a model based on their plan
   const hasAccessToModel = (modelId: string) => {
diff --git a/frontend/src/components/apikeys/ProxyConfigSection.tsx b/frontend/src/components/apikeys/ProxyConfigSection.tsx
index 7b91b61e..02d7718c 100644
--- a/frontend/src/components/apikeys/ProxyConfigSection.tsx
+++ b/frontend/src/components/apikeys/ProxyConfigSection.tsx
@@ -343,7 +343,7 @@ client = OpenAI(
 )
 
 response = client.chat.completions.create(
-  model="llama3-3-70b",
+  model="llama-3.3-70b",
   messages=[{"role": "user", "content": "Hello!"}],
   stream=True
 )
@@ -361,7 +361,7 @@ for chunk in response:
                 <code>{`curl -N http://${config.host}:${config.port}/v1/chat/completions \\
   -H "Content-Type: application/json" \\
   -d '{
-    "model": "llama3-3-70b",
+    "model": "llama-3.3-70b",
     "messages": [{"role": "user", "content": "Hello!"}],
     "stream": true
   }'`}</code>
diff --git a/frontend/src/state/LocalStateContext.tsx b/frontend/src/state/LocalStateContext.tsx
index 9d92bacd..41f35fa7 100644
--- a/frontend/src/state/LocalStateContext.tsx
+++ b/frontend/src/state/LocalStateContext.tsx
@@ -12,7 +12,7 @@ export {
   type LocalState
 } from "./LocalStateContextDef";
 
-export const DEFAULT_MODEL_ID = "llama3-3-70b";
+export const DEFAULT_MODEL_ID = "llama-3.3-70b";
 
 export const LocalStateProvider = ({ children }: { children: React.ReactNode }) => {
   /** The model that should be assumed when a chat doesn't yet have one */
@@ -39,6 +39,7 @@ export const LocalStateProvider = ({ children }: { children: React.ReactNode })
         }
       })(),
     availableModels: [llamaModel] as OpenSecretModel[],
+    hasWhisperModel: true, // Default to true to avoid hiding button during loading
     billingStatus: null as BillingStatus | null,
     searchQuery: "",
     isSearchVisible: false,
@@ -287,6 +288,10 @@ export const LocalStateProvider = ({ children }: { children: React.ReactNode })
     setLocalState((prev) => ({ ...prev, availableModels: models }));
   }
 
+  function setHasWhisperModel(hasWhisper: boolean) {
+    setLocalState((prev) => ({ ...prev, hasWhisperModel: hasWhisper }));
+  }
+
   return (
     <LocalStateContext.Provider
       value={{
@@ -294,6 +299,8 @@ export const LocalStateProvider = ({ children }: { children: React.ReactNode })
         availableModels: localState.availableModels,
         setModel,
         setAvailableModels,
+        hasWhisperModel: localState.hasWhisperModel,
+        setHasWhisperModel,
         userPrompt: localState.userPrompt,
         systemPrompt: localState.systemPrompt,
         userImages: localState.userImages,
diff --git a/frontend/src/state/LocalStateContextDef.ts b/frontend/src/state/LocalStateContextDef.ts
index 19abef5b..b9f4b2ac 100644
--- a/frontend/src/state/LocalStateContextDef.ts
+++ b/frontend/src/state/LocalStateContextDef.ts
@@ -41,6 +41,9 @@ export type LocalState = {
   availableModels: OpenSecretModel[];
   setModel: (model: string) => void;
   setAvailableModels: (models: OpenSecretModel[]) => void;
+  /** Whether the whisper transcription model is available */
+  hasWhisperModel: boolean;
+  setHasWhisperModel: (hasWhisper: boolean) => void;
   userPrompt: string;
   systemPrompt: string | null;
   userImages: File[];
@@ -77,6 +80,8 @@ export const LocalStateContext = createContext<LocalState>({
   availableModels: [],
   setModel: () => void 0,
   setAvailableModels: () => void 0,
+  hasWhisperModel: true,
+  setHasWhisperModel: () => void 0,
   userPrompt: "",
   systemPrompt: null,
   userImages: [],
diff --git a/frontend/src/utils/utils.ts b/frontend/src/utils/utils.ts
index bcba8028..3ff75e0c 100644
--- a/frontend/src/utils/utils.ts
+++ b/frontend/src/utils/utils.ts
@@ -81,9 +81,12 @@ export function useClickOutside(
 export function aliasModelName(modelName: string | undefined): string {
   if (!modelName) return "";
 
-  // Map old complicated model name to new simplified name
-  if (modelName === "ibnzterrell/Meta-Llama-3.3-70B-Instruct-AWQ-INT4") {
-    return "llama3-3-70b";
+  // Map old model names to new simplified name
+  if (
+    modelName === "ibnzterrell/Meta-Llama-3.3-70B-Instruct-AWQ-INT4" ||
+    modelName === "llama3-3-70b"
+  ) {
+    return "llama-3.3-70b";
   }
 
   return modelName;

From 6e8a78771ab57098dce2b1c1e583b9a750124497 Mon Sep 17 00:00:00 2001
From: Tony Giorgio <tonygiorgio@protonmail.com>
Date: Sat, 13 Sep 2025 16:39:17 -0500
Subject: [PATCH 3/6] feat: add premium feature gating with upgrade prompts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove Plus dropdown button, add separate Image and Mic buttons
- Add feature gating for Image (Starter+), Voice (Pro+), and TTS (Pro+)
- Create reusable UpgradePromptDialog for all premium features
- Add upgrade prompts for locked models in ModelSelector
- Simplify pricing config with consolidated model listings
- Emphasize privacy/encryption in all upgrade messaging
- Grey out restricted features with 50% opacity for users whose plan does not include them

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 frontend/src/components/ChatBox.tsx           | 150 ++++++-------
 frontend/src/components/ModelSelector.tsx     | 201 +++++++++---------
 .../src/components/UpgradePromptDialog.tsx    | 139 ++++++++++++
 frontend/src/config/pricingConfig.tsx         |  85 ++------
 frontend/src/routes/_auth.chat.$chatId.tsx    |  42 +++-
 5 files changed, 359 insertions(+), 258 deletions(-)
 create mode 100644 frontend/src/components/UpgradePromptDialog.tsx

diff --git a/frontend/src/components/ChatBox.tsx b/frontend/src/components/ChatBox.tsx
index 161c3a27..3b0b38da 100644
--- a/frontend/src/components/ChatBox.tsx
+++ b/frontend/src/components/ChatBox.tsx
@@ -1,13 +1,8 @@
-import { CornerRightUp, Bot, Image, X, FileText, Loader2, Plus, Mic } from "lucide-react";
+import { CornerRightUp, Bot, Image, X, FileText, Loader2, Mic } from "lucide-react";
 import RecordRTC from "recordrtc";
 import { Button } from "@/components/ui/button";
 import { Label } from "@/components/ui/label";
-import {
-  DropdownMenu,
-  DropdownMenuContent,
-  DropdownMenuItem,
-  DropdownMenuTrigger
-} from "@/components/ui/dropdown-menu";
+import { UpgradePromptDialog } from "@/components/UpgradePromptDialog";
 import { useEffect, useRef, useState, useMemo } from "react";
 import { useLocalState } from "@/state/useLocalState";
 import { cn, useIsMobile } from "@/utils/utils";
@@ -243,8 +238,9 @@ export default function Component({
   const [imageError, setImageError] = useState<string | null>(null);
   const fileInputRef = useRef<HTMLInputElement>(null);
   const documentInputRef = useRef<HTMLInputElement>(null);
+  const [upgradeDialogOpen, setUpgradeDialogOpen] = useState(false);
+  const [upgradeFeature, setUpgradeFeature] = useState<"image" | "voice">("image");
   const os = useOpenSecret();
-  const navigate = useNavigate();
 
   // Audio recording state
   const [isRecording, setIsRecording] = useState(false);
@@ -596,7 +592,16 @@ export default function Component({
       freshBillingStatus.product_name?.toLowerCase().includes("max") ||
       freshBillingStatus.product_name?.toLowerCase().includes("team"));
 
-  const canUseDocuments = hasProTeamAccess;
+  // Check if user has access to Starter features (Starter plan and above)
+  const hasStarterAccess =
+    freshBillingStatus &&
+    (freshBillingStatus.product_name?.toLowerCase().includes("starter") ||
+      freshBillingStatus.product_name?.toLowerCase().includes("pro") ||
+      freshBillingStatus.product_name?.toLowerCase().includes("max") ||
+      freshBillingStatus.product_name?.toLowerCase().includes("team"));
+
+  const canUseImages = hasStarterAccess;
+  const canUseVoice = hasProTeamAccess;
 
   const handleSubmit = (e?: React.FormEvent) => {
     e?.preventDefault();
@@ -986,14 +991,51 @@ export default function Component({
             className="hidden"
           />
 
-          {/* Microphone button - only show if whisper model is available */}
+          {/* Image upload button - show for all users */}
+          {!uploadedDocument && (
+            <Button
+              type="button"
+              size="sm"
+              variant="ghost"
+              className={cn("ml-2", !canUseImages && "opacity-50")}
+              onClick={() => {
+                if (!canUseImages) {
+                  setUpgradeFeature("image");
+                  setUpgradeDialogOpen(true);
+                } else {
+                  // If not on a vision model, switch to one first
+                  if (!supportsVision) {
+                    const visionModelId = findFirstVisionModel();
+                    if (visionModelId) {
+                      setModel(visionModelId);
+                    }
+                  }
+                  fileInputRef.current?.click();
+                }
+              }}
+              disabled={isInputDisabled}
+              aria-label="Upload images"
+              data-testid="image-upload-button"
+            >
+              <Image className="h-4 w-4" />
+            </Button>
+          )}
+
+          {/* Microphone button - show if whisper model is available */}
           {hasWhisperModel && (
             <Button
               type="button"
               size="sm"
               variant="ghost"
-              className="ml-2"
-              onClick={toggleRecording}
+              className={cn("ml-2", !canUseVoice && "opacity-50")}
+              onClick={() => {
+                if (!canUseVoice) {
+                  setUpgradeFeature("voice");
+                  setUpgradeDialogOpen(true);
+                } else {
+                  toggleRecording();
+                }
+              }}
               disabled={isTranscribing || isInputDisabled}
               aria-label={isRecording ? "Stop recording" : "Start recording"}
               data-testid="mic-button"
@@ -1008,83 +1050,6 @@ export default function Component({
             </Button>
           )}
 
-          {/* Consolidated upload button - show for all users */}
-          {!uploadedDocument && (
-            <DropdownMenu>
-              <DropdownMenuTrigger asChild>
-                <Button
-                  type="button"
-                  size="sm"
-                  variant="ghost"
-                  className="ml-2"
-                  aria-label="Upload files"
-                  data-testid="file-upload-button"
-                >
-                  <Plus className="h-4 w-4" />
-                </Button>
-              </DropdownMenuTrigger>
-              <DropdownMenuContent
-                align="start"
-                className={cn(hasProTeamAccess && canUseDocuments ? "w-44" : "w-56")}
-              >
-                <DropdownMenuItem
-                  onClick={() => {
-                    if (!hasProTeamAccess) {
-                      navigate({ to: "/pricing" });
-                    } else {
-                      // If not on a vision model, switch to one first
-                      if (!supportsVision) {
-                        const visionModelId = findFirstVisionModel();
-                        if (visionModelId) {
-                          setModel(visionModelId);
-                        }
-                      }
-                      fileInputRef.current?.click();
-                    }
-                  }}
-                  className={cn(
-                    "flex items-center gap-2 group",
-                    !hasProTeamAccess && "hover:bg-purple-50 dark:hover:bg-purple-950/20"
-                  )}
-                >
-                  <Image className="h-4 w-4 shrink-0" />
-                  <span>Upload Images</span>
-                  {!hasProTeamAccess && (
-                    <>
-                      <span className="ml-auto text-[10px] px-1.5 py-0.5 rounded-sm font-medium bg-gradient-to-r from-purple-500/10 to-blue-500/10 text-purple-600 dark:text-purple-400">
-                        Pro
-                      </span>
-                      <span className="text-[10px] text-purple-600 dark:text-purple-400 opacity-0 group-hover:opacity-100 transition-opacity">
-                        Upgrade?
-                      </span>
-                    </>
-                  )}
-                </DropdownMenuItem>
-                {/* Document upload temporarily removed - will be re-added later
-                <DropdownMenuItem
-                  onClick={(e) => {
-                    e.preventDefault();
-                    // Temporarily disabled - remove this condition when re-enabling
-                    // if (!canUseDocuments) {
-                    //   navigate({ to: "/pricing" });
-                    // } else {
-                    //   documentInputRef.current?.click();
-                    // }
-                  }}
-                  className={cn("flex items-center gap-2 cursor-not-allowed opacity-50")}
-                  disabled
-                >
-                  <FileText className="h-4 w-4 shrink-0" />
-                  <div className="flex flex-col">
-                    <span>Upload Document</span>
-                    <span className="text-xs text-muted-foreground">Temporarily Unavailable</span>
-                  </div>
-                </DropdownMenuItem>
-                */}
-              </DropdownMenuContent>
-            </DropdownMenu>
-          )}
-
           <Button
             type="submit"
             size="sm"
@@ -1098,6 +1063,13 @@ export default function Component({
           </Button>
         </div>
       </form>
+
+      {/* Upgrade prompt dialog */}
+      <UpgradePromptDialog
+        open={upgradeDialogOpen}
+        onOpenChange={setUpgradeDialogOpen}
+        feature={upgradeFeature}
+      />
     </div>
   );
 }
diff --git a/frontend/src/components/ModelSelector.tsx b/frontend/src/components/ModelSelector.tsx
index 9e58644e..c5345e21 100644
--- a/frontend/src/components/ModelSelector.tsx
+++ b/frontend/src/components/ModelSelector.tsx
@@ -8,9 +8,9 @@ import {
 } from "@/components/ui/dropdown-menu";
 import { useLocalState } from "@/state/useLocalState";
 import { useOpenSecret } from "@opensecret/react";
-import { useEffect, useRef } from "react";
-import { useNavigate } from "@tanstack/react-router";
+import { useEffect, useRef, useState } from "react";
 import type { Model } from "openai/resources/models.js";
+import { UpgradePromptDialog } from "@/components/UpgradePromptDialog";
 
 // Model configuration for display names, badges, and token limits
 type ModelCfg = {
@@ -98,10 +98,11 @@ export function ModelSelector({
     setHasWhisperModel
   } = useLocalState();
   const os = useOpenSecret();
-  const navigate = useNavigate();
   const isFetching = useRef(false);
   const hasFetched = useRef(false);
   const availableModelsRef = useRef(availableModels);
+  const [upgradeDialogOpen, setUpgradeDialogOpen] = useState(false);
+  const [selectedModelName, setSelectedModelName] = useState<string>("");
 
   // Check if chat contains any images or if there are draft images
   const chatHasImages =
@@ -283,99 +284,105 @@ export function ModelSelector({
   // Always show dropdown even with single model (it may be loading more)
 
   return (
-    <DropdownMenu>
-      <DropdownMenuTrigger asChild>
-        <Button
-          variant="ghost"
-          size="sm"
-          className="h-8 gap-1 px-2"
-          data-testid="model-selector-button"
-          aria-label={`Current model: ${MODEL_CONFIG[model]?.displayName || model}. Click to change model.`}
-        >
-          {modelDisplay}
-          <ChevronDown className="h-3 w-3 opacity-50" />
-        </Button>
-      </DropdownMenuTrigger>
-      <DropdownMenuContent align="start" className="w-72">
-        {availableModels &&
-          Array.isArray(availableModels) &&
-          // Sort models: vision-capable first (if images present), then available, then restricted, then disabled
-          [...availableModels]
-            .sort((a, b) => {
-              const aConfig = MODEL_CONFIG[a.id];
-              const bConfig = MODEL_CONFIG[b.id];
-
-              // If chat has images, prioritize vision models
-              if (chatHasImages) {
-                const aHasVision = aConfig?.supportsVision || false;
-                const bHasVision = bConfig?.supportsVision || false;
-                if (aHasVision && !bHasVision) return -1;
-                if (!aHasVision && bHasVision) return 1;
-              }
-
-              // Unknown models are treated as disabled
-              const aDisabled = aConfig?.disabled || !aConfig;
-              const bDisabled = bConfig?.disabled || !bConfig;
-              const aRestricted =
-                (aConfig?.requiresPro || aConfig?.requiresStarter || false) &&
-                !hasAccessToModel(a.id);
-              const bRestricted =
-                (bConfig?.requiresPro || bConfig?.requiresStarter || false) &&
-                !hasAccessToModel(b.id);
-
-              // Disabled models go last
-              if (aDisabled && !bDisabled) return 1;
-              if (!aDisabled && bDisabled) return -1;
-
-              // Restricted models go after available but before disabled
-              if (aRestricted && !bRestricted) return 1;
-              if (!aRestricted && bRestricted) return -1;
-
-              return 0;
-            })
-            .map((availableModel) => {
-              const config = MODEL_CONFIG[availableModel.id];
-              // Unknown models are treated as disabled
-              const isDisabled = config?.disabled || !config;
-              const requiresPro = config?.requiresPro || false;
-              const requiresStarter = config?.requiresStarter || false;
-              const hasAccess = hasAccessToModel(availableModel.id);
-              const isRestricted = (requiresPro || requiresStarter) && !hasAccess;
-
-              // Disable non-vision models if chat has images
-              const isDisabledDueToImages = chatHasImages && !config?.supportsVision;
-              const effectivelyDisabled = isDisabled || isDisabledDueToImages;
-
-              return (
-                <DropdownMenuItem
-                  key={availableModel.id}
-                  onClick={() => {
-                    if (effectivelyDisabled) return;
-                    if (isRestricted) {
-                      // Navigate to pricing page for upgrade
-                      navigate({ to: "/pricing" });
-                    } else {
-                      setModel(availableModel.id);
-                    }
-                  }}
-                  className={`flex items-center justify-between group ${
-                    effectivelyDisabled ? "opacity-50 cursor-not-allowed" : ""
-                  } ${isRestricted ? "hover:bg-purple-50 dark:hover:bg-purple-950/20" : ""}`}
-                  disabled={effectivelyDisabled}
-                >
-                  <div className="flex items-center gap-2 flex-1">
-                    <div className="text-sm">{getDisplayName(availableModel.id, true)}</div>
-                    {isRestricted && !isDisabledDueToImages && (
-                      <span className="text-[10px] text-purple-600 dark:text-purple-400 opacity-0 group-hover:opacity-100 transition-opacity">
-                        Upgrade?
-                      </span>
-                    )}
-                  </div>
-                  {model === availableModel.id && <Check className="h-4 w-4" />}
-                </DropdownMenuItem>
-              );
-            })}
-      </DropdownMenuContent>
-    </DropdownMenu>
+    <>
+      <DropdownMenu>
+        <DropdownMenuTrigger asChild>
+          <Button
+            variant="ghost"
+            size="sm"
+            className="h-8 gap-1 px-2"
+            data-testid="model-selector-button"
+            aria-label={`Current model: ${MODEL_CONFIG[model]?.displayName || model}. Click to change model.`}
+          >
+            {modelDisplay}
+            <ChevronDown className="h-3 w-3 opacity-50" />
+          </Button>
+        </DropdownMenuTrigger>
+        <DropdownMenuContent align="start" className="w-72">
+          {availableModels &&
+            Array.isArray(availableModels) &&
+            // Sort models: vision-capable first (if images present), then available, then restricted, then disabled
+            [...availableModels]
+              .sort((a, b) => {
+                const aConfig = MODEL_CONFIG[a.id];
+                const bConfig = MODEL_CONFIG[b.id];
+
+                // If chat has images, prioritize vision models
+                if (chatHasImages) {
+                  const aHasVision = aConfig?.supportsVision || false;
+                  const bHasVision = bConfig?.supportsVision || false;
+                  if (aHasVision && !bHasVision) return -1;
+                  if (!aHasVision && bHasVision) return 1;
+                }
+
+                // Unknown models are treated as disabled
+                const aDisabled = aConfig?.disabled || !aConfig;
+                const bDisabled = bConfig?.disabled || !bConfig;
+                const aRestricted =
+                  (aConfig?.requiresPro || aConfig?.requiresStarter || false) &&
+                  !hasAccessToModel(a.id);
+                const bRestricted =
+                  (bConfig?.requiresPro || bConfig?.requiresStarter || false) &&
+                  !hasAccessToModel(b.id);
+
+                // Disabled models go last
+                if (aDisabled && !bDisabled) return 1;
+                if (!aDisabled && bDisabled) return -1;
+
+                // Restricted models go after available but before disabled
+                if (aRestricted && !bRestricted) return 1;
+                if (!aRestricted && bRestricted) return -1;
+
+                return 0;
+              })
+              .map((availableModel) => {
+                const config = MODEL_CONFIG[availableModel.id];
+                // Unknown models are treated as disabled
+                const isDisabled = config?.disabled || !config;
+                const requiresPro = config?.requiresPro || false;
+                const requiresStarter = config?.requiresStarter || false;
+                const hasAccess = hasAccessToModel(availableModel.id);
+                const isRestricted = (requiresPro || requiresStarter) && !hasAccess;
+
+                // Disable non-vision models if chat has images
+                const isDisabledDueToImages = chatHasImages && !config?.supportsVision;
+                const effectivelyDisabled = isDisabled || isDisabledDueToImages;
+
+                return (
+                  <DropdownMenuItem
+                    key={availableModel.id}
+                    onClick={() => {
+                      if (effectivelyDisabled) return;
+                      if (isRestricted) {
+                        // Show upgrade dialog for restricted model
+                        const modelConfig = MODEL_CONFIG[availableModel.id];
+                        setSelectedModelName(modelConfig?.displayName || availableModel.id);
+                        setUpgradeDialogOpen(true);
+                      } else {
+                        setModel(availableModel.id);
+                      }
+                    }}
+                    className={`flex items-center justify-between group ${
+                      effectivelyDisabled ? "opacity-50 cursor-not-allowed" : ""
+                    } ${isRestricted ? "hover:bg-purple-50 dark:hover:bg-purple-950/20" : ""}`}
+                    disabled={effectivelyDisabled}
+                  >
+                    <div className="flex items-center gap-2 flex-1">
+                      <div className="text-sm">{getDisplayName(availableModel.id, true)}</div>
+                    </div>
+                    {model === availableModel.id && <Check className="h-4 w-4" />}
+                  </DropdownMenuItem>
+                );
+              })}
+        </DropdownMenuContent>
+      </DropdownMenu>
+
+      <UpgradePromptDialog
+        open={upgradeDialogOpen}
+        onOpenChange={setUpgradeDialogOpen}
+        feature="model"
+        modelName={selectedModelName}
+      />
+    </>
   );
 }
diff --git a/frontend/src/components/UpgradePromptDialog.tsx b/frontend/src/components/UpgradePromptDialog.tsx
new file mode 100644
index 00000000..55d45823
--- /dev/null
+++ b/frontend/src/components/UpgradePromptDialog.tsx
@@ -0,0 +1,153 @@
+import {
+  Dialog,
+  DialogContent,
+  DialogDescription,
+  DialogFooter,
+  DialogHeader,
+  DialogTitle
+} from "@/components/ui/dialog";
+import { Button } from "@/components/ui/button";
+import { Image, Mic, Sparkles, Check, Cpu, Volume2 } from "lucide-react";
+import { useNavigate } from "@tanstack/react-router";
+
+/** Props accepted by the upgrade prompt dialog. */
+interface UpgradePromptDialogProps {
+  /** Whether the dialog is shown. */
+  open: boolean;
+  /** Forwarded to the underlying Dialog; both footer buttons also call it with `false`. */
+  onOpenChange: (open: boolean) => void;
+  /** Gated feature that triggered the prompt; selects the icon, copy, and required plan. */
+  feature: "image" | "voice" | "model" | "tts";
+  /** Display name of the restricted model; used in the title of the "model" variant. */
+  modelName?: string;
+}
+
+/**
+ * Modal describing a plan-gated feature, with a shortcut to the pricing page.
+ *
+ * "View Plans" closes the dialog and navigates to `/pricing`; "Maybe Later" only closes it.
+ */
+export function UpgradePromptDialog({
+  open,
+  onOpenChange,
+  feature,
+  modelName
+}: UpgradePromptDialogProps) {
+  const navigate = useNavigate();
+
+  // Close the dialog before leaving for the pricing page.
+  const handleUpgrade = () => {
+    onOpenChange(false);
+    navigate({ to: "/pricing" });
+  };
+
+  // Icon, copy, required plan, and benefit list for the requested feature.
+  const getFeatureInfo = () => {
+    if (feature === "image") {
+      return {
+        icon: <Image className="h-8 w-8" />,
+        title: "Image Upload",
+        description: "Upload and analyze images with AI-powered vision models",
+        requiredPlan: "Starter",
+        benefits: [
+          "Images stay private with end-to-end encryption",
+          "Upload JPEG, PNG, and WebP formats securely",
+          "Use advanced vision models like Gemma 3",
+          "Analyze diagrams, screenshots, and photos privately",
+          "Extract text from images without exposing data"
+        ]
+      };
+    } else if (feature === "voice") {
+      return {
+        icon: <Mic className="h-8 w-8" />,
+        title: "Voice Recording",
+        description: "Record and transcribe voice messages with Whisper AI",
+        requiredPlan: "Pro",
+        benefits: [
+          "Voice recordings are end-to-end encrypted",
+          "Record messages directly in chat securely",
+          "Private transcription with Whisper Large v3",
+          "Support for multiple languages",
+          "No audio data is stored or used for training"
+        ]
+      };
+    } else if (feature === "tts") {
+      return {
+        icon: <Volume2 className="h-8 w-8" />,
+        title: "Text-to-Speech",
+        description: "Listen to AI responses with natural-sounding voices",
+        requiredPlan: "Pro",
+        benefits: [
+          "Audio generation happens privately on secure servers",
+          "Natural-sounding AI voices",
+          "Perfect for accessibility or multitasking",
+          "Listen to long responses hands-free"
+        ]
+      };
+    } else {
+      // Remaining feature value ("model"): upsell for a restricted model.
+      return {
+        icon: <Cpu className="h-8 w-8" />,
+        title: modelName ? `Access ${modelName}` : "Powerful AI Models",
+        description: "Get access to our most advanced AI models for superior performance",
+        requiredPlan: "Pro",
+        benefits: [
+          "All models run in secure, encrypted environments",
+          "Access to DeepSeek R1 for advanced reasoning",
+          "OpenAI GPT-OSS, Mistral, Qwen, and more",
+          "Higher token limits for longer conversations",
+          "Priority access to new models as they launch"
+        ]
+      };
+    }
+  };
+
+  const info = getFeatureInfo();
+
+  return (
+    <Dialog open={open} onOpenChange={onOpenChange}>
+      <DialogContent className="sm:max-w-md">
+        <DialogHeader>
+          <div className="flex items-center gap-3 mb-2">
+            <div className="p-2 rounded-lg bg-primary/10 text-primary">{info.icon}</div>
+            <DialogTitle>{info.title}</DialogTitle>
+          </div>
+          <DialogDescription className="text-base">{info.description}</DialogDescription>
+        </DialogHeader>
+
+        <div className="space-y-4 py-4">
+          <div className="space-y-2">
+            {/* Plan name comes from the feature info so Starter-level features are not labeled Pro */}
+            <p className="text-sm font-medium text-muted-foreground">
+              Available with {info.requiredPlan} plan and above
+            </p>
+            <ul className="space-y-2">
+              {info.benefits.map((benefit, i) => (
+                <li key={i} className="flex items-start gap-2 text-sm">
+                  <Check className="h-4 w-4 text-green-500 mt-0.5 shrink-0" />
+                  <span>{benefit}</span>
+                </li>
+              ))}
+            </ul>
+          </div>
+
+          <div className="pt-2 border-t">
+            <p className="text-sm text-muted-foreground">
+              Plus access to 6 powerful models (including DeepSeek R1), API access, and more usage
+            </p>
+          </div>
+        </div>
+
+        <DialogFooter className="gap-2 sm:gap-0">
+          <Button variant="outline" onClick={() => onOpenChange(false)}>
+            Maybe Later
+          </Button>
+          <Button onClick={handleUpgrade} className="gap-2">
+            <Sparkles className="h-4 w-4" />
+            View Plans
+          </Button>
+        </DialogFooter>
+      </DialogContent>
+    </Dialog>
+  );
+}
diff --git a/frontend/src/config/pricingConfig.tsx b/frontend/src/config/pricingConfig.tsx
index d01bc917..1d1b7ce9 100644
--- a/frontend/src/config/pricingConfig.tsx
+++ b/frontend/src/config/pricingConfig.tsx
@@ -44,24 +44,13 @@ export const PRICING_PLANS: PricingPlan[] = [
       { text: "Rename Chats", included: true, icon: <Check className="w-4 h-4 text-green-500" /> },
       { text: "Image Upload", included: false, icon: <X className="w-4 h-4 text-red-500" /> },
       {
-        text: "DeepSeek R1 0528 671B",
+        text: "Voice Recording (Whisper)",
         included: false,
         icon: <X className="w-4 h-4 text-red-500" />
       },
+      { text: "Text-to-Speech", included: false, icon: <X className="w-4 h-4 text-red-500" /> },
       {
-        text: "OpenAI GPT-OSS 120B",
-        included: false,
-        icon: <X className="w-4 h-4 text-red-500" />
-      },
-      { text: "Gemma 3 27B", included: false, icon: <X className="w-4 h-4 text-red-500" /> },
-      {
-        text: "Mistral Small 3.1 24B",
-        included: false,
-        icon: <X className="w-4 h-4 text-red-500" />
-      },
-      { text: "Qwen 2.5 72B", included: false, icon: <X className="w-4 h-4 text-red-500" /> },
-      {
-        text: "Qwen3 Coder 480B",
+        text: "6 Powerful Models (including DeepSeek R1)",
         included: false,
         icon: <X className="w-4 h-4 text-red-500" />
       },
@@ -90,28 +79,17 @@ export const PRICING_PLANS: PricingPlan[] = [
         icon: <Check className="w-4 h-4 text-green-500" />
       },
       { text: "Gemma 3 27B", included: true, icon: <Check className="w-4 h-4 text-green-500" /> },
+      { text: "Image Upload", included: true, icon: <Check className="w-4 h-4 text-green-500" /> },
       {
-        text: "DeepSeek R1 0528 671B",
-        included: false,
-        icon: <X className="w-4 h-4 text-red-500" />
-      },
-      {
-        text: "OpenAI GPT-OSS 120B",
-        included: false,
-        icon: <X className="w-4 h-4 text-red-500" />
-      },
-      {
-        text: "Mistral Small 3.1 24B",
+        text: "Voice Recording (Whisper)",
         included: false,
         icon: <X className="w-4 h-4 text-red-500" />
       },
-      { text: "Qwen 2.5 72B", included: false, icon: <X className="w-4 h-4 text-red-500" /> },
       {
-        text: "Qwen3 Coder 480B",
+        text: "5 More Powerful Models",
         included: false,
         icon: <X className="w-4 h-4 text-red-500" />
       },
-      { text: "Image Upload", included: false, icon: <X className="w-4 h-4 text-red-500" /> },
       { text: "API Access", included: false, icon: <X className="w-4 h-4 text-red-500" /> }
     ],
     ctaText: "Start Chatting"
@@ -133,28 +111,17 @@ export const PRICING_PLANS: PricingPlan[] = [
       },
       { text: "Image Upload", included: true, icon: <Check className="w-4 h-4 text-green-500" /> },
       {
-        text: "DeepSeek R1 0528 671B",
+        text: "Voice Recording (Whisper Large v3)",
         included: true,
         icon: <Check className="w-4 h-4 text-green-500" />
       },
       {
-        text: "OpenAI GPT-OSS 120B",
-        included: true,
-        icon: <Check className="w-4 h-4 text-green-500" />
-      },
-      { text: "Gemma 3 27B", included: true, icon: <Check className="w-4 h-4 text-green-500" /> },
-      {
-        text: "Mistral Small 3.1 24B",
+        text: "Text-to-Speech",
         included: true,
         icon: <Check className="w-4 h-4 text-green-500" />
       },
       {
-        text: "Qwen 2.5 72B",
-        included: true,
-        icon: <Check className="w-4 h-4 text-green-500" />
-      },
-      {
-        text: "Qwen3 Coder 480B",
+        text: "6 Powerful Models (including DeepSeek R1)",
         included: true,
         icon: <Check className="w-4 h-4 text-green-500" />
       },
@@ -194,28 +161,17 @@ export const PRICING_PLANS: PricingPlan[] = [
       },
       { text: "Image Upload", included: true, icon: <Check className="w-4 h-4 text-green-500" /> },
       {
-        text: "DeepSeek R1 0528 671B",
-        included: true,
-        icon: <Check className="w-4 h-4 text-green-500" />
-      },
-      {
-        text: "OpenAI GPT-OSS 120B",
-        included: true,
-        icon: <Check className="w-4 h-4 text-green-500" />
-      },
-      { text: "Gemma 3 27B", included: true, icon: <Check className="w-4 h-4 text-green-500" /> },
-      {
-        text: "Mistral Small 3.1 24B",
+        text: "Voice Recording (Whisper Large v3)",
         included: true,
         icon: <Check className="w-4 h-4 text-green-500" />
       },
       {
-        text: "Qwen 2.5 72B",
+        text: "Text-to-Speech",
         included: true,
         icon: <Check className="w-4 h-4 text-green-500" />
       },
       {
-        text: "Qwen3 Coder 480B",
+        text: "6 Powerful Models (including DeepSeek R1)",
         included: true,
         icon: <Check className="w-4 h-4 text-green-500" />
       },
@@ -264,28 +220,17 @@ export const PRICING_PLANS: PricingPlan[] = [
       },
       { text: "Image Upload", included: true, icon: <Check className="w-4 h-4 text-green-500" /> },
       {
-        text: "DeepSeek R1 0528 671B",
-        included: true,
-        icon: <Check className="w-4 h-4 text-green-500" />
-      },
-      {
-        text: "OpenAI GPT-OSS 120B",
-        included: true,
-        icon: <Check className="w-4 h-4 text-green-500" />
-      },
-      { text: "Gemma 3 27B", included: true, icon: <Check className="w-4 h-4 text-green-500" /> },
-      {
-        text: "Mistral Small 3.1 24B",
+        text: "Voice Recording (Whisper Large v3)",
         included: true,
         icon: <Check className="w-4 h-4 text-green-500" />
       },
       {
-        text: "Qwen 2.5 72B",
+        text: "Text-to-Speech",
         included: true,
         icon: <Check className="w-4 h-4 text-green-500" />
       },
       {
-        text: "Qwen3 Coder 480B",
+        text: "6 Powerful Models (including DeepSeek R1)",
         included: true,
         icon: <Check className="w-4 h-4 text-green-500" />
       },
diff --git a/frontend/src/routes/_auth.chat.$chatId.tsx b/frontend/src/routes/_auth.chat.$chatId.tsx
index 3977d6e6..c6e0c1c6 100644
--- a/frontend/src/routes/_auth.chat.$chatId.tsx
+++ b/frontend/src/routes/_auth.chat.$chatId.tsx
@@ -16,6 +16,10 @@ import { useOpenAI } from "@/ai/useOpenAi";
 import { useLocalState } from "@/state/useLocalState";
 import { Markdown, stripThinkingTags } from "@/components/markdown";
 import { ChatMessage, DEFAULT_MODEL_ID } from "@/state/LocalStateContext";
+import { UpgradePromptDialog } from "@/components/UpgradePromptDialog";
+import { useQuery } from "@tanstack/react-query";
+import { getBillingService } from "@/billing/billingService";
+import { cn } from "@/utils/utils";
 import { Sidebar, SidebarToggle } from "@/components/Sidebar";
 import { useQueryClient } from "@tanstack/react-query";
 import { Button } from "@/components/ui/button";
@@ -82,10 +86,38 @@ function SystemMessage({
   const textWithoutThinking = stripThinkingTags(text);
   const { isCopied, handleCopy } = useCopyToClipboard(textWithoutThinking);
   const [isPlaying, setIsPlaying] = useState(false);
+  const [upgradeDialogOpen, setUpgradeDialogOpen] = useState(false);
   const audioRef = useRef<HTMLAudioElement | null>(null);
   const openai = useOpenAI();
+  const { setBillingStatus } = useLocalState();
+
+  // Fetch billing status to check if user has Pro/Team/Max access
+  const { data: billingStatus } = useQuery({
+    queryKey: ["billingStatus"],
+    queryFn: async () => {
+      const billingService = getBillingService();
+      const status = await billingService.getBillingStatus();
+      setBillingStatus(status);
+      return status;
+    }
+  });
+
+  // Check if user has Pro/Team/Max access for TTS
+  const hasProTeamAccess =
+    billingStatus &&
+    (billingStatus.product_name?.toLowerCase().includes("pro") ||
+      billingStatus.product_name?.toLowerCase().includes("max") ||
+      billingStatus.product_name?.toLowerCase().includes("team"));
+
+  const canUseTTS = hasProTeamAccess;
 
   const handleTTS = useCallback(async () => {
+    // Check if user has access
+    if (!canUseTTS) {
+      setUpgradeDialogOpen(true);
+      return;
+    }
+
     if (isPlaying) {
       // Stop playing
       if (audioRef.current) {
@@ -135,7 +167,7 @@ function SystemMessage({
       console.error("TTS error:", error);
       setIsPlaying(false);
     }
-  }, [textWithoutThinking, isPlaying, openai]);
+  }, [textWithoutThinking, isPlaying, openai, canUseTTS]);
 
   // Cleanup on unmount
   useEffect(() => {
@@ -168,7 +200,7 @@ function SystemMessage({
             <Button
               variant="ghost"
               size="sm"
-              className="h-8 w-8 p-0"
+              className={cn("h-8 w-8 p-0", !canUseTTS && "opacity-50")}
               onClick={handleTTS}
               aria-label={isPlaying ? "Stop audio" : "Play audio"}
             >
@@ -177,6 +209,12 @@ function SystemMessage({
           </div>
         </div>
       </div>
+
+      <UpgradePromptDialog
+        open={upgradeDialogOpen}
+        onOpenChange={setUpgradeDialogOpen}
+        feature="tts"
+      />
     </div>
   );
 }

From 6a8f561b1bfbceacf006874b87de4509cd653210 Mon Sep 17 00:00:00 2001
From: Tony Giorgio <tonygiorgio@protonmail.com>
Date: Mon, 15 Sep 2025 20:19:00 -0500
Subject: [PATCH 4/6] feat: add recording overlay UI for voice input
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implemented a clean recording overlay that appears when using voice input with visual
feedback including waveform animation, timer, and loading states. The overlay stays
visible during transcription and sending, providing a smooth UX without text flashing.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

feat: auto-play TTS for assistant responses after voice input

- Track when messages are sent via voice through ChatBox component
- Pass voice flag through LocalState context for navigation from home
- Auto-play TTS when assistant response completes after voice message
- Keep recording overlay visible until navigation completes to prevent flash
- Recording overlay disappears immediately on chat page after sending
- Simplify recording overlay for compact ChatBox (chat route): just timer and buttons
- Keep full recording overlay for tall ChatBox (index route): waveform, timer, recording indicator
- Works for both in-chat voice messages and voice messages from home page

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 frontend/src/components/ChatBox.tsx          | 589 +++++++++++--------
 frontend/src/components/RecordingOverlay.tsx | 163 +++++
 frontend/src/routes/_auth.chat.$chatId.tsx   |  66 ++-
 frontend/src/routes/index.tsx                |   4 +-
 frontend/src/state/LocalStateContext.tsx     |   7 +
 frontend/src/state/LocalStateContextDef.ts   |   4 +
 6 files changed, 571 insertions(+), 262 deletions(-)
 create mode 100644 frontend/src/components/RecordingOverlay.tsx

diff --git a/frontend/src/components/ChatBox.tsx b/frontend/src/components/ChatBox.tsx
index 3b0b38da..6695aea7 100644
--- a/frontend/src/components/ChatBox.tsx
+++ b/frontend/src/components/ChatBox.tsx
@@ -3,6 +3,7 @@ import RecordRTC from "recordrtc";
 import { Button } from "@/components/ui/button";
 import { Label } from "@/components/ui/label";
 import { UpgradePromptDialog } from "@/components/UpgradePromptDialog";
+import { RecordingOverlay } from "@/components/RecordingOverlay";
 import { useEffect, useRef, useState, useMemo } from "react";
 import { useLocalState } from "@/state/useLocalState";
 import { cn, useIsMobile } from "@/utils/utils";
@@ -201,8 +202,9 @@ export default function Component({
     systemPrompt?: string,
     images?: File[],
     documentText?: string,
-    documentMetadata?: { filename: string; fullContent: string }
-  ) => void;
+    documentMetadata?: { filename: string; fullContent: string },
+    sentViaVoice?: boolean
+  ) => void | Promise<void>;
   startTall?: boolean;
   messages?: ChatMessage[];
   isStreaming?: boolean;
@@ -245,6 +247,7 @@ export default function Component({
   // Audio recording state
   const [isRecording, setIsRecording] = useState(false);
   const [isTranscribing, setIsTranscribing] = useState(false);
+  const [isProcessingSend, setIsProcessingSend] = useState(false);
   const recorderRef = useRef<RecordRTC | null>(null);
   const streamRef = useRef<MediaStream | null>(null);
 
@@ -508,8 +511,15 @@ export default function Component({
     }
   };
 
-  const stopRecording = () => {
+  const stopRecording = (shouldSend: boolean = false) => {
     if (recorderRef.current && isRecording) {
+      // Only hide immediately if canceling, keep visible if sending
+      if (!shouldSend) {
+        setIsRecording(false);
+      } else {
+        setIsProcessingSend(true); // Show processing state
+      }
+
       recorderRef.current.stopRecording(async () => {
         const blob = recorderRef.current!.getBlob();
 
@@ -518,19 +528,57 @@ export default function Component({
           type: "audio/wav"
         });
 
-        setIsTranscribing(true);
-        try {
-          const result = await os.transcribeAudio(audioFile, "whisper-large-v3");
-
-          // Append transcribed text to existing input
-          setInputValue((prev) => {
-            const newValue = prev ? `${prev} ${result.text}` : result.text;
-            return newValue;
-          });
-        } catch (error) {
-          console.error("Transcription failed:", error);
-        } finally {
-          setIsTranscribing(false);
+        if (shouldSend) {
+          setIsTranscribing(true);
+          try {
+            const result = await os.transcribeAudio(audioFile, "whisper-large-v3");
+
+            // Set the transcribed text
+            const transcribedText = result.text.trim();
+
+            if (transcribedText) {
+              // Directly submit without updating the input field
+              const newValue = inputValue ? `${inputValue} ${transcribedText}` : transcribedText;
+
+              if (newValue.trim()) {
+                // Wait for onSubmit to complete (in case it returns a Promise for navigation)
+                await onSubmit(
+                  newValue.trim(),
+                  messages.length === 0 ? systemPromptValue.trim() || undefined : undefined,
+                  images,
+                  uploadedDocument?.cleanedText,
+                  uploadedDocument
+                    ? {
+                        filename: uploadedDocument.parsed.document.filename,
+                        fullContent:
+                          uploadedDocument.parsed.document.md_content ||
+                          uploadedDocument.parsed.document.json_content ||
+                          uploadedDocument.parsed.document.html_content ||
+                          uploadedDocument.parsed.document.text_content ||
+                          uploadedDocument.parsed.document.doctags_content ||
+                          ""
+                      }
+                    : undefined,
+                  true // sentViaVoice flag
+                );
+
+                // Clear the input and other states
+                setInputValue("");
+                imageUrls.forEach((url) => URL.revokeObjectURL(url));
+                setImageUrls(new Map());
+                setImages([]);
+                setUploadedDocument(null);
+                setDocumentError(null);
+                setImageError(null);
+              }
+            }
+          } catch (error) {
+            console.error("Transcription failed:", error);
+          } finally {
+            setIsTranscribing(false);
+            setIsProcessingSend(false);
+            setIsRecording(false); // Hide overlay after send is complete
+          }
         }
 
         // Clean up
@@ -540,8 +588,6 @@ export default function Component({
         }
         recorderRef.current = null;
       });
-
-      setIsRecording(false);
     }
   };
 
@@ -552,6 +598,26 @@ export default function Component({
       startRecording();
     }
   };
+
+  const handleRecordingSend = () => {
+    stopRecording(true);
+  };
+
+  const handleRecordingCancel = () => {
+    if (recorderRef.current && isRecording) {
+      setIsRecording(false); // Hide overlay immediately
+
+      recorderRef.current.stopRecording(() => {
+        // Clean up without transcribing
+        if (streamRef.current) {
+          streamRef.current.getTracks().forEach((track) => track.stop());
+          streamRef.current = null;
+        }
+        recorderRef.current = null;
+      });
+    }
+  };
+
   const [isFocused, setIsFocused] = useState(false);
   const inputRef = useRef<HTMLTextAreaElement>(null);
   const systemPromptRef = useRef<HTMLTextAreaElement>(null);
@@ -814,262 +880,273 @@ export default function Component({
   })();
 
   return (
-    <div className="flex flex-col w-full">
-      {/* Simple System Prompt Section - just a gear button and input when expanded */}
-      <div className={cn(chatId ? "hidden" : !canEditSystemPrompt ? "invisible mb-2" : "mb-2")}>
-        <div className="flex items-center gap-2 mb-1">
-          <button
-            type="button"
-            disabled={!canEditSystemPrompt}
-            onClick={() => setIsSystemPromptExpanded(!isSystemPromptExpanded)}
-            className="flex items-center gap-1.5 text-xs font-medium transition-colors text-muted-foreground hover:text-foreground cursor-pointer disabled:cursor-default"
-            title="System Prompt"
-            aria-label="Toggle system prompt"
-            aria-expanded={isSystemPromptExpanded}
-          >
-            <Bot className="size-6" />
-            {systemPromptValue.trim() && (
-              <div className="size-2 bg-primary rounded-full" title="System prompt active" />
-            )}
-          </button>
+    <div className="flex flex-col w-full relative">
+      {isRecording && (
+        <RecordingOverlay
+          isRecording={isRecording}
+          isProcessing={isProcessingSend}
+          onSend={handleRecordingSend}
+          onCancel={handleRecordingCancel}
+          isCompact={!startTall}
+        />
+      )}
+      <div className={cn("flex flex-col w-full", isRecording && "invisible")}>
+        {/* Simple System Prompt Section - just a gear button and input when expanded */}
+        <div className={cn(chatId ? "hidden" : !canEditSystemPrompt ? "invisible mb-2" : "mb-2")}>
+          <div className="flex items-center gap-2 mb-1">
+            <button
+              type="button"
+              disabled={!canEditSystemPrompt}
+              onClick={() => setIsSystemPromptExpanded(!isSystemPromptExpanded)}
+              className="flex items-center gap-1.5 text-xs font-medium transition-colors text-muted-foreground hover:text-foreground cursor-pointer disabled:cursor-default"
+              title="System Prompt"
+              aria-label="Toggle system prompt"
+              aria-expanded={isSystemPromptExpanded}
+            >
+              <Bot className="size-6" />
+              {systemPromptValue.trim() && (
+                <div className="size-2 bg-primary rounded-full" title="System prompt active" />
+              )}
+            </button>
+          </div>
+
+          {isSystemPromptExpanded && (
+            <textarea
+              ref={systemPromptRef}
+              value={systemPromptValue}
+              onChange={(e) => setSystemPromptValue(e.target.value)}
+              placeholder="Enter instructions for the AI (e.g., 'You are a helpful coding assistant...')"
+              rows={2}
+              className="w-full p-2 text-sm border border-muted-foreground/20 rounded-md bg-muted/50 placeholder:text-muted-foreground/70 focus:outline-none focus:ring-1 focus:ring-ring resize-none transition-colors"
+              style={{
+                height: "auto",
+                resize: "none",
+                overflowY: "auto",
+                maxHeight: "8rem",
+                minHeight: "3rem"
+              }}
+            />
+          )}
         </div>
 
-        {isSystemPromptExpanded && (
+        <TokenWarning
+          chatId={chatId}
+          onCompress={onCompress}
+          isCompressing={isSummarizing}
+          tokenPercentage={tokenPercentage}
+        />
+
+        <form
+          className={cn(
+            "p-2 rounded-lg border border-primary bg-background/80 backdrop-blur-lg focus-within:ring-1 focus-within:ring-ring",
+            isInputDisabled && "opacity-50"
+          )}
+          onSubmit={handleSubmit}
+          onClick={(e) => {
+            if (isInputDisabled) {
+              e.preventDefault();
+              return;
+            }
+            if (!isFocused) {
+              inputRef.current?.focus();
+            }
+          }}
+        >
+          {(images.length > 0 ||
+            uploadedDocument ||
+            isUploadingDocument ||
+            documentError ||
+            imageError ||
+            imageConversionError) && (
+            <div className="mb-2 space-y-2">
+              {images.length > 0 && (
+                <div className="flex gap-2 items-center flex-wrap">
+                  {images.map((f, i) => (
+                    <div key={i} className="relative">
+                      <img
+                        src={imageUrls.get(f) || ""}
+                        className="w-10 h-10 object-cover rounded-md"
+                        alt={`Uploaded image ${i + 1}`}
+                      />
+                      <button
+                        type="button"
+                        onClick={() => removeImage(i)}
+                        className="absolute -top-1 -right-1 bg-background rounded-full shadow-sm"
+                        aria-label={`Remove image ${i + 1}`}
+                      >
+                        <X className="h-3 w-3" />
+                      </button>
+                    </div>
+                  ))}
+                </div>
+              )}
+              {(imageError || imageConversionError) && (
+                <div className="text-xs text-destructive p-2 bg-destructive/10 rounded-md">
+                  {imageError || imageConversionError}
+                </div>
+              )}
+              {isUploadingDocument && !uploadedDocument && (
+                <div className="flex items-center gap-2 p-2 bg-muted/50 rounded-md animate-in fade-in duration-200">
+                  <Loader2 className="h-4 w-4 text-muted-foreground animate-spin" />
+                  <span className="text-sm text-muted-foreground">
+                    Processing document securely... This may take a minute.
+                  </span>
+                </div>
+              )}
+              {uploadedDocument && (
+                <div className="flex items-center gap-2 p-2 bg-muted/50 rounded-md">
+                  <FileText className="h-4 w-4 text-muted-foreground" />
+                  <span className="text-sm truncate flex-1">
+                    {uploadedDocument.parsed.document.filename}
+                  </span>
+                  <button
+                    type="button"
+                    onClick={removeDocument}
+                    className="text-muted-foreground hover:text-foreground"
+                    aria-label="Remove document"
+                  >
+                    <X className="h-3 w-3" />
+                  </button>
+                </div>
+              )}
+              {documentError && (
+                <div className="text-xs text-destructive p-2 bg-destructive/10 rounded-md">
+                  {documentError}
+                </div>
+              )}
+            </div>
+          )}
+          <Label htmlFor="message" className="sr-only">
+            Message
+          </Label>
           <textarea
-            ref={systemPromptRef}
-            value={systemPromptValue}
-            onChange={(e) => setSystemPromptValue(e.target.value)}
-            placeholder="Enter instructions for the AI (e.g., 'You are a helpful coding assistant...')"
-            rows={2}
-            className="w-full p-2 text-sm border border-muted-foreground/20 rounded-md bg-muted/50 placeholder:text-muted-foreground/70 focus:outline-none focus:ring-1 focus:ring-ring resize-none transition-colors"
+            disabled={isInputDisabled}
+            ref={inputRef}
+            onKeyDown={handleKeyDown}
+            onFocus={() => setIsFocused(true)}
+            onBlur={() => setIsFocused(false)}
+            id="message"
+            name="message"
+            autoComplete="off"
+            placeholder={placeholderText}
+            rows={1}
             style={{
               height: "auto",
               resize: "none",
               overflowY: "auto",
-              maxHeight: "8rem",
-              minHeight: "3rem"
+              maxHeight: "12rem",
+              ...(startTall ? { minHeight: "6rem" } : {})
             }}
-          />
-        )}
-      </div>
-
-      <TokenWarning
-        chatId={chatId}
-        onCompress={onCompress}
-        isCompressing={isSummarizing}
-        tokenPercentage={tokenPercentage}
-      />
-
-      <form
-        className={cn(
-          "p-2 rounded-lg border border-primary bg-background/80 backdrop-blur-lg focus-within:ring-1 focus-within:ring-ring",
-          isInputDisabled && "opacity-50"
-        )}
-        onSubmit={handleSubmit}
-        onClick={(e) => {
-          if (isInputDisabled) {
-            e.preventDefault();
-            return;
-          }
-          if (!isFocused) {
-            inputRef.current?.focus();
-          }
-        }}
-      >
-        {(images.length > 0 ||
-          uploadedDocument ||
-          isUploadingDocument ||
-          documentError ||
-          imageError ||
-          imageConversionError) && (
-          <div className="mb-2 space-y-2">
-            {images.length > 0 && (
-              <div className="flex gap-2 items-center flex-wrap">
-                {images.map((f, i) => (
-                  <div key={i} className="relative">
-                    <img
-                      src={imageUrls.get(f) || ""}
-                      className="w-10 h-10 object-cover rounded-md"
-                      alt={`Uploaded image ${i + 1}`}
-                    />
-                    <button
-                      type="button"
-                      onClick={() => removeImage(i)}
-                      className="absolute -top-1 -right-1 bg-background rounded-full shadow-sm"
-                      aria-label={`Remove image ${i + 1}`}
-                    >
-                      <X className="h-3 w-3" />
-                    </button>
-                  </div>
-                ))}
-              </div>
-            )}
-            {(imageError || imageConversionError) && (
-              <div className="text-xs text-destructive p-2 bg-destructive/10 rounded-md">
-                {imageError || imageConversionError}
-              </div>
+            className={cn(
+              "flex w-full ring-offset-background bg-background/0",
+              "placeholder:text-muted-foreground focus-visible:outline-none",
+              "disabled:cursor-not-allowed disabled:opacity-50",
+              "!border-0 shadow-none !border-none focus-visible:ring-0 !ring-0",
+              billingStatus === null && "animate-pulse"
             )}
-            {isUploadingDocument && !uploadedDocument && (
-              <div className="flex items-center gap-2 p-2 bg-muted/50 rounded-md animate-in fade-in duration-200">
-                <Loader2 className="h-4 w-4 text-muted-foreground animate-spin" />
-                <span className="text-sm text-muted-foreground">
-                  Processing document securely... This may take a minute.
-                </span>
-              </div>
-            )}
-            {uploadedDocument && (
-              <div className="flex items-center gap-2 p-2 bg-muted/50 rounded-md">
-                <FileText className="h-4 w-4 text-muted-foreground" />
-                <span className="text-sm truncate flex-1">
-                  {uploadedDocument.parsed.document.filename}
-                </span>
-                <button
-                  type="button"
-                  onClick={removeDocument}
-                  className="text-muted-foreground hover:text-foreground"
-                  aria-label="Remove document"
-                >
-                  <X className="h-3 w-3" />
-                </button>
-              </div>
-            )}
-            {documentError && (
-              <div className="text-xs text-destructive p-2 bg-destructive/10 rounded-md">
-                {documentError}
-              </div>
-            )}
-          </div>
-        )}
-        <Label htmlFor="message" className="sr-only">
-          Message
-        </Label>
-        <textarea
-          disabled={isInputDisabled}
-          ref={inputRef}
-          onKeyDown={handleKeyDown}
-          onFocus={() => setIsFocused(true)}
-          onBlur={() => setIsFocused(false)}
-          id="message"
-          name="message"
-          autoComplete="off"
-          placeholder={placeholderText}
-          rows={1}
-          style={{
-            height: "auto",
-            resize: "none",
-            overflowY: "auto",
-            maxHeight: "12rem",
-            ...(startTall ? { minHeight: "6rem" } : {})
-          }}
-          className={cn(
-            "flex w-full ring-offset-background bg-background/0",
-            "placeholder:text-muted-foreground focus-visible:outline-none",
-            "disabled:cursor-not-allowed disabled:opacity-50",
-            "!border-0 shadow-none !border-none focus-visible:ring-0 !ring-0",
-            billingStatus === null && "animate-pulse"
-          )}
-          value={inputValue}
-          onChange={(e) => setInputValue(e.target.value)}
-        />
-        <div className="flex items-center pt-0">
-          <ModelSelector messages={messages} draftImages={images} />
-
-          {/* Hidden file inputs */}
-          <input
-            type="file"
-            accept="image/jpeg,image/jpg,image/png,image/webp"
-            multiple
-            ref={fileInputRef}
-            onChange={handleAddImages}
-            className="hidden"
+            value={inputValue}
+            onChange={(e) => setInputValue(e.target.value)}
           />
-          <input
-            type="file"
-            accept=".pdf,.doc,.docx,.txt,.rtf,.xlsx,.xls,.pptx,.ppt,.md"
-            ref={documentInputRef}
-            onChange={handleDocumentUpload}
-            className="hidden"
-          />
-
-          {/* Image upload button - show for all users */}
-          {!uploadedDocument && (
-            <Button
-              type="button"
-              size="sm"
-              variant="ghost"
-              className={cn("ml-2", !canUseImages && "opacity-50")}
-              onClick={() => {
-                if (!canUseImages) {
-                  setUpgradeFeature("image");
-                  setUpgradeDialogOpen(true);
-                } else {
-                  // If not on a vision model, switch to one first
-                  if (!supportsVision) {
-                    const visionModelId = findFirstVisionModel();
-                    if (visionModelId) {
-                      setModel(visionModelId);
+          <div className="flex items-center pt-0">
+            <ModelSelector messages={messages} draftImages={images} />
+
+            {/* Hidden file inputs */}
+            <input
+              type="file"
+              accept="image/jpeg,image/jpg,image/png,image/webp"
+              multiple
+              ref={fileInputRef}
+              onChange={handleAddImages}
+              className="hidden"
+            />
+            <input
+              type="file"
+              accept=".pdf,.doc,.docx,.txt,.rtf,.xlsx,.xls,.pptx,.ppt,.md"
+              ref={documentInputRef}
+              onChange={handleDocumentUpload}
+              className="hidden"
+            />
+
+            {/* Image upload button - show for all users */}
+            {!uploadedDocument && (
+              <Button
+                type="button"
+                size="sm"
+                variant="ghost"
+                className={cn("ml-2", !canUseImages && "opacity-50")}
+                onClick={() => {
+                  if (!canUseImages) {
+                    setUpgradeFeature("image");
+                    setUpgradeDialogOpen(true);
+                  } else {
+                    // If not on a vision model, switch to one first
+                    if (!supportsVision) {
+                      const visionModelId = findFirstVisionModel();
+                      if (visionModelId) {
+                        setModel(visionModelId);
+                      }
                     }
+                    fileInputRef.current?.click();
                   }
-                  fileInputRef.current?.click();
-                }
-              }}
-              disabled={isInputDisabled}
-              aria-label="Upload images"
-              data-testid="image-upload-button"
-            >
-              <Image className="h-4 w-4" />
-            </Button>
-          )}
+                }}
+                disabled={isInputDisabled}
+                aria-label="Upload images"
+                data-testid="image-upload-button"
+              >
+                <Image className="h-4 w-4" />
+              </Button>
+            )}
+
+            {/* Microphone button - show if whisper model is available */}
+            {hasWhisperModel && (
+              <Button
+                type="button"
+                size="sm"
+                variant="ghost"
+                className={cn("ml-2", !canUseVoice && "opacity-50")}
+                onClick={() => {
+                  if (!canUseVoice) {
+                    setUpgradeFeature("voice");
+                    setUpgradeDialogOpen(true);
+                  } else {
+                    toggleRecording();
+                  }
+                }}
+                disabled={isTranscribing || isInputDisabled}
+                aria-label={isRecording ? "Stop recording" : "Start recording"}
+                data-testid="mic-button"
+              >
+                {isTranscribing ? (
+                  <Loader2 className="h-4 w-4 animate-spin" />
+                ) : isRecording ? (
+                  <Mic className="h-4 w-4 text-orange-500" />
+                ) : (
+                  <Mic className="h-4 w-4" />
+                )}
+              </Button>
+            )}
 
-          {/* Microphone button - show if whisper model is available */}
-          {hasWhisperModel && (
             <Button
-              type="button"
+              type="submit"
               size="sm"
-              variant="ghost"
-              className={cn("ml-2", !canUseVoice && "opacity-50")}
-              onClick={() => {
-                if (!canUseVoice) {
-                  setUpgradeFeature("voice");
-                  setUpgradeDialogOpen(true);
-                } else {
-                  toggleRecording();
-                }
-              }}
-              disabled={isTranscribing || isInputDisabled}
-              aria-label={isRecording ? "Stop recording" : "Start recording"}
-              data-testid="mic-button"
+              className="ml-auto gap-1.5"
+              disabled={
+                (!inputValue.trim() && images.length === 0 && !uploadedDocument) || isSubmitDisabled
+              }
+              aria-label="Send message"
             >
-              {isTranscribing ? (
-                <Loader2 className="h-4 w-4 animate-spin" />
-              ) : isRecording ? (
-                <Mic className="h-4 w-4 text-orange-500" />
-              ) : (
-                <Mic className="h-4 w-4" />
-              )}
+              <CornerRightUp className="size-3.5" />
             </Button>
-          )}
+          </div>
+        </form>
 
-          <Button
-            type="submit"
-            size="sm"
-            className="ml-auto gap-1.5"
-            disabled={
-              (!inputValue.trim() && images.length === 0 && !uploadedDocument) || isSubmitDisabled
-            }
-            aria-label="Send message"
-          >
-            <CornerRightUp className="size-3.5" />
-          </Button>
-        </div>
-      </form>
-
-      {/* Upgrade prompt dialog */}
-      <UpgradePromptDialog
-        open={upgradeDialogOpen}
-        onOpenChange={setUpgradeDialogOpen}
-        feature={upgradeFeature}
-      />
+        {/* Upgrade prompt dialog */}
+        <UpgradePromptDialog
+          open={upgradeDialogOpen}
+          onOpenChange={setUpgradeDialogOpen}
+          feature={upgradeFeature}
+        />
+      </div>
     </div>
   );
 }
diff --git a/frontend/src/components/RecordingOverlay.tsx b/frontend/src/components/RecordingOverlay.tsx
new file mode 100644
index 00000000..b5263ed8
--- /dev/null
+++ b/frontend/src/components/RecordingOverlay.tsx
@@ -0,0 +1,163 @@
+import { useEffect, useState, useRef } from "react";
+import { X, CornerRightUp, Loader2 } from "lucide-react";
+import { Button } from "@/components/ui/button";
+import { cn } from "@/utils/utils";
+
+interface RecordingOverlayProps {
+  isRecording: boolean;
+  isProcessing?: boolean;
+  onSend: () => void;
+  onCancel: () => void;
+  isCompact?: boolean;
+  className?: string;
+}
+
+export function RecordingOverlay({
+  isRecording,
+  isProcessing = false,
+  onSend,
+  onCancel,
+  isCompact = false,
+  className
+}: RecordingOverlayProps) {
+  const [duration, setDuration] = useState(0);
+  const startTimeRef = useRef<number>(0);
+  const animationFrameRef = useRef<number>();
+
+  useEffect(() => {
+    if (isRecording && !isProcessing) {
+      startTimeRef.current = Date.now();
+
+      const updateTimer = () => {
+        const elapsed = Math.floor((Date.now() - startTimeRef.current) / 1000);
+        setDuration(elapsed);
+        animationFrameRef.current = requestAnimationFrame(updateTimer);
+      };
+
+      animationFrameRef.current = requestAnimationFrame(updateTimer);
+
+      return () => {
+        if (animationFrameRef.current) {
+          cancelAnimationFrame(animationFrameRef.current);
+        }
+      };
+    }
+  }, [isRecording, isProcessing]);
+
+  const formatTime = (seconds: number) => {
+    const mins = Math.floor(seconds / 60);
+    const secs = seconds % 60;
+    return `${mins}:${secs.toString().padStart(2, "0")}`;
+  };
+
+  const generateWaveformBars = () => {
+    const barCount = 30;
+    const bars = [];
+
+    for (let i = 0; i < barCount; i++) {
+      const height = Math.random() * 60 + 20;
+      const animationDelay = Math.random() * 0.5;
+
+      bars.push(
+        <div
+          key={i}
+          className="flex-shrink-0 bg-primary/40 rounded-full transition-all duration-300"
+          style={{
+            width: "2px",
+            height: `${height}%`,
+            animation: isRecording
+              ? `pulse ${1 + Math.random()}s ease-in-out ${animationDelay}s infinite`
+              : "none"
+          }}
+        />
+      );
+    }
+
+    return bars;
+  };
+
+  if (!isRecording) return null;
+
+  return (
+    <div
+      className={cn(
+        "absolute inset-0 z-40 flex items-center justify-center",
+        "animate-in fade-in duration-200",
+        className
+      )}
+    >
+      <style>
+        {`
+          @keyframes pulse {
+            0%, 100% { transform: scaleY(0.5); opacity: 0.6; }
+            50% { transform: scaleY(1); opacity: 1; }
+          }
+        `}
+      </style>
+
+      <div className="w-full h-full rounded-lg bg-background/95 backdrop-blur-sm border border-primary/20 relative overflow-hidden flex flex-col items-center justify-center p-4">
+        {/* Top buttons - Cancel on left, Send on right */}
+        <div className="absolute top-3 left-3 right-3 flex justify-between">
+          <Button
+            onClick={onCancel}
+            variant="ghost"
+            size="icon"
+            className="rounded-full hover:bg-muted"
+            aria-label="Cancel recording"
+            disabled={isProcessing}
+          >
+            <X className="h-4 w-4" />
+          </Button>
+
+          <Button
+            onClick={onSend}
+            size={isCompact ? "icon" : "sm"}
+            className={cn(isCompact ? "rounded-full" : "gap-1.5")}
+            aria-label="Send recording"
+            disabled={isProcessing}
+          >
+            {isProcessing ? (
+              <Loader2 className={cn(isCompact ? "h-4 w-4" : "h-3.5 w-3.5", "animate-spin")} />
+            ) : isCompact ? (
+              <CornerRightUp className="h-4 w-4" />
+            ) : (
+              <>
+                <CornerRightUp className="h-3.5 w-3.5" />
+                Send
+              </>
+            )}
+          </Button>
+        </div>
+
+        <div className="flex flex-col items-center gap-6 max-w-md w-full">
+          {/* Waveform visualization - only show when not compact */}
+          {!isCompact && (
+            <div className="flex items-center justify-center h-12 w-full gap-0.5 px-4">
+              {generateWaveformBars()}
+            </div>
+          )}
+
+          {/* Timer */}
+          <div className="text-2xl font-mono text-muted-foreground">{formatTime(duration)}</div>
+
+          {/* Status indicator - only show when not compact */}
+          {!isCompact && (
+            <div className="flex items-center gap-2 text-sm text-muted-foreground">
+              {isProcessing ? (
+                <>
+                  <Loader2 className="w-4 h-4 animate-spin" />
+                  Processing...
+                </>
+              ) : (
+                <>
+                  <div className="w-2 h-2 bg-destructive rounded-full animate-pulse" />
+                  Recording
+                </>
+              )}
+            </div>
+          )}
+        </div>
+      </div>
+    </div>
+  );
+}
diff --git a/frontend/src/routes/_auth.chat.$chatId.tsx b/frontend/src/routes/_auth.chat.$chatId.tsx
index c6e0c1c6..4b6fdea1 100644
--- a/frontend/src/routes/_auth.chat.$chatId.tsx
+++ b/frontend/src/routes/_auth.chat.$chatId.tsx
@@ -77,11 +77,13 @@ function UserMessage({ message, chatId }: { message: ChatMessage; chatId: string
 function SystemMessage({
   text,
   loading,
-  chatId
+  chatId,
+  autoPlay = false
 }: {
   text: string;
   loading?: boolean;
   chatId: string;
+  autoPlay?: boolean;
 }) {
   const textWithoutThinking = stripThinkingTags(text);
   const { isCopied, handleCopy } = useCopyToClipboard(textWithoutThinking);
@@ -169,6 +171,14 @@ function SystemMessage({
     }
   }, [textWithoutThinking, isPlaying, openai, canUseTTS]);
 
+  // Auto-play TTS when message is complete and autoPlay is true
+  useEffect(() => {
+    if (autoPlay && !loading && textWithoutThinking && canUseTTS && !isPlaying) {
+      // Only auto-play once when the message completes
+      handleTTS();
+    }
+  }, [autoPlay, loading, canUseTTS]); // Don't include handleTTS to avoid loops
+
   // Cleanup on unmount
   useEffect(() => {
     return () => {
@@ -294,6 +304,8 @@ function ChatComponent() {
     setSystemPrompt,
     userImages,
     setUserImages,
+    sentViaVoice,
+    setSentViaVoice,
     addChat
   } = useLocalState();
   const openai = useOpenAI();
@@ -305,6 +317,8 @@ function ChatComponent() {
 
   const [isSummarizing, setIsSummarizing] = useState(false);
   const [imageConversionError, setImageConversionError] = useState<string | null>(null);
+  const [shouldAutoPlayTTS, setShouldAutoPlayTTS] = useState(false);
+  const [autoPlayMessageIndex, setAutoPlayMessageIndex] = useState<number | null>(null);
 
   const chatContainerRef = useRef<HTMLDivElement>(null);
 
@@ -351,11 +365,18 @@ function ChatComponent() {
       const prompt = userPrompt;
       const sysPrompt = systemPrompt;
       const images = userImages;
+      const wasVoice = sentViaVoice;
+
+      // If sent via voice, set the flag for auto-play TTS
+      if (wasVoice) {
+        setShouldAutoPlayTTS(true);
+      }
 
       // Clear state immediately
       setUserPrompt("");
       setSystemPrompt(null);
       setUserImages([]);
+      setSentViaVoice(false);
 
       // Send message with system prompt as separate parameter
       appendUserMessage(prompt, images, undefined, undefined, sysPrompt || undefined).catch(
@@ -366,6 +387,7 @@ function ChatComponent() {
             setUserPrompt(prompt);
             setSystemPrompt(sysPrompt);
             setUserImages(images);
+            setSentViaVoice(wasVoice);
             initialPromptProcessedRef.current = false;
           }
         }
@@ -375,12 +397,14 @@ function ChatComponent() {
     userPrompt,
     systemPrompt,
     userImages,
+    sentViaVoice,
     localChat.messages.length,
     phase,
     appendUserMessage,
     setUserPrompt,
     setSystemPrompt,
-    setUserImages
+    setUserImages,
+    setSentViaVoice
   ]);
 
   // Handle mobile new chat (matching sidebar behavior)
@@ -478,11 +502,14 @@ function ChatComponent() {
   }, [localChat.messages]);
 
   // Auto-scroll when assistant starts streaming (currentStreamingMessage appears)
+  // and handle TTS auto-play when streaming completes
   const prevHadStreamingMessage = useRef(false);
+  const prevStreamingMessage = useRef<string>("");
 
   useEffect(() => {
     const hasStreamingMessage = !!currentStreamingMessage;
     const justStartedStreaming = hasStreamingMessage && !prevHadStreamingMessage.current;
+    const justFinishedStreaming = !hasStreamingMessage && prevHadStreamingMessage.current;
 
     if (justStartedStreaming) {
       // Scroll when assistant starts streaming
@@ -498,8 +525,29 @@ function ChatComponent() {
       }
     }
 
+    // When streaming just completed and we should auto-play TTS
+    if (justFinishedStreaming && shouldAutoPlayTTS && prevStreamingMessage.current) {
+      // Find the last assistant message index and trigger TTS for it
+      const lastAssistantMessageIndex = localChat.messages
+        .map((m, i) => (m.role === "assistant" ? i : -1))
+        .filter((i) => i >= 0)
+        .pop();
+
+      if (lastAssistantMessageIndex !== undefined) {
+        // Set the message index to auto-play
+        setAutoPlayMessageIndex(lastAssistantMessageIndex);
+        setShouldAutoPlayTTS(false); // Reset the flag
+
+        // Clear the auto-play index after a short delay to prevent re-playing
+        setTimeout(() => {
+          setAutoPlayMessageIndex(null);
+        }, 1000);
+      }
+    }
+
     prevHadStreamingMessage.current = hasStreamingMessage;
-  }, [currentStreamingMessage]);
+    prevStreamingMessage.current = currentStreamingMessage || "";
+  }, [currentStreamingMessage, shouldAutoPlayTTS, localChat.messages]);
 
   const sendMessage = useCallback(
     async (
@@ -507,10 +555,17 @@ function ChatComponent() {
       systemPrompt?: string,
       images?: File[],
       documentText?: string,
-      documentMetadata?: { filename: string; fullContent: string }
+      documentMetadata?: { filename: string; fullContent: string },
+      sentViaVoice?: boolean
     ) => {
+      // Store the voice flag for later use when response completes
+      if (sentViaVoice) {
+        setShouldAutoPlayTTS(true);
+      }
       // Use the appendUserMessage from the hook with system prompt as separate parameter
-      await appendUserMessage(input, images, documentText, documentMetadata, systemPrompt);
+      // Don't await: run in the background so the recording overlay disappears right after sending
+      const send = appendUserMessage(input, images, documentText, documentMetadata, systemPrompt);
+      send.catch((error) => console.error("Failed to send message:", error)); // no unhandled rejection
       // Note: Auto-scrolling is handled by the effect that watches for streaming start
     },
     [appendUserMessage]
@@ -680,6 +735,7 @@ END OF INSTRUCTIONS`;
                         : message.content.find((p) => p.type === "text")?.text || ""
                     }
                     chatId={chatId}
+                    autoPlay={index === autoPlayMessageIndex}
                   />
                 )}
               </div>
diff --git a/frontend/src/routes/index.tsx b/frontend/src/routes/index.tsx
index 631ee080..933f8243 100644
--- a/frontend/src/routes/index.tsx
+++ b/frontend/src/routes/index.tsx
@@ -117,7 +117,8 @@ function Index() {
     systemPrompt?: string,
     images?: File[],
     documentText?: string,
-    documentMetadata?: { filename: string; fullContent: string }
+    documentMetadata?: { filename: string; fullContent: string },
+    sentViaVoice?: boolean
   ) {
     // Allow submission if there's text input, images, or a document
     const hasTextInput = input.trim() !== "";
@@ -143,6 +144,7 @@ function Index() {
     localState.setUserPrompt(finalInput);
     localState.setSystemPrompt(systemPrompt?.trim() || null);
     localState.setUserImages(images || []);
+    localState.setSentViaVoice(sentViaVoice || false);
 
     // Store document metadata if provided (we'll need to add this to LocalState)
     if (documentMetadata) {
diff --git a/frontend/src/state/LocalStateContext.tsx b/frontend/src/state/LocalStateContext.tsx
index 41f35fa7..8b26f8d6 100644
--- a/frontend/src/state/LocalStateContext.tsx
+++ b/frontend/src/state/LocalStateContext.tsx
@@ -28,6 +28,7 @@ export const LocalStateProvider = ({ children }: { children: React.ReactNode })
     userPrompt: "",
     systemPrompt: null as string | null,
     userImages: [] as File[],
+    sentViaVoice: false,
     model:
       aliasModelName(import.meta.env.VITE_DEV_MODEL_OVERRIDE) ||
       (() => {
@@ -107,6 +108,10 @@ export const LocalStateProvider = ({ children }: { children: React.ReactNode })
     setLocalState((prev) => ({ ...prev, userImages: images }));
   }
 
+  function setSentViaVoice(sentViaVoice: boolean) {
+    setLocalState((prev) => ({ ...prev, sentViaVoice }));
+  }
+
   function setBillingStatus(status: BillingStatus) {
     setLocalState((prev) => ({ ...prev, billingStatus: status }));
   }
@@ -304,6 +309,7 @@ export const LocalStateProvider = ({ children }: { children: React.ReactNode })
         userPrompt: localState.userPrompt,
         systemPrompt: localState.systemPrompt,
         userImages: localState.userImages,
+        sentViaVoice: localState.sentViaVoice,
         billingStatus: localState.billingStatus,
         searchQuery: localState.searchQuery,
         setSearchQuery,
@@ -313,6 +319,7 @@ export const LocalStateProvider = ({ children }: { children: React.ReactNode })
         setUserPrompt,
         setSystemPrompt,
         setUserImages,
+        setSentViaVoice,
         addChat,
         getChatById,
         persistChat,
diff --git a/frontend/src/state/LocalStateContextDef.ts b/frontend/src/state/LocalStateContextDef.ts
index b9f4b2ac..52b729e4 100644
--- a/frontend/src/state/LocalStateContextDef.ts
+++ b/frontend/src/state/LocalStateContextDef.ts
@@ -47,6 +47,7 @@ export type LocalState = {
   userPrompt: string;
   systemPrompt: string | null;
   userImages: File[];
+  sentViaVoice: boolean;
   billingStatus: BillingStatus | null;
   /** Current search query for filtering chat history */
   searchQuery: string;
@@ -60,6 +61,7 @@ export type LocalState = {
   setUserPrompt: (prompt: string) => void;
   setSystemPrompt: (prompt: string | null) => void;
   setUserImages: (images: File[]) => void;
+  setSentViaVoice: (sentViaVoice: boolean) => void;
   addChat: (title?: string) => Promise<string>;
   getChatById: (id: string) => Promise<Chat | undefined>;
   persistChat: (chat: Chat) => Promise<void>;
@@ -85,6 +87,7 @@ export const LocalStateContext = createContext<LocalState>({
   userPrompt: "",
   systemPrompt: null,
   userImages: [],
+  sentViaVoice: false,
   billingStatus: null,
   searchQuery: "",
   setSearchQuery: () => void 0,
@@ -94,6 +97,7 @@ export const LocalStateContext = createContext<LocalState>({
   setUserPrompt: () => void 0,
   setSystemPrompt: () => void 0,
   setUserImages: () => void 0,
+  setSentViaVoice: () => void 0,
   addChat: async () => "",
   getChatById: async () => undefined,
   persistChat: async () => void 0,

From d49f7b69b022ffa47dcf10786e37f48c800fcc3a Mon Sep 17 00:00:00 2001
From: Tony Giorgio <tonygiorgio@protonmail.com>
Date: Tue, 16 Sep 2025 10:13:02 -0500
Subject: [PATCH 5/6] feat: add short names for models in collapsed selector
 view
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add shortName field to model configuration to display concise model names
(e.g., "Llama 3.3" instead of "Llama 3.3 70B") in the collapsed dropdown.
Full names with badges remain visible in the expanded dropdown menu.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 frontend/src/components/ModelSelector.tsx | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/frontend/src/components/ModelSelector.tsx b/frontend/src/components/ModelSelector.tsx
index c5345e21..115a2309 100644
--- a/frontend/src/components/ModelSelector.tsx
+++ b/frontend/src/components/ModelSelector.tsx
@@ -15,6 +15,7 @@ import { UpgradePromptDialog } from "@/components/UpgradePromptDialog";
 // Model configuration for display names, badges, and token limits
 type ModelCfg = {
   displayName: string;
+  shortName: string;
   badges?: string[];
   disabled?: boolean;
   requiresPro?: boolean;
@@ -26,14 +27,17 @@ type ModelCfg = {
 export const MODEL_CONFIG: Record<string, ModelCfg> = {
   "ibnzterrell/Meta-Llama-3.3-70B-Instruct-AWQ-INT4": {
     displayName: "Llama 3.3 70B",
+    shortName: "Llama 3.3",
     tokenLimit: 70000
   },
   "llama-3.3-70b": {
     displayName: "Llama 3.3 70B",
+    shortName: "Llama 3.3",
     tokenLimit: 70000
   },
   "leon-se/gemma-3-27b-it-fp8-dynamic": {
     displayName: "Gemma 3 27B",
+    shortName: "Gemma 3",
     badges: ["Starter"],
     requiresStarter: true,
     supportsVision: true,
@@ -41,18 +45,21 @@ export const MODEL_CONFIG: Record<string, ModelCfg> = {
   },
   "deepseek-r1-0528": {
     displayName: "DeepSeek R1 0528 671B",
+    shortName: "DeepSeek R1",
     badges: ["Pro", "New"],
     requiresPro: true,
     tokenLimit: 130000
   },
   "gpt-oss-120b": {
     displayName: "OpenAI GPT-OSS 120B",
+    shortName: "GPT-OSS",
     badges: ["Pro", "New"],
     requiresPro: true,
     tokenLimit: 128000
   },
   "mistral-small-3-1-24b": {
     displayName: "Mistral Small 3.1 24B",
+    shortName: "Mistral Small",
     badges: ["Pro"],
     requiresPro: true,
     supportsVision: true,
@@ -60,12 +67,14 @@ export const MODEL_CONFIG: Record<string, ModelCfg> = {
   },
   "qwen2-5-72b": {
     displayName: "Qwen 2.5 72B",
+    shortName: "Qwen 2.5",
     badges: ["Pro"],
     requiresPro: true,
     tokenLimit: 128000
   },
   "qwen3-coder-480b": {
     displayName: "Qwen3 Coder 480B",
+    shortName: "Qwen3 Coder",
     badges: ["Pro", "New"],
     requiresPro: true,
     tokenLimit: 128000
@@ -274,10 +283,12 @@ export function ModelSelector({
     return <span className="flex items-center gap-1">{elements}</span>;
   };
 
-  // Always show the same format, whether dropdown or not
+  // Show short name in the collapsed view (without badges)
   const modelDisplay = (
     <div className="flex items-center gap-1">
-      <div className="text-xs font-medium">{getDisplayName(model)}</div>
+      <div className="text-xs font-medium">
+        {MODEL_CONFIG[model]?.shortName || MODEL_CONFIG[model]?.displayName || model}
+      </div>
     </div>
   );
 

From 069f857bb7ad8e2c714063010bf32b73a8d8bc8a Mon Sep 17 00:00:00 2001
From: Tony Giorgio <tonygiorgio@protonmail.com>
Date: Tue, 16 Sep 2025 10:16:29 -0500
Subject: [PATCH 6/6] fix: cleanup code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Decreased margin from ml-2 to ml-1 for image upload and mic buttons
to create a more compact layout in the ChatBox component.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

Attempt iOS voice

fix: optimize macOS audio recording to minimize system audio interruption

- Disable echo cancellation to reduce processing overhead
- Keep noise suppression enabled for audio quality
- Disable auto gain control to prevent audio ducking
- Match sample rate to 16kHz throughout for consistency
- Reduces initial audio interruption on first recording

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

fix: prevent keyboard popup when clicking buttons on iOS

Don't auto-focus the input field when clicking on buttons (mic, photo, model selector) or other interactive elements. This prevents the keyboard from appearing unnecessarily on iOS while maintaining the focus behavior for non-interactive areas of the chat box.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

fix: address PR review feedback for TTS and voice recording features

- Fixed TTS memory leaks with proper URL revocation and cleanup
- Added abort controller for in-flight TTS requests
- Prevented multiple TTS audio overlap with global AudioManager
- Added unmount cleanup for recording to prevent hot-mic issue
- Added re-entry guard to prevent duplicate recording starts
- Protected against empty/null blob edge cases in recording
- Fixed jittery waveform animation with stable, memoized pattern
- Removed unnecessary camera and JIT permissions from Entitlements

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 frontend/src-tauri/Entitlements.plist         |  19 +++
 frontend/src-tauri/Info.plist                 |  10 ++
 .../src-tauri/gen/apple/maple_iOS/Info.plist  |   6 +
 frontend/src-tauri/tauri.conf.json            |   4 +-
 frontend/src/components/ChatBox.tsx           | 109 ++++++++++++++--
 frontend/src/components/RecordingOverlay.tsx  |  62 ++++++---
 frontend/src/routes/_auth.chat.$chatId.tsx    | 122 ++++++++++++++++--
 7 files changed, 290 insertions(+), 42 deletions(-)
 create mode 100644 frontend/src-tauri/Entitlements.plist
 create mode 100644 frontend/src-tauri/Info.plist

diff --git a/frontend/src-tauri/Entitlements.plist b/frontend/src-tauri/Entitlements.plist
new file mode 100644
index 00000000..27c71b44
--- /dev/null
+++ b/frontend/src-tauri/Entitlements.plist
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <!-- Required for microphone access -->
+    <key>com.apple.security.device.audio-input</key>
+    <true/>
+
+    <!-- Network permissions -->
+    <key>com.apple.security.network.client</key>
+    <true/>
+    <key>com.apple.security.network.server</key>
+    <true/>
+
+    <!-- File access permissions -->
+    <key>com.apple.security.files.user-selected.read-write</key>
+    <true/>
+</dict>
+</plist>
diff --git a/frontend/src-tauri/Info.plist b/frontend/src-tauri/Info.plist
new file mode 100644
index 00000000..73070f8e
--- /dev/null
+++ b/frontend/src-tauri/Info.plist
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+  <key>NSMicrophoneUsageDescription</key>
+  <string>Maple needs access to your microphone to record voice messages for your AI conversations.</string>
+  <key>NSCameraUsageDescription</key>
+  <string>Maple needs access to your camera to take photos for your AI conversations.</string>
+</dict>
+</plist>
\ No newline at end of file
diff --git a/frontend/src-tauri/gen/apple/maple_iOS/Info.plist b/frontend/src-tauri/gen/apple/maple_iOS/Info.plist
index 5a1056bf..526c015f 100644
--- a/frontend/src-tauri/gen/apple/maple_iOS/Info.plist
+++ b/frontend/src-tauri/gen/apple/maple_iOS/Info.plist
@@ -62,5 +62,11 @@
 	<string>Maple needs access to your photo library to upload images to your AI conversations.</string>
 	<key>NSCameraUsageDescription</key>
 	<string>Maple needs access to your camera to take photos for your AI conversations.</string>
+	<key>NSMicrophoneUsageDescription</key>
+	<string>Maple needs access to your microphone to record voice messages for your AI conversations.</string>
+	<key>UIBackgroundModes</key>
+	<array>
+		<string>audio</string>
+	</array>
 </dict>
 </plist>
\ No newline at end of file
diff --git a/frontend/src-tauri/tauri.conf.json b/frontend/src-tauri/tauri.conf.json
index ef720958..a48f1aff 100644
--- a/frontend/src-tauri/tauri.conf.json
+++ b/frontend/src-tauri/tauri.conf.json
@@ -43,7 +43,7 @@
       }
     ],
     "security": {
-      "csp": "default-src 'self'; connect-src 'self' https://opensecret.cloud https://*.opensecret.cloud https://trymaple.ai https://*.trymaple.ai https://secretgpt.ai https://*.secretgpt.ai https://*.maple-ca8.pages.dev https://raw.githubusercontent.com localhost:*; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data: blob:; font-src 'self' data:"
+      "csp": "default-src 'self'; connect-src 'self' https://opensecret.cloud https://*.opensecret.cloud https://trymaple.ai https://*.trymaple.ai https://secretgpt.ai https://*.secretgpt.ai https://*.maple-ca8.pages.dev https://raw.githubusercontent.com localhost:*; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data: blob:; font-src 'self' data:; worker-src 'self' blob:; media-src 'self' blob: mediastream:"
     }
   },
   "bundle": {
@@ -62,7 +62,7 @@
       "minimumSystemVersion": "10.13",
       "exceptionDomain": "opensecret.cloud",
       "signingIdentity": null,
-      "entitlements": null
+      "entitlements": "./Entitlements.plist"
     },
     "iOS": {
       "developmentTeam": "X773Y823TN"
diff --git a/frontend/src/components/ChatBox.tsx b/frontend/src/components/ChatBox.tsx
index 6695aea7..a046d71d 100644
--- a/frontend/src/components/ChatBox.tsx
+++ b/frontend/src/components/ChatBox.tsx
@@ -248,6 +248,7 @@ export default function Component({
   const [isRecording, setIsRecording] = useState(false);
   const [isTranscribing, setIsTranscribing] = useState(false);
   const [isProcessingSend, setIsProcessingSend] = useState(false);
+  const [audioError, setAudioError] = useState<string | null>(null);
   const recorderRef = useRef<RecordRTC | null>(null);
   const streamRef = useRef<MediaStream | null>(null);
 
@@ -481,12 +482,25 @@ export default function Component({
 
   // Audio recording functions
   const startRecording = async () => {
+    // Prevent duplicate starts
+    if (isRecording || isTranscribing) return;
+
     try {
+      // Check if getUserMedia is available
+      if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
+        setAudioError(
+          "Microphone access is blocked. Please check your browser permissions or disable Lockdown Mode for this site (Settings > Safari > Advanced > Lockdown Mode)."
+        );
+        setTimeout(() => setAudioError(null), 8000); // Longer timeout for this important message
+        return;
+      }
+
       const stream = await navigator.mediaDevices.getUserMedia({
         audio: {
-          echoCancellation: true,
+          echoCancellation: false, // Disable to reduce processing overhead
           noiseSuppression: true,
-          sampleRate: 44100
+          autoGainControl: false, // Disable AGC to prevent audio ducking
+          sampleRate: 16000 // Lower sample rate to match output
         }
       });
 
@@ -498,16 +512,37 @@ export default function Component({
         mimeType: "audio/wav",
         recorderType: RecordRTC.StereoAudioRecorder,
         numberOfAudioChannels: 1, // Mono audio for smaller file size
-        desiredSampRate: 16000, // 16kHz is good for speech
-        timeSlice: 1000 // Get data every second (optional)
+        desiredSampRate: 16000 // 16kHz is good for speech
       });
 
       recorderRef.current = recorder;
       recorder.startRecording();
       setIsRecording(true);
+      setAudioError(null); // Clear any previous errors
     } catch (error) {
       console.error("Failed to start recording:", error);
-      alert("Failed to access microphone. Please check your permissions.");
+      const err = error as Error & { name?: string };
+      console.error("Error name:", err.name);
+      console.error("Error message:", err.message);
+
+      // Handle different error types
+      if (err.name === "NotAllowedError" || err.name === "PermissionDeniedError") {
+        setAudioError(
+          "Microphone access denied. Please enable microphone permissions in Settings > Maple."
+        );
+      } else if (err.name === "NotFoundError" || err.name === "DevicesNotFoundError") {
+        setAudioError("No microphone found. Please check your device.");
+      } else if (err.name === "NotReadableError" || err.name === "TrackStartError") {
+        setAudioError("Microphone is already in use by another app.");
+      } else {
+        // Include error details for debugging
+        setAudioError(
+          `Failed to access microphone: ${err.name || "Unknown error"} - ${err.message || "Please try again"}`
+        );
+      }
+
+      // Clear error after 5 seconds
+      setTimeout(() => setAudioError(null), 5000);
     }
   };
 
@@ -521,7 +556,25 @@ export default function Component({
       }
 
       recorderRef.current.stopRecording(async () => {
-        const blob = recorderRef.current!.getBlob();
+        // Safely get blob (recorder might be null by now)
+        const blob = recorderRef.current?.getBlob();
+
+        if (!blob || blob.size === 0) {
+          console.error("No audio recorded or empty recording");
+          if (shouldSend) {
+            setAudioError("No audio was recorded. Please try again.");
+            setTimeout(() => setAudioError(null), 5000);
+          }
+          // Still need to clean up
+          if (streamRef.current) {
+            streamRef.current.getTracks().forEach((track) => track.stop());
+            streamRef.current = null;
+          }
+          recorderRef.current = null;
+          setIsProcessingSend(false);
+          setIsRecording(false);
+          return;
+        }
 
         // Create a proper WAV file
         const audioFile = new File([blob], "recording.wav", {
@@ -574,6 +627,9 @@ export default function Component({
             }
           } catch (error) {
             console.error("Transcription failed:", error);
+            setAudioError("Failed to transcribe audio. Please try again.");
+            // Clear error after 5 seconds
+            setTimeout(() => setAudioError(null), 5000);
           } finally {
             setIsTranscribing(false);
             setIsProcessingSend(false);
@@ -865,6 +921,23 @@ export default function Component({
     };
   }, [imageUrls]);
 
+  // Cleanup audio recording on unmount: release the microphone stream and stop the recorder
+  useEffect(() => {
+    return () => {
+      // Stop every track on the captured stream so the microphone is released
+      if (streamRef.current) {
+        streamRef.current.getTracks().forEach((track) => track.stop());
+        streamRef.current = null;
+      }
+      // Check the ref only: `isRecording` here would be the stale mount-time value (always false)
+      if (recorderRef.current) {
+        recorderRef.current.stopRecording(() => {
+          recorderRef.current = null;
+        });
+      }
+    };
+  }, []);
+
   // No longer need token calculation or plan type check since we removed the hard limit
   // Just keeping the TokenWarning component which handles its own calculations
   const placeholderText = (() => {
@@ -947,7 +1020,17 @@ export default function Component({
               e.preventDefault();
               return;
             }
-            if (!isFocused) {
+            // Don't auto-focus if clicking on a button or interactive element
+            const target = e.target as HTMLElement;
+            const isInteractiveElement =
+              target.tagName === "BUTTON" ||
+              target.closest("button") ||
+              target.tagName === "INPUT" ||
+              target.tagName === "SELECT" ||
+              target.closest('[role="button"]') ||
+              target.closest('[role="combobox"]');
+
+            if (!isInteractiveElement && !isFocused) {
               inputRef.current?.focus();
             }
           }}
@@ -957,7 +1040,8 @@ export default function Component({
             isUploadingDocument ||
             documentError ||
             imageError ||
-            imageConversionError) && (
+            imageConversionError ||
+            audioError) && (
             <div className="mb-2 space-y-2">
               {images.length > 0 && (
                 <div className="flex gap-2 items-center flex-wrap">
@@ -1014,6 +1098,11 @@ export default function Component({
                   {documentError}
                 </div>
               )}
+              {audioError && (
+                <div className="text-xs text-destructive p-2 bg-destructive/10 rounded-md">
+                  {audioError}
+                </div>
+              )}
             </div>
           )}
           <Label htmlFor="message" className="sr-only">
@@ -1073,7 +1162,7 @@ export default function Component({
                 type="button"
                 size="sm"
                 variant="ghost"
-                className={cn("ml-2", !canUseImages && "opacity-50")}
+                className={cn("ml-1", !canUseImages && "opacity-50")}
                 onClick={() => {
                   if (!canUseImages) {
                     setUpgradeFeature("image");
@@ -1103,7 +1192,7 @@ export default function Component({
                 type="button"
                 size="sm"
                 variant="ghost"
-                className={cn("ml-2", !canUseVoice && "opacity-50")}
+                className={cn("ml-1", !canUseVoice && "opacity-50")}
                 onClick={() => {
                   if (!canUseVoice) {
                     setUpgradeFeature("voice");
diff --git a/frontend/src/components/RecordingOverlay.tsx b/frontend/src/components/RecordingOverlay.tsx
index b5263ed8..caa7fb26 100644
--- a/frontend/src/components/RecordingOverlay.tsx
+++ b/frontend/src/components/RecordingOverlay.tsx
@@ -1,4 +1,4 @@
-import { useEffect, useState, useRef } from "react";
+import { useEffect, useState, useRef, useMemo } from "react";
 import { X, CornerRightUp, Loader2 } from "lucide-react";
 import { Button } from "@/components/ui/button";
 import { cn } from "@/utils/utils";
@@ -50,30 +50,54 @@ export function RecordingOverlay({
     return `${mins}:${secs.toString().padStart(2, "0")}`;
   };
 
-  const generateWaveformBars = () => {
+  // Generate stable bar configurations once when component mounts
+  const waveformBars = useMemo(() => {
     const barCount = 30;
     const bars = [];
 
+    // Create a pseudo-random but stable pattern
+    const seed = 12345; // Fixed seed for consistency
+    let rand = seed;
+    const pseudoRandom = () => {
+      rand = (rand * 9301 + 49297) % 233280;
+      return rand / 233280;
+    };
+
     for (let i = 0; i < barCount; i++) {
-      const height = Math.random() * 60 + 20;
-      const animationDelay = Math.random() * 0.5;
-
-      bars.push(
-        <div
-          key={i}
-          className="flex-shrink-0 bg-primary/40 rounded-full transition-all duration-300"
-          style={{
-            width: "2px",
-            height: `${height}%`,
-            animation: isRecording
-              ? `pulse ${1 + Math.random()}s ease-in-out ${animationDelay}s infinite`
-              : "none"
-          }}
-        />
-      );
+      // Create a wave-like pattern with some randomness
+      const baseHeight = 35 + Math.sin((i / barCount) * Math.PI * 2) * 20;
+      const randomVariation = pseudoRandom() * 15;
+      const height = baseHeight + randomVariation;
+
+      // Stagger animations for a more natural flow
+      const animationDuration = 0.8 + pseudoRandom() * 0.4; // 0.8-1.2s
+      const animationDelay = (i / barCount) * 0.3; // Progressive delay
+
+      bars.push({
+        height,
+        animationDuration,
+        animationDelay
+      });
     }
 
     return bars;
+  }, []); // Empty deps = generated once
+
+  const renderWaveformBars = () => {
+    return waveformBars.map((bar, i) => (
+      <div
+        key={i}
+        className="flex-shrink-0 bg-primary/40 rounded-full"
+        style={{
+          width: "2px",
+          height: `${bar.height}%`,
+          animation: isRecording
+            ? `pulse ${bar.animationDuration}s ease-in-out ${bar.animationDelay}s infinite`
+            : "none",
+          transition: "height 0.3s ease-out"
+        }}
+      />
+    ));
   };
 
   if (!isRecording) return null;
@@ -133,7 +157,7 @@ export function RecordingOverlay({
           {/* Waveform visualization - only show when not compact */}
           {!isCompact && (
             <div className="flex items-center justify-center h-12 w-full gap-0.5 px-4">
-              {generateWaveformBars()}
+              {renderWaveformBars()}
             </div>
           )}
 
diff --git a/frontend/src/routes/_auth.chat.$chatId.tsx b/frontend/src/routes/_auth.chat.$chatId.tsx
index 4b6fdea1..6afb14f9 100644
--- a/frontend/src/routes/_auth.chat.$chatId.tsx
+++ b/frontend/src/routes/_auth.chat.$chatId.tsx
@@ -27,6 +27,40 @@ import { useNavigate, useLocation } from "@tanstack/react-router";
 import { useIsMobile } from "@/utils/utils";
 import { useChatSession } from "@/hooks/useChatSession";
 
+// Global audio manager (singleton) to prevent multiple TTS clips playing simultaneously
+class AudioManager {
+  private static instance: AudioManager; // shared instance, created on first getInstance() call
+  private currentAudio: HTMLAudioElement | null = null; // element currently registered, if any
+  private currentCleanup: (() => void) | null = null; // teardown paired with currentAudio
+
+  static getInstance(): AudioManager { // Returns the shared instance, creating it lazily
+    if (!AudioManager.instance) {
+      AudioManager.instance = new AudioManager();
+    }
+    return AudioManager.instance;
+  }
+
+  stopCurrent() { // Pauses and forgets the registered audio, then runs and clears its cleanup
+    if (this.currentAudio) {
+      this.currentAudio.pause();
+      this.currentAudio.src = ""; // clear the element's source
+      this.currentAudio = null;
+    }
+    if (this.currentCleanup) {
+      this.currentCleanup(); // caller-supplied teardown registered via setCurrentAudio
+      this.currentCleanup = null;
+    }
+  }
+
+  setCurrentAudio(audio: HTMLAudioElement, cleanup: () => void) { // Registers only; never plays
+    this.stopCurrent(); // Stop any existing audio
+    this.currentAudio = audio;
+    this.currentCleanup = cleanup;
+  }
+}
+
+const audioManager = AudioManager.getInstance();
+
 export const Route = createFileRoute("/_auth/chat/$chatId")({
   component: ChatComponent
 });
@@ -90,6 +124,8 @@ function SystemMessage({
   const [isPlaying, setIsPlaying] = useState(false);
   const [upgradeDialogOpen, setUpgradeDialogOpen] = useState(false);
   const audioRef = useRef<HTMLAudioElement | null>(null);
+  const audioUrlRef = useRef<string | null>(null);
+  const abortControllerRef = useRef<AbortController | null>(null);
   const openai = useOpenAI();
   const { setBillingStatus } = useLocalState();
 
@@ -121,53 +157,105 @@ function SystemMessage({
     }
 
     if (isPlaying) {
-      // Stop playing
+      // Stop playing and cleanup
+      if (abortControllerRef.current) {
+        abortControllerRef.current.abort();
+        abortControllerRef.current = null;
+      }
       if (audioRef.current) {
         audioRef.current.pause();
+        audioRef.current.src = "";
         audioRef.current = null;
       }
+      if (audioUrlRef.current) {
+        URL.revokeObjectURL(audioUrlRef.current);
+        audioUrlRef.current = null;
+      }
       setIsPlaying(false);
       return;
     }
 
+    // Guard against empty text
+    if (!textWithoutThinking.trim()) {
+      return;
+    }
+
     try {
       setIsPlaying(true);
 
+      // Create abort controller for this request
+      const abortController = new AbortController();
+      abortControllerRef.current = abortController;
+
       // Generate speech using OpenAI TTS
-      const response = await openai.audio.speech.create({
-        // eslint-disable-next-line @typescript-eslint/no-explicit-any
-        model: "kokoro" as any,
-        // eslint-disable-next-line @typescript-eslint/no-explicit-any
-        voice: "af_sky+af_bella" as any,
-        input: textWithoutThinking,
-        response_format: "mp3"
-      });
+      const response = await openai.audio.speech.create(
+        {
+          // eslint-disable-next-line @typescript-eslint/no-explicit-any
+          model: "kokoro" as any,
+          // eslint-disable-next-line @typescript-eslint/no-explicit-any
+          voice: "af_sky+af_bella" as any,
+          input: textWithoutThinking,
+          response_format: "mp3"
+        },
+        { signal: abortController.signal }
+      );
 
       // Convert response to blob and create audio URL
       const blob = new Blob([await response.arrayBuffer()], { type: "audio/mp3" });
       const audioUrl = URL.createObjectURL(blob);
+      audioUrlRef.current = audioUrl;
 
       // Create and play audio
       const audio = new Audio(audioUrl);
       audioRef.current = audio;
 
+      // Register with audio manager to stop any other playing audio
+      audioManager.setCurrentAudio(audio, () => {
+        setIsPlaying(false);
+        if (audioUrlRef.current) {
+          URL.revokeObjectURL(audioUrlRef.current);
+          audioUrlRef.current = null;
+        }
+        audioRef.current = null;
+        abortControllerRef.current = null;
+      });
+
       audio.onended = () => {
         setIsPlaying(false);
-        URL.revokeObjectURL(audioUrl);
+        if (audioUrlRef.current) {
+          URL.revokeObjectURL(audioUrlRef.current);
+          audioUrlRef.current = null;
+        }
         audioRef.current = null;
+        abortControllerRef.current = null;
       };
 
       audio.onerror = () => {
         console.error("Error playing audio");
         setIsPlaying(false);
-        URL.revokeObjectURL(audioUrl);
+        if (audioUrlRef.current) {
+          URL.revokeObjectURL(audioUrlRef.current);
+          audioUrlRef.current = null;
+        }
         audioRef.current = null;
+        abortControllerRef.current = null;
       };
 
       await audio.play();
     } catch (error) {
+      // Ignore intentional aborts
+      if (error instanceof Error && error.name === "AbortError") {
+        return;
+      }
       console.error("TTS error:", error);
       setIsPlaying(false);
+      // Cleanup on error
+      if (audioUrlRef.current) {
+        URL.revokeObjectURL(audioUrlRef.current);
+        audioUrlRef.current = null;
+      }
+      audioRef.current = null;
+      abortControllerRef.current = null;
     }
   }, [textWithoutThinking, isPlaying, openai, canUseTTS]);
 
@@ -182,10 +270,22 @@ function SystemMessage({
   // Cleanup on unmount
   useEffect(() => {
     return () => {
+      // Abort any in-flight requests
+      if (abortControllerRef.current) {
+        abortControllerRef.current.abort();
+        abortControllerRef.current = null;
+      }
+      // Stop and cleanup audio
       if (audioRef.current) {
         audioRef.current.pause();
+        audioRef.current.src = "";
         audioRef.current = null;
       }
+      // Revoke object URL to prevent memory leak
+      if (audioUrlRef.current) {
+        URL.revokeObjectURL(audioUrlRef.current);
+        audioUrlRef.current = null;
+      }
     };
   }, []);