Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
212 changes: 181 additions & 31 deletions OST/Sources/App/AppState.swift

Large diffs are not rendered by default.

8 changes: 7 additions & 1 deletion OST/Sources/App/OSTApp.swift
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,13 @@ struct OSTApp: App {
onOpenSettings: openSettings,
onOpenLogs: { windowManager.showLogViewer() },
onOpenSessions: { windowManager.showSessionHistory(recorder: appState.sessionRecorder, alwaysOnTop: settings.sessionWindowAlwaysOnTop) },
onToggleOverlayLock: { locked in windowManager.updateOverlayLock(locked: locked) },
onToggleOverlayLock: { locked in
windowManager.updateOverlayLock(locked: locked)
if settings.overlayDisplayMode == "split" {
settings.overlay2Locked = locked
windowManager.updateOverlay2Lock(locked: locked)
}
},
onQuit: quitApp
)
}
Expand Down
122 changes: 120 additions & 2 deletions OST/Sources/App/WindowManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,139 @@ import SwiftUI
final class WindowManager: ObservableObject {

private var overlayWindow: OverlayWindow?
private var overlayWindow2: OverlayWindow? // Translation window for split mode
private var settingsWindow: NSWindow?
private var logWindow: NSWindow?
private var sessionWindow: NSWindow?

// MARK: - Overlay

/// Shows the subtitle overlay, choosing the layout from the user's
/// display-mode preference: "split" gets two windows, anything else one.
func showOverlay(appState: AppState, settings: UserSettings) {
    switch settings.overlayDisplayMode {
    case "split":
        showSplitOverlay(appState: appState, settings: settings)
    default:
        showCombinedOverlay(appState: appState, settings: settings)
    }
}

/// Shows the single combined overlay window (recognition + translation in one view).
/// Any split-mode translation window is torn down first so only one layout is visible.
private func showCombinedOverlay(appState: AppState, settings: UserSettings) {
    // Split mode's secondary window must not linger after a mode switch.
    hideOverlayWindow2()

    // Reuse the existing window if one is already on screen.
    if let existing = overlayWindow {
        existing.makeKeyAndOrderFront(nil)
        return
    }
    let view = AnyView(SubtitleView(
        appState: appState,
        settings: settings,
        translationService: appState.translationService
    ))
    let window = OverlayWindow(contentView: view, settings: settings, role: .combined)
    window.makeKeyAndOrderFront(nil)
    overlayWindow = window
}

/// Shows the split-mode pair of overlay windows: one for recognized speech,
/// one for its translation. An existing window is reused rather than rebuilt.
private func showSplitOverlay(appState: AppState, settings: UserSettings) {
    // Primary window: live speech recognition.
    if overlayWindow == nil {
        let recognitionView = AnyView(RecognitionOverlayView(
            appState: appState,
            settings: settings
        ))
        overlayWindow = OverlayWindow(contentView: recognitionView, settings: settings, role: .recognition)
    }
    overlayWindow?.makeKeyAndOrderFront(nil)

    // Secondary window: translated text.
    if overlayWindow2 == nil {
        let translationView = AnyView(TranslationOverlayView(
            appState: appState,
            settings: settings,
            translationService: appState.translationService
        ))
        overlayWindow2 = OverlayWindow(contentView: translationView, settings: settings, role: .translation)
    }
    overlayWindow2?.makeKeyAndOrderFront(nil)
}

/// Applies the lock state to the primary (recognition/combined) overlay window, if present.
func updateOverlayLock(locked: Bool) {
    overlayWindow?.updateLockState(locked: locked)
}

/// Applies the lock state to the secondary (translation) overlay window, if present.
func updateOverlay2Lock(locked: Bool) {
    overlayWindow2?.updateLockState(locked: locked)
}

/// Resets the primary overlay to its default frame and unlocks it.
/// In split mode, both windows are re-laid-out side by side instead.
func resetOverlay(settings: UserSettings) {
    guard settings.overlayDisplayMode != "split" else {
        resetAllOverlaysSideBySide(settings: settings)
        return
    }
    settings.overlayLocked = false
    overlayWindow?.resetFrame()
    overlayWindow?.updateLockState(locked: false)
    settings.overlayFrameSaved = false
}

/// Resets the secondary (translation) overlay to its default frame and unlocks it.
/// In split mode, both windows are re-laid-out side by side instead.
func resetOverlay2(settings: UserSettings) {
    guard settings.overlayDisplayMode != "split" else {
        resetAllOverlaysSideBySide(settings: settings)
        return
    }
    settings.overlay2Locked = false
    overlayWindow2?.resetFrame()
    overlayWindow2?.updateLockState(locked: false)
    settings.overlay2FrameSaved = false
}

/// Resets both overlay windows to a side-by-side layout centered on the main
/// screen's visible area, and unlocks them.
///
/// - Parameters:
///   - settings: User settings whose lock and frame-saved flags are cleared.
///   - windowWidth: Width applied to each overlay window.
///   - windowHeight: Height applied to each overlay window.
///   - gap: Horizontal spacing between the two windows.
private func resetAllOverlaysSideBySide(
    settings: UserSettings,
    windowWidth: CGFloat = 500,
    windowHeight: CGFloat = 200,
    gap: CGFloat = 20
) {
    // visibleFrame excludes the dock and menu bar; fall back to a common
    // laptop resolution when no screen is available — TODO confirm desired fallback.
    let screen = NSScreen.main?.visibleFrame ?? NSRect(x: 0, y: 0, width: 1440, height: 900)
    let totalWidth = windowWidth * 2 + gap
    let startX = screen.origin.x + max(0, (screen.width - totalWidth) / 2)
    let baseY = screen.origin.y + 200

    let leftFrame = NSRect(x: startX, y: baseY, width: windowWidth, height: windowHeight)
    let rightFrame = NSRect(x: leftFrame.maxX + gap, y: baseY, width: windowWidth, height: windowHeight)

    // Recognition window (left).
    overlayWindow?.setFrame(leftFrame, display: true, animate: true)
    settings.overlayLocked = false
    overlayWindow?.updateLockState(locked: false)
    settings.overlayFrameSaved = false

    // Translation window (right).
    overlayWindow2?.setFrame(rightFrame, display: true, animate: true)
    settings.overlay2Locked = false
    overlayWindow2?.updateLockState(locked: false)
    settings.overlay2FrameSaved = false
}

/// Hides and releases the primary overlay window, plus the split-mode
/// translation window if it exists.
func hideOverlay() {
    overlayWindow?.orderOut(nil)
    overlayWindow = nil
    hideOverlayWindow2()
}

/// Hides and releases the secondary (translation) overlay window, if present.
private func hideOverlayWindow2() {
    overlayWindow2?.orderOut(nil)
    overlayWindow2 = nil
}

// MARK: - Settings
Expand All @@ -40,7 +150,15 @@ final class WindowManager: ObservableObject {
NSApp.activate(ignoringOtherApps: true)
return
}
let view = SettingsView(settings: settings, onOpenLogs: onOpenLogs, onOpenSessions: onOpenSessions)
let view = SettingsView(
settings: settings,
onOpenLogs: onOpenLogs,
onOpenSessions: onOpenSessions,
onResetOverlay: { [weak self] in self?.resetOverlay(settings: settings) },
onResetOverlay2: { [weak self] in self?.resetOverlay2(settings: settings) },
onToggleOverlayLock: { [weak self] locked in self?.updateOverlayLock(locked: locked) },
onToggleOverlay2Lock: { [weak self] locked in self?.updateOverlay2Lock(locked: locked) }
)
let window = NSWindow(
contentRect: NSRect(x: 0, y: 0, width: 560, height: 480),
styleMask: [.titled, .closable, .miniaturizable],
Expand Down
14 changes: 10 additions & 4 deletions OST/Sources/Audio/SystemAudioCapture.swift
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,19 @@ final class SystemAudioCapture: NSObject, @unchecked Sendable {
private var stream: SCStream?
private var continuation: AsyncStream<CMSampleBuffer>.Continuation?
private(set) var audioBuffers: AsyncStream<CMSampleBuffer>?
// Count of audio buffers received during the current capture session.
// Lock-guarded because SCStream delivers buffers on a background queue
// while start/stop run elsewhere (class is @unchecked Sendable).
private let bufferLock = NSLock()
private var _bufferCount: Int = 0
private var bufferCount: Int {
    get { bufferLock.withLock { _bufferCount } }
    set { bufferLock.withLock { _bufferCount = newValue } }
}

/// Requests permission if needed, then starts capturing system audio.
/// Returns a fresh AsyncStream of audio buffers for each capture session.
func startCapture() async throws -> AsyncStream<CMSampleBuffer> {
guard stream == nil else {
AppLogger.post("Stream already active, returning existing", category: .audio)
return audioBuffers!
return audioBuffers ?? AsyncStream { $0.finish() }
}

bufferCount = 0
Expand Down Expand Up @@ -95,14 +100,15 @@ final class SystemAudioCapture: NSObject, @unchecked Sendable {
/// Stops the active capture stream, if any, and finishes the buffer stream.
func stopCapture() async {
    guard let current = stream else { return }
    stream = nil
    // Finish the continuation BEFORE awaiting stopCapture so late yields
    // from the capture callback cannot land on a dangling continuation.
    continuation?.finish()
    continuation = nil
    AppLogger.post("Stopping capture (received \(bufferCount) audio buffers)", category: .audio)
    do {
        try await current.stopCapture()
    } catch {
        // Stopping an already-stopped stream can throw; treated as non-fatal.
        AppLogger.post("Stop error (non-fatal): \(error.localizedDescription)", category: .audio)
    }
}

// MARK: - Helpers
Expand Down
11 changes: 11 additions & 0 deletions OST/Sources/Settings/UserSettings.swift
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,17 @@ final class UserSettings: ObservableObject {
@AppStorage("overlayFrameY") var overlayFrameY: Double = 200
@AppStorage("overlayFrameSaved") var overlayFrameSaved: Bool = false

// Display mode: "combined" (single window) or "split" (recognition + translation)
@AppStorage("overlayDisplayMode") var overlayDisplayMode: String = "combined"

// Second overlay (translation window) frame
@AppStorage("overlay2FrameX") var overlay2FrameX: Double = 200
@AppStorage("overlay2FrameY") var overlay2FrameY: Double = 450
@AppStorage("overlay2Width") var overlay2Width: Double = 600
@AppStorage("overlay2Height") var overlay2Height: Double = 200
@AppStorage("overlay2FrameSaved") var overlay2FrameSaved: Bool = false
@AppStorage("overlay2Locked") var overlay2Locked: Bool = true

var fontColor: Color {
get { Self.decodeColor(fontColorData) ?? .white }
set { fontColorData = Self.encodeColor(newValue) }
Expand Down
38 changes: 23 additions & 15 deletions OST/Sources/Speech/SpeechRecognizer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ final class SpeechRecognizer: ObservableObject {
recognitionRequest = nil
recognitionTask = nil
currentText = ""
finalizedText = ""
}

// MARK: - Recognition Task
Expand All @@ -88,42 +89,49 @@ final class SpeechRecognizer: ObservableObject {
throw SpeechRecognizerError.recognizerUnavailable
}

// Clean up previous task without clearing isActive
recognitionRequest?.endAudio()
recognitionTask?.cancel()
recognitionRequest = nil
recognitionTask = nil

// Create the new request BEFORE cleaning up the old one
// to minimize the window where recognitionRequest is nil
// and audio buffers from startConsumingBuffers are lost.
let request = SFSpeechAudioBufferRecognitionRequest()
request.shouldReportPartialResults = true
if useOnDevice && recognizer.supportsOnDeviceRecognition {
request.requiresOnDeviceRecognition = true
}
request.addsPunctuation = true
recognitionRequest = request

// Now clean up previous task
let oldRequest = recognitionRequest
let oldTask = recognitionTask
recognitionRequest = request // Swap immediately so append() uses new request

oldRequest?.endAudio()
oldTask?.cancel()

AppLogger.shared.log("Starting recognition task (onDevice: \(useOnDevice))", category: .speech)

recognitionTask = recognizer.recognitionTask(with: request) { [weak self] result, error in
guard let self else { return }
if let result {
Task { @MainActor in
Task { @MainActor in
if let result {
let text = result.bestTranscription.formattedString
self.currentText = text
if result.isFinal {
AppLogger.shared.log("Final: \"\(text)\"", category: .speech)
self.finalizedText += (self.finalizedText.isEmpty ? "" : " ") + text
self.finalizedText = ""
self.currentText = ""
// Auto-restart recognition for continuous listening
self.restartRecognition()
return
}
// Partial result with concurrent error — task is dying
if error != nil {
AppLogger.shared.log("Partial result with error, restarting", category: .speech)
self.restartRecognition()
return
}
}
}
if let error {
Task { @MainActor in
if let error, result == nil {
AppLogger.shared.log("Recognition error: \(error.localizedDescription)", category: .error)
self.currentText = ""
// Auto-restart on transient errors
self.restartRecognition()
}
}
Expand Down
22 changes: 2 additions & 20 deletions OST/Sources/Translation/TranslationService.swift
Original file line number Diff line number Diff line change
Expand Up @@ -35,29 +35,11 @@ final class TranslationService: ObservableObject {
return try await fallbackTranslation(trimmed)
}

/// Translates text with context from recent entries for consistency.
/// Sends context + new text separated by newlines, then extracts only the new translation.
func translateWithContext(_ text: String, context: [String]) async throws -> String {
    let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
    guard !trimmed.isEmpty else { return "" }
    guard let session, !context.isEmpty else {
        return try await translate(trimmed)
    }

    // Prepend the context lines so the translation engine sees prior phrasing.
    let payload = (context + [trimmed]).joined(separator: "\n")
    let response = try await session.translate(payload)

    // Keep only the trailing lines that correspond to the new text; if the
    // engine collapsed lines, fall back to returning the whole response.
    let resultLines = response.targetText.components(separatedBy: "\n")
    guard resultLines.count > context.count else { return response.targetText }
    return resultLines
        .dropFirst(context.count)
        .joined(separator: "\n")
        .trimmingCharacters(in: .whitespacesAndNewlines)
}

private func fallbackTranslation(_ text: String) async throws -> String {
let sourceLang = configuration?.source?.languageCode?.identifier ?? "en"
let targetLang = configuration?.target?.languageCode?.identifier ?? "ko"
let encoded = text.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? text
let urlString = "https://translate.googleapis.com/translate_a/single?client=gtx&sl=\(sourceLang)&tl=ko&dt=t&q=\(encoded)"
let urlString = "https://translate.googleapis.com/translate_a/single?client=gtx&sl=\(sourceLang)&tl=\(targetLang)&dt=t&q=\(encoded)"

guard let url = URL(string: urlString) else { return text }

Expand Down
Loading