Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 17 additions & 18 deletions app/lib/services/sockets/transcription_service.dart
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class ConversationTranscriptSegmentSocketService extends TranscriptSegmentSocket

class CustomSttTranscriptSegmentSocketService extends TranscriptSegmentSocketService {
CustomSttTranscriptSegmentSocketService.create(super.sampleRate, super.codec, super.language, {super.source})
: super.create(includeSpeechProfile: true, customSttMode: true);
: super.create(includeSpeechProfile: true, customSttMode: true);
}

enum SocketServiceState { connected, disconnected }
Expand Down Expand Up @@ -97,8 +97,7 @@ class TranscriptSegmentSocketService implements IPureSocketListener {
this.sttConfigId,
this.onboardingMode = false,
}) {
var params =
'?language=$language&sample_rate=$sampleRate&codec=$codec&uid=${SharedPreferencesUtil().uid}'
var params = '?language=$language&sample_rate=$sampleRate&codec=$codec&uid=${SharedPreferencesUtil().uid}'
'&include_speech_profile=$includeSpeechProfile&stt_service=${SharedPreferencesUtil().transcriptionModel}'
'&conversation_timeout=${SharedPreferencesUtil().conversationSilenceDuration}';

Expand All @@ -117,6 +116,11 @@ class TranscriptSegmentSocketService implements IPureSocketListener {
// Enable server-side speaker auto-assignment (backward compatibility flag)
params += '&speaker_auto_assign=enabled';

// Whether the backend may auto-create a new person when it detects a name.
// Mirrors the user's "Auto-create Speakers" setting; a detected name with no
// existing match is still surfaced for manual tagging when this is off.
params += '&create_speakers=${SharedPreferencesUtil().autoCreateSpeakersEnabled}';

if (SharedPreferencesUtil().vadGateEnabled) {
params += '&vad_gate=enabled';
}
Expand Down Expand Up @@ -345,9 +349,8 @@ class TranscriptSocketServiceFactory {
if (config.provider == SttProvider.geminiLive) {
return GeminiStreamingSttSocket(
apiKey: config.apiKey ?? '',
model: config.effectiveModel.isNotEmpty
? config.effectiveModel
: 'gemini-2.5-flash-native-audio-preview-12-2025',
model:
config.effectiveModel.isNotEmpty ? config.effectiveModel : 'gemini-2.5-flash-native-audio-preview-12-2025',
language: config.effectiveLanguage,
sampleRate: sampleRate,
transcoder: transcoder,
Expand All @@ -357,12 +360,10 @@ class TranscriptSocketServiceFactory {
// Deepgram Live and other streaming providers
final requestConfig = config.requestConfig;
final url = requestConfig['url'] ?? config.effectiveUrl;
final headers = requestConfig['headers'] != null
? Map<String, String>.from(requestConfig['headers'])
: (config.headers ?? {});
final params = requestConfig['params'] != null
? Map<String, String>.from(requestConfig['params'])
: (config.params ?? {});
final headers =
requestConfig['headers'] != null ? Map<String, String>.from(requestConfig['headers']) : (config.headers ?? {});
final params =
requestConfig['params'] != null ? Map<String, String>.from(requestConfig['params']) : (config.params ?? {});

// Build WebSocket URL with query params
final wsUrl = _buildUrlWithParams(url, params);
Expand All @@ -386,12 +387,10 @@ class TranscriptSocketServiceFactory {

final requestConfig = config.requestConfig;
final url = requestConfig['url'] ?? config.effectiveUrl;
final headers = requestConfig['headers'] != null
? Map<String, String>.from(requestConfig['headers'])
: (config.headers ?? {});
final params = requestConfig['params'] != null
? Map<String, String>.from(requestConfig['params'])
: (config.params ?? {});
final headers =
requestConfig['headers'] != null ? Map<String, String>.from(requestConfig['headers']) : (config.headers ?? {});
final params =
requestConfig['params'] != null ? Map<String, String>.from(requestConfig['params']) : (config.params ?? {});
final audioFieldName = requestConfig['audio_field_name'] ?? config.audioFieldName ?? 'file';
final requestType = config.effectiveRequestType;

Expand Down
20 changes: 15 additions & 5 deletions backend/routers/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ async def _stream_handler(
custom_stt_mode: CustomSttMode = CustomSttMode.disabled,
onboarding_mode: bool = False,
speaker_auto_assign_enabled: bool = False,
create_speakers: bool = True,
vad_gate_override: Optional[str] = None,
call_id: Optional[str] = None,
):
Expand Down Expand Up @@ -2227,7 +2228,7 @@ async def stream_transcript_process():
person = user_db.get_person_by_name(uid, detected_name)
if person:
person_id = person['id']
else:
elif create_speakers:
# Backend creates person if missing
person_id = str(uuid.uuid4())
user_db.create_person(
Expand All @@ -2239,20 +2240,25 @@ async def stream_transcript_process():
'updated_at': datetime.now(timezone.utc),
},
)
else:
# User disabled auto-create: don't persist a new person.
# Still surface the detected name so it can be tagged manually.
person_id = None
_send_message_event(
SpeakerLabelSuggestionEvent(
speaker_id=segment.speaker_id,
person_id=_person_id_for_client(person_id),
person_id=_person_id_for_client(person_id) if person_id else "",
person_name=detected_name,
segment_id=segment.id,
)
)
# Set maps for future segments, but only if diarization is active
# (speaker_id > 0 means diarization assigned a real speaker)
# Set maps for future segments using helper function
if should_update_speaker_to_person_map(segment.speaker_id):
speaker_to_person_map[segment.speaker_id] = (person_id, detected_name)
segment_person_assignment_map[segment.id] = person_id
if person_id:
if should_update_speaker_to_person_map(segment.speaker_id):
speaker_to_person_map[segment.speaker_id] = (person_id, detected_name)
segment_person_assignment_map[segment.id] = person_id
suggested_segments.add(segment.id)
Comment on lines +2258 to 2262
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Repeated suggestions for the same speaker when `create_speakers=False`

When `person_id` is `None` (unmatched name, auto-create disabled), `speaker_to_person_map` is not updated for that `speaker_id`. This means every subsequent segment from the same speaker whose text also triggers `detect_speaker_from_text` will fall through the map lookup and emit another `SpeakerLabelSuggestionEvent(person_id="", person_name=detected_name)`. With `create_speakers=True` the map entry short-circuits future segments, but here it stays empty, so the client may receive many duplicate unresolved suggestions for the same speaker within a single session. Consider storing a sentinel in `speaker_to_person_map` (e.g. `(None, detected_name)`) so subsequent segments from that `speaker_id` are handled via the map path and only one suggestion is emitted.


# Wait for speaker_identification_task to finish consuming its queue and spawning
Expand Down Expand Up @@ -2868,6 +2874,7 @@ async def _listen(
custom_stt_mode: CustomSttMode = CustomSttMode.disabled,
onboarding_mode: bool = False,
speaker_auto_assign_enabled: bool = False,
create_speakers: bool = True,
vad_gate_override: Optional[str] = None,
call_id: Optional[str] = None,
):
Expand Down Expand Up @@ -2895,6 +2902,7 @@ async def _listen(
custom_stt_mode=custom_stt_mode,
onboarding_mode=onboarding_mode,
speaker_auto_assign_enabled=speaker_auto_assign_enabled,
create_speakers=create_speakers,
vad_gate_override=vad_gate_override,
call_id=call_id,
)
Expand All @@ -2916,6 +2924,7 @@ async def listen_handler(
custom_stt: str = 'disabled',
onboarding: str = 'disabled',
speaker_auto_assign: str = 'disabled',
create_speakers: bool = True,
vad_gate: str = '',
call_id: Optional[str] = None,
):
Expand All @@ -2937,6 +2946,7 @@ async def listen_handler(
custom_stt_mode=custom_stt_mode,
onboarding_mode=onboarding_mode,
speaker_auto_assign_enabled=speaker_auto_assign_enabled,
create_speakers=create_speakers,
vad_gate_override=vad_gate_override,
call_id=call_id,
)
Expand Down
Loading