BasedHardware · mdmohsin7 · Jun 1, 2026 · Jun 1, 2026 · greptile-apps · Jun 1, 2026
diff --git a/app/lib/services/sockets/transcription_service.dart b/app/lib/services/sockets/transcription_service.dart
@@ -62,7 +62,7 @@ class ConversationTranscriptSegmentSocketService extends TranscriptSegmentSocket
 
 class CustomSttTranscriptSegmentSocketService extends TranscriptSegmentSocketService {
   CustomSttTranscriptSegmentSocketService.create(super.sampleRate, super.codec, super.language, {super.source})
-    : super.create(includeSpeechProfile: true, customSttMode: true);
+      : super.create(includeSpeechProfile: true, customSttMode: true);
 }
 
 enum SocketServiceState { connected, disconnected }
@@ -97,8 +97,7 @@ class TranscriptSegmentSocketService implements IPureSocketListener {
     this.sttConfigId,
     this.onboardingMode = false,
   }) {
-    var params =
-        '?language=$language&sample_rate=$sampleRate&codec=$codec&uid=${SharedPreferencesUtil().uid}'
+    var params = '?language=$language&sample_rate=$sampleRate&codec=$codec&uid=${SharedPreferencesUtil().uid}'
         '&include_speech_profile=$includeSpeechProfile&stt_service=${SharedPreferencesUtil().transcriptionModel}'
         '&conversation_timeout=${SharedPreferencesUtil().conversationSilenceDuration}';
 
@@ -117,6 +116,11 @@ class TranscriptSegmentSocketService implements IPureSocketListener {
     // Enable server-side speaker auto-assignment (backward compatibility flag)
     params += '&speaker_auto_assign=enabled';
 
+    // Whether the backend may auto-create a new person when it detects a name.
+    // Mirrors the user's "Auto-create Speakers" setting; a detected name with no
+    // existing match is still surfaced for manual tagging when this is off.
+    params += '&create_speakers=${SharedPreferencesUtil().autoCreateSpeakersEnabled}';
+
     if (SharedPreferencesUtil().vadGateEnabled) {
       params += '&vad_gate=enabled';
     }
@@ -345,9 +349,8 @@ class TranscriptSocketServiceFactory {
     if (config.provider == SttProvider.geminiLive) {
       return GeminiStreamingSttSocket(
         apiKey: config.apiKey ?? '',
-        model: config.effectiveModel.isNotEmpty
-            ? config.effectiveModel
-            : 'gemini-2.5-flash-native-audio-preview-12-2025',
+        model:
+            config.effectiveModel.isNotEmpty ? config.effectiveModel : 'gemini-2.5-flash-native-audio-preview-12-2025',
         language: config.effectiveLanguage,
         sampleRate: sampleRate,
         transcoder: transcoder,
@@ -357,12 +360,10 @@ class TranscriptSocketServiceFactory {
     // Deepgram Live and other streaming providers
     final requestConfig = config.requestConfig;
     final url = requestConfig['url'] ?? config.effectiveUrl;
-    final headers = requestConfig['headers'] != null
-        ? Map<String, String>.from(requestConfig['headers'])
-        : (config.headers ?? {});
-    final params = requestConfig['params'] != null
-        ? Map<String, String>.from(requestConfig['params'])
-        : (config.params ?? {});
+    final headers =
+        requestConfig['headers'] != null ? Map<String, String>.from(requestConfig['headers']) : (config.headers ?? {});
+    final params =
+        requestConfig['params'] != null ? Map<String, String>.from(requestConfig['params']) : (config.params ?? {});
 
     // Build WebSocket URL with query params
     final wsUrl = _buildUrlWithParams(url, params);
@@ -386,12 +387,10 @@ class TranscriptSocketServiceFactory {
 
     final requestConfig = config.requestConfig;
     final url = requestConfig['url'] ?? config.effectiveUrl;
-    final headers = requestConfig['headers'] != null
-        ? Map<String, String>.from(requestConfig['headers'])
-        : (config.headers ?? {});
-    final params = requestConfig['params'] != null
-        ? Map<String, String>.from(requestConfig['params'])
-        : (config.params ?? {});
+    final headers =
+        requestConfig['headers'] != null ? Map<String, String>.from(requestConfig['headers']) : (config.headers ?? {});
+    final params =
+        requestConfig['params'] != null ? Map<String, String>.from(requestConfig['params']) : (config.params ?? {});
     final audioFieldName = requestConfig['audio_field_name'] ?? config.audioFieldName ?? 'file';
     final requestType = config.effectiveRequestType;
 

diff --git a/backend/routers/transcribe.py b/backend/routers/transcribe.py
@@ -232,6 +232,7 @@ async def _stream_handler(
     custom_stt_mode: CustomSttMode = CustomSttMode.disabled,
     onboarding_mode: bool = False,
     speaker_auto_assign_enabled: bool = False,
+    create_speakers: bool = True,
     vad_gate_override: Optional[str] = None,
     call_id: Optional[str] = None,
 ):
@@ -2227,7 +2228,7 @@ async def stream_transcript_process():
                         person = user_db.get_person_by_name(uid, detected_name)
                         if person:
                             person_id = person['id']
-                        else:
+                        elif create_speakers:
                             # Backend creates person if missing
                             person_id = str(uuid.uuid4())
                             user_db.create_person(
@@ -2239,20 +2240,25 @@ async def stream_transcript_process():
                                     'updated_at': datetime.now(timezone.utc),
                                 },
                             )
+                        else:
+                            # Auto-create is off: persist no new person and record no speaker/segment mapping.
+                            # The detected name is still sent (with an empty person_id) for manual tagging.
+                            person_id = None
                         _send_message_event(
                             SpeakerLabelSuggestionEvent(
                                 speaker_id=segment.speaker_id,
-                                person_id=_person_id_for_client(person_id),
+                                person_id=_person_id_for_client(person_id) if person_id else "",
                                 person_name=detected_name,
                                 segment_id=segment.id,
                             )
                         )
                         # Set maps for future segments, but only if diarization is active
                         # (speaker_id > 0 means diarization assigned a real speaker)
                         # Set maps for future segments using helper function
-                        if should_update_speaker_to_person_map(segment.speaker_id):
-                            speaker_to_person_map[segment.speaker_id] = (person_id, detected_name)
-                        segment_person_assignment_map[segment.id] = person_id
+                        if person_id:
+                            if should_update_speaker_to_person_map(segment.speaker_id):
+                                speaker_to_person_map[segment.speaker_id] = (person_id, detected_name)
+                            segment_person_assignment_map[segment.id] = person_id
                         suggested_segments.add(segment.id)
 
         # Wait for speaker_identification_task to finish consuming its queue and spawning
@@ -2868,6 +2874,7 @@ async def _listen(
     custom_stt_mode: CustomSttMode = CustomSttMode.disabled,
     onboarding_mode: bool = False,
     speaker_auto_assign_enabled: bool = False,
+    create_speakers: bool = True,
     vad_gate_override: Optional[str] = None,
     call_id: Optional[str] = None,
 ):
@@ -2895,6 +2902,7 @@ async def _listen(
         custom_stt_mode=custom_stt_mode,
         onboarding_mode=onboarding_mode,
         speaker_auto_assign_enabled=speaker_auto_assign_enabled,
+        create_speakers=create_speakers,
         vad_gate_override=vad_gate_override,
         call_id=call_id,
     )
@@ -2916,6 +2924,7 @@ async def listen_handler(
     custom_stt: str = 'disabled',
     onboarding: str = 'disabled',
     speaker_auto_assign: str = 'disabled',
+    create_speakers: bool = True,
     vad_gate: str = '',
     call_id: Optional[str] = None,
 ):
@@ -2937,6 +2946,7 @@ async def listen_handler(
         custom_stt_mode=custom_stt_mode,
         onboarding_mode=onboarding_mode,
         speaker_auto_assign_enabled=speaker_auto_assign_enabled,
+        create_speakers=create_speakers,
         vad_gate_override=vad_gate_override,
         call_id=call_id,
     )