Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions backend/routers/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,7 +661,7 @@ async def close(code: int = 1000):
# Transcripts
#
current_conversation_id = None
translation_enabled = including_combined_segments and stt_language == 'multi' and language not in ["multi", "auto"]
translation_enabled = including_combined_segments and (stt_language == 'multi' and language not in ["multi"])
language_cache = TranscriptSegmentLanguageCache()
translation_service = TranslationService()

Expand Down Expand Up @@ -728,7 +728,7 @@ async def stream_transcript_process():
nonlocal current_conversation_id, including_combined_segments, translation_enabled, speech_profile_processed, speaker_to_person_map, suggested_segments, words_transcribed_since_last_record

while websocket_active or len(realtime_segment_buffers) > 0 or len(realtime_photo_buffers) > 0:
await asyncio.sleep(0.3)
await asyncio.sleep(0.6)

if not realtime_segment_buffers and not realtime_photo_buffers:
continue
Expand Down
117 changes: 111 additions & 6 deletions backend/utils/translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,31 +13,136 @@

# Google Cloud project ID taken from the environment; None when
# GOOGLE_CLOUD_PROJECT is not set.
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")

# Common English non-lexical utterances (fillers, interjections, short
# responses) that can confuse language detectors. Stripping them from short,
# ambiguous text helps prevent misclassification.
# NOTE(review): a few entries ('agreed', 'cheers', 'congrats', 'encore') are
# ordinary words rather than non-lexical sounds -- confirm they are intended.
_non_lexical_utterances = {
    # Hesitations and fillers
    'ah',
    'aha',
    'ahem',
    'eh',
    'er',
    'erm',
    'ew',
    'ha',
    'hah',
    'harrumph',
    'hee',
    'heh',
    'hm',
    'hmm',
    'hmmm',
    'ho',
    'huh',
    'mm',
    'mmm',
    'mhm',
    'mhmm',
    'oh',
    'ooh',
    'um',
    'uh',
    'uh-huh',
    'uh-oh',
    'whoa',
    # Interjections and exclamations
    'ack',
    'aah',
    'ach',
    'agreed',
    'argh',
    'aw',
    'aww',
    'bam',
    'bah',
    'boo',
    'brr',
    'cheers',
    'congrats',
    'dang',
    'darn',
    'duh',
    'eek',
    'eep',
    'encore',
    'gosh',
    'grr',
    'gulp',
    'haha',
    'hehe',
    'hey',
    'hooray',
    'hurrah',
    'huzzah',
    'jeez',
    'meh',
    'ouch',
    'ow',
    'oy',
    'phew',
    'pfft',
    'pish',
    'psst',
    'shh',
    'shoo',
    'tsk',
    'tut-tut',
    'ugh',
    'wahoo',
    'whew',
    'whoops',
    'wow',
    'yahoo',
    'yay',
    'yeah',
    # Common short responses that can be language-agnostic
    'yep',
    'yup',
    'yo',
    'yikes',
    'yowza',
    'zing',
}

# Pre-compiled, case-insensitive whole-word matcher for the utterances above.
#
# The alternatives are sorted longest-first (ties broken alphabetically):
#   * regex alternation takes the first alternative that matches, and '-' is a
#     non-word character, so with 'uh' ahead of 'uh-huh' the input "uh-huh"
#     would be stripped down to a stray "-" instead of removed entirely;
#   * iterating the set directly gives an arbitrary order that can differ
#     between interpreter runs, making the pattern non-deterministic.
_non_lexical_utterances_pattern = re.compile(
    r'\b('
    + '|'.join(
        re.escape(word)
        for word in sorted(_non_lexical_utterances, key=lambda w: (-len(w), w))
    )
    + r')\b',
    re.IGNORECASE,
)

# Module-level Google Cloud Translation (v3) client, created once when this
# module is imported and reused by the functions below.
_client = translate_v3.TranslationServiceClient()
# Resource path passed as `parent` on API calls. PROJECT_ID is interpolated
# as-is, so an unset GOOGLE_CLOUD_PROJECT yields "projects/None/locations/global".
_parent = f"projects/{PROJECT_ID}/locations/global"
# MIME type passed as `mime_type` on API calls: content is sent as plain text.
_mime_type = "text/plain"


def detect_language(text: str) -> str | None:
def detect_language(text: str, remove_non_lexical: bool = False) -> str | None:
"""
Detects the language of the provided text using Google Cloud Translate API.
Uses a cache to avoid redundant detections.

Args:
text: The text to detect language for
remove_non_lexical: If True, removes common non-lexical utterances before detection.

Returns:
The language code of the detected language (e.g., 'en', 'vi', 'fr') if confidence >= 1,
or None if no language with sufficient confidence is found
"""
if text in detection_cache:
detection_cache.move_to_end(text)
return detection_cache[text]
text_for_detection = text
if remove_non_lexical:
cleaned_text = _non_lexical_utterances_pattern.sub('', text)
text_for_detection = re.sub(r'\s+', ' ', cleaned_text).strip()

if not text_for_detection:
return None

if text_for_detection in detection_cache:
detection_cache.move_to_end(text_for_detection)
return detection_cache[text_for_detection]

try:
# Call the Google Cloud Translate API to detect language
response = _client.detect_language(parent=_parent, content=text, mime_type=_mime_type)
response = _client.detect_language(parent=_parent, content=text_for_detection, mime_type=_mime_type)

detected_language = None
# Return the language code only if confidence is >= 1
Expand All @@ -49,7 +154,7 @@ def detect_language(text: str) -> str | None:

if len(detection_cache) >= MAX_DETECTION_CACHE_SIZE:
detection_cache.popitem(last=False)
detection_cache[text] = detected_language
detection_cache[text_for_detection] = detected_language
return detected_language
except Exception as e:
print(f"Language detection error: {e}")
Expand Down
2 changes: 1 addition & 1 deletion backend/utils/translation_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def is_in_target_language(self, segment_id: str, text: str, target_language: str

# Check each new sentence. If any is not in the target language, the whole segment is marked for translation.
for sentence in sentences:
detected_lang = detect_language(sentence)
detected_lang = detect_language(sentence, remove_non_lexical=True)
if detected_lang and detected_lang != target_language:
self.cache[segment_id] = (text, False)
return False
Expand Down