[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RlNZLER/Subtitle-Generator-Translator/blob/main/src/whisper.ipynb)

# Step 1:
# Execute the following task and upload an audio file, or files, to the content directory while you wait for the task to complete.

In [None]:
!pip install git+https://github.com/openai/whisper.git

# Step 2:
# Once the above task has completed and all audio files have been successfully uploaded to the content directory, execute the following task.

In [None]:
import glob, os
audioFiles = glob.glob('/content/*.*')
for i in range(len(audioFiles)):
  for audioFile in glob.glob(audioFiles[i]):
    baseFile = os.path.splitext(audioFile)[0]
    !whisper --model turbo --output_format srt --task transcribe "$audioFile"
    os.remove(audioFile)

# Step 3:
# Download your SRT files from the content directory.

## Install & imports

In [None]:
# @title Install dependencies
!pip -q install srt deep-translator deepl

import os
import time
from typing import List
import srt

# Colab helpers
try:
    from google.colab import files
except ImportError:
    files = None

In [None]:
# @title Upload your SRT file
# @markdown Click "Choose Files" and select your `.srt`

import glob
import os

# Resolve `input_path` to ONE real .srt file; the translation step opens this
# path directly, so it must not be a glob pattern.
uploaded_srts = []
if files is not None:
    # In Colab, files.upload() shows the file picker, stores the chosen files
    # in the current working directory and returns a dict keyed by file name.
    uploaded_srts = [
        os.path.abspath(name)
        for name in files.upload()
        if name.lower().endswith(".srt") and os.path.exists(name)
    ]

if uploaded_srts:
    input_path = uploaded_srts[0]
else:
    # Nothing uploaded (or not running in Colab): fall back to a subtitle file
    # already present in the content directory, e.g. one produced by whisper.
    existing_srts = sorted(glob.glob('/content/*.srt'))
    if not existing_srts:
        raise FileNotFoundError("No .srt file was uploaded or found in /content.")
    input_path = existing_srts[0]

print(f"Uploaded: {input_path}")


## Configure translation backend (Google by default)

In [None]:
# @title Backend settings
# @markdown - **backend**: "google" (no key) or "deepl" (requires API key)<br>
# @markdown - **source_lang**: "auto" to autodetect<br>
# @markdown - **target_lang**: e.g., "en" or "en-GB" (DeepL maps to EN)

# Form-backed settings consumed by the translation cell below.
backend       = "google"  # @param ["google", "deepl"]
source_lang   = "auto"    # @param {type:"string"}
target_lang   = "en"      # @param {type:"string"}
batch_size    = 40        # @param {type:"integer"}
sleep_seconds = 0.5       # @param {type:"number"}

# For DeepL, set your API key here or in an env var named DEEPL_API_KEY
DEEPL_API_KEY = ""        # @param {type:"string"}
# An empty form value falls back to the environment variable (or "" if unset).
DEEPL_API_KEY = DEEPL_API_KEY or os.environ.get("DEEPL_API_KEY", "")


## Translate and download

In [None]:
# @title Translate and download the English SRT
from typing import List

class Translator:
    """Common interface for the translation backends in this notebook."""

    def translate_batch(self, texts: List[str]) -> List[str]:
        """Translate ``texts`` and return the results as a list.

        The base implementation is a placeholder: subclasses must override it.

        Raises:
            NotImplementedError: always, when called on the base class.
        """
        raise NotImplementedError()

class GoogleTranslatorBackend(Translator):
    """Translation backend that delegates to ``deep_translator.GoogleTranslator``."""

    def __init__(self, source: str = "auto", target: str = "en"):
        """Create the underlying translator for the ``source`` -> ``target`` pair."""
        # Imported here so the package is only required when this backend is used.
        import deep_translator
        self._translator = deep_translator.GoogleTranslator(source=source, target=target)

    def translate_batch(self, texts: List[str]) -> List[str]:
        """Translate ``texts`` by forwarding them to the wrapped translator."""
        engine = self._translator
        return engine.translate_batch(texts)

class DeepLBackend(Translator):
    """Translation backend that uses the official ``deepl`` client library."""

    def __init__(self, auth_key: str, target: str = "EN", source: str | None = None):
        """Create a DeepL client.

        Args:
            auth_key: DeepL API authentication key.
            target: Target language code. A bare ``"EN"`` is sent as
                ``"EN-US"``; explicit variants such as ``"EN-GB"`` are kept.
            source: Source language code, or ``None``/``"auto"`` (any case) to
                let DeepL detect the language.
        """
        import deepl
        self._translator = deepl.Translator(auth_key)
        # None tells DeepL to auto-detect; accept "auto" regardless of case.
        auto_detect = source is None or source.strip().lower() == "auto"
        self._source = None if auto_detect else source
        # The deepl client rejects the bare "EN" target as deprecated and
        # requires a regional variant, so default it to EN-US.
        target_code = target.strip().upper()
        self._target = "EN-US" if target_code == "EN" else target_code

    def translate_batch(self, texts: List[str]) -> List[str]:
        """Translate ``texts`` in a single DeepL request, preserving order."""
        if not texts:
            # Nothing to translate; skip the API call.
            return []
        import deepl
        res = self._translator.translate_text(
            texts,
            target_lang=self._target,
            source_lang=self._source,
            formality="default",
        )
        return [r.text if isinstance(r, deepl.TextResult) else str(r) for r in res]

def chunked(seq: List[str], n: int) -> List[List[str]]:
    """Split ``seq`` into consecutive slices of at most ``n`` items.

    Args:
        seq: Items to split; order is preserved.
        n: Maximum chunk length; must be at least 1.

    Returns:
        A list of chunks. The last chunk may be shorter than ``n``; an empty
        ``seq`` yields an empty list.

    Raises:
        ValueError: if ``n`` is smaller than 1. A negative step would
            otherwise produce no chunks at all and silently drop every item.
    """
    if n < 1:
        raise ValueError(f"chunk size must be a positive integer, got {n!r}")
    return [seq[i:i + n] for i in range(0, len(seq), n)]

def build_translator(backend: str, source: str, target: str, deepl_key: str = "") -> Translator:
    """Create the translation backend selected by ``backend``.

    Args:
        backend: ``"google"`` or ``"deepl"`` (case-insensitive).
        source: Source language code, passed through to the backend.
        target: Target language code, passed through to the backend.
        deepl_key: DeepL API key; when empty, the ``DEEPL_API_KEY``
            environment variable is used instead.

    Returns:
        A ready-to-use ``Translator`` implementation.

    Raises:
        ValueError: if the backend name is unknown, or DeepL is selected and
            no API key is available.
    """
    name = backend.lower()
    if name == "google":
        return GoogleTranslatorBackend(source=source, target=target)
    if name == "deepl":
        key = deepl_key or os.environ.get("DEEPL_API_KEY", "")
        if not key:
            raise ValueError("DeepL selected but no key provided. Set DEEPL_API_KEY or fill DEEPL_API_KEY cell value.")
        # DeepL needs a regional English variant: keep an explicit one such as
        # "en-GB" as given, and only expand the bare "en" code (to EN-US).
        t = "EN-US" if target.strip().lower() == "en" else target
        return DeepLBackend(auth_key=key, target=t, source=source)
    raise ValueError(f"Unknown backend: {backend}")

def translate_srt(
    input_path: str,
    output_path: str,
    backend: str = "google",
    source_lang: str = "auto",
    target_lang: str = "en",
    batch_size: int = 40,
    sleep_between_batches: float = 0.5,
    deepl_key: str = "",
):
    """Translate the text of every cue in an SRT file and write a new SRT.

    Cue indices and timings are kept; only the cue text is replaced.

    Args:
        input_path: Path of the SRT file to read (UTF-8, BOM tolerated).
        output_path: Path of the translated SRT file to write (UTF-8).
        backend: Translation backend name, see ``build_translator``.
        source_lang: Source language code, or ``"auto"``.
        target_lang: Target language code.
        batch_size: Number of cues sent to the backend per request (>= 1).
        sleep_between_batches: Pause in seconds between requests; falsy
            values disable the pause.
        deepl_key: DeepL API key, only used by the DeepL backend.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1 or the input contains
            no subtitles.
        RuntimeError: if the backend returns a different number of
            translations than it was given.
    """
    if batch_size < 1:
        # A non-positive size would yield no batches (or fail obscurely) and
        # leave the output untranslated.
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    with open(input_path, "r", encoding="utf-8-sig") as f:
        raw = f.read()

    subtitles = list(srt.parse(raw))
    if not subtitles:
        raise ValueError("No subtitles found in the uploaded file.")

    texts = [s.content for s in subtitles]
    translator = build_translator(backend, source_lang, target_lang, deepl_key)
    retry_once = backend.lower() == "google"

    translated_all: List[str] = []
    for i, batch in enumerate(chunked(texts, batch_size), start=1):
        try:
            translated = translator.translate_batch(batch)
        except Exception:
            # simple retry once for transient failures on public endpoints
            if not retry_once:
                raise  # bare raise keeps the original traceback
            time.sleep(2.0)
            translated = translator.translate_batch(batch)

        # zip() below would silently drop cues on a short reply; fail loudly.
        if len(translated) != len(batch):
            raise RuntimeError(
                f"Backend returned {len(translated)} translations for a batch of {len(batch)} subtitles."
            )
        # Keep the original text for any cue the backend could not translate,
        # so every cue still has string content when the file is composed.
        translated_all.extend(
            original if new_text is None else new_text
            for original, new_text in zip(batch, translated)
        )

        # Pause between requests, but not after the final batch.
        if sleep_between_batches and i * batch_size < len(texts):
            time.sleep(sleep_between_batches)

    for sub, new_text in zip(subtitles, translated_all):
        sub.content = new_text

    out_text = srt.compose(subtitles)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(out_text)

# Run translation
# The output file sits next to the input: "<name>_en.srt" for any English
# target, otherwise "<name>_<target_lang>.srt".
base, ext = os.path.splitext(input_path)
if target_lang.lower().startswith("en"):
    output_path = f"{base}_en.srt"
else:
    output_path = f"{base}_{target_lang}.srt"

translate_srt(
    input_path=input_path,
    output_path=output_path,
    backend=backend,
    source_lang=source_lang,
    target_lang=target_lang,
    batch_size=batch_size,
    sleep_between_batches=sleep_seconds,
    deepl_key=DEEPL_API_KEY,
)

print(f"✅ Done. Saved: {output_path}")

# Offer download
# `files` is None outside Colab, in which case the file is only saved.
if files is not None:
    files.download(output_path)
