This notebook transcribes podcast audio into subtitles with Whisper, then corrects and translates those subtitles with the OpenAI API. It is meant to be run in Google Colab.

Please read the README for further information on parameters.

In [None]:
import os

# ---------------------------------------------------------------------------
# Per-episode settings: update these for every episode.
# ---------------------------------------------------------------------------
# Folder of the episode; it is joined onto `drive_path` when building file paths.
episode_path = "20250424"
# Extra context for the correction prompt.
# Set guest_context to "" when the episode has no guests.
guest_context = "The guest in this episode was [...]. She is a [...]. "
# Describe the topic and important keywords.
extra_context = "This time, the topic was [...]. "

# ---------------------------------------------------------------------------
# Tuning: change only if needed.
# ---------------------------------------------------------------------------
# Number of subtitle blocks sent per API request. A larger batch gives the
# model more context but raises the risk of hitting API limits.
# Rough estimate from the original note: ~10-30 tokens (presumably per block).
batch_size = 100

# ---------------------------------------------------------------------------
# One-time setup: fill these in the first time you use the notebook.
# ---------------------------------------------------------------------------
background_context = "The subtitles are from a podcast between [...]. Generally, they talk about [...]. "
# ISO 639-1 two-letter language code.
language = "et"
# Full name of the language.
language_full = "Estonian"
# If the files are under your own Drive, the path shown in the Drive UI is the
# part that comes after "/content/drive/MyDrive/".
# NOTE(review): no cell in this notebook mounts Google Drive — presumably it
# has to be mounted before the paths below are usable; confirm.
drive_path = "/content/drive/MyDrive/my_podcast_path"
audiofile_name = "audio.mp3"

# ---------------------------------------------------------------------------
# Defaults: usually fine as they are, unless you prefer something else.
# ---------------------------------------------------------------------------
initial_subtitles_name = f"init_{language}.srt"
corrected_subtitles_name = f"final_{language}.srt"
translated_subtitles_name = "final_en.srt"

whisper_model = "large-v3"
openai_model = "gpt-4.1"


In [None]:
!pip install -U openai-whisper
!pip install -U openai

In [None]:
import whisper

# Load the Whisper checkpoint selected by `whisper_model`.
model_size = whisper_model
model = whisper.load_model(name=model_size)

In [None]:
# Transcribe the episode's audio file, passing the configured language code
# through to Whisper.
result = model.transcribe(
    os.path.join(drive_path, episode_path, audiofile_name),
    language=language,
)

In [None]:
def format_timestamp(seconds):
    """Format a duration in seconds as an SRT timestamp (``HH:MM:SS,mmm``).

    The value is rounded to whole milliseconds *before* it is split into
    fields, so a fraction that rounds up to a full second carries into the
    seconds/minutes/hours fields instead of producing a four-digit
    millisecond part (e.g. 1.9996 -> ``00:00:02,000``).

    Args:
        seconds: Non-negative offset from the start of the audio, in seconds.

    Returns:
        The timestamp string, with hours zero-padded to at least two digits.
    """
    total_ms = int(round(seconds * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, milliseconds = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"

In [None]:
# Render every Whisper segment as one SRT block: index line, time range line,
# then the segment text.
# NOTE(review): the index is Whisper's own segment id, which presumably starts
# at 0, whereas SRT files conventionally count from 1 — confirm that the
# target player/platform accepts this.
srt_blocks = [
    f"{s['id']}\n"
    f"{format_timestamp(s['start'])} --> {format_timestamp(s['end'])}\n"
    f"{s['text'].strip()}"
    for s in result["segments"]
]

# Blocks are separated by exactly one blank line. Joining (instead of
# conditionally prepending a separator) ensures the last block is separated
# from the one before it as well.
srt_text = "\n\n".join(srt_blocks)

with open(os.path.join(drive_path, episode_path, initial_subtitles_name), "w", encoding="utf-8") as file:
    file.write(srt_text)




In [None]:

import openai
import getpass
from google.colab import files
import os
from google.colab import userdata

# Copy the Colab secret named 'oai' into the environment variable the OpenAI
# client is expected to read its API key from, then create the client.
os.environ.update({"OPENAI_API_KEY": userdata.get('oai')})

client = openai.OpenAI()


# Read the subtitle blocks
def load_srt_blocks(file_path):
    """Read an SRT file and return its subtitle blocks as a list of strings.

    Blocks are the chunks of text separated by one or more blank lines;
    lines that contain only whitespace count as blank. A leading UTF-8 byte
    order mark, if present, is dropped so it does not end up in the first
    block.

    Args:
        file_path: Path of the UTF-8 encoded SRT file.

    Returns:
        A list with one string per block (index, timing and text lines joined
        by newlines). An empty or whitespace-only file yields an empty list.
    """
    import re

    # "utf-8-sig" reads plain UTF-8 unchanged and strips a BOM when there is one.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        content = f.read()

    # Split on any run of blank lines, then drop the empty string that an
    # empty file would otherwise produce.
    return [block for block in re.split(r'\n\s*\n', content.strip()) if block]

# Load the initial (uncorrected) subtitle file of this episode as SRT blocks.
blocks = load_srt_blocks(
    os.path.join(drive_path, episode_path, initial_subtitles_name)
)

def batch_blocks(blocks, batch_size=batch_size):
    """Yield consecutive slices of ``blocks`` with at most ``batch_size`` items.

    The default for ``batch_size`` is the value the module-level
    ``batch_size`` setting had when this function was defined.

    Args:
        blocks: Sequence of subtitle blocks (must support ``len`` and slicing).
        batch_size: Maximum number of blocks per batch; must be at least 1.

    Yields:
        Slices of ``blocks`` in order; the last one may be shorter.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. Because this is a
            generator, the error is raised when iteration starts.
    """
    # A negative step would make range() empty and silently drop every
    # subtitle, so reject non-positive sizes explicitly.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")
    for start in range(0, len(blocks), batch_size):
        yield blocks[start:start + batch_size]

# Prompt templates
#
# Both templates are filled in later with ``template.format(batch=...)``.
# The f-strings below insert the configuration values immediately and leave a
# literal ``{batch}`` placeholder (written as ``{{batch}}``) for that later
# ``format`` call.

def _escape_braces(text):
    """Double literal braces so a later ``str.format`` call leaves ``text`` unchanged."""
    return text.replace("{", "{{").replace("}", "}}")

# The context strings are free text typed by the user. Without escaping, a
# "{" or "}" in any of them would be treated as a format field and make
# ``.format(batch=...)`` fail.
orig_prompt_template = (
    f"You will receive subtitle segments in SRT format in {_escape_braces(language_full)}. "
    f"{_escape_braces(background_context)} "
    f"{_escape_braces(guest_context)} "
    f"{_escape_braces(extra_context)} "
    f"Please correct typos and fix grammar where needed, but preserve meaning. "
    f"Do not translate. Keep the original SRT format with timestamps unchanged.\n\n{{batch}}\n\nCorrected subtitles:"
)

en_prompt_template = (
    f"Translate the following corrected {_escape_braces(language_full)} SRT subtitles to English. "
    f"Keep the timestamps and formatting the same. Do not add or remove lines.\n\n{{batch}}\n\nTranslated subtitles:"
)

# OpenAI API call
def gpt_call(prompt):
    """Send ``prompt`` to the configured chat model and return the reply text.

    The request uses the module-level ``client`` and ``openai_model``, a fixed
    system message describing the assistant's role, and ``temperature=0.2``.

    Args:
        prompt: Full user prompt, including the subtitle batch.

    Returns:
        The text content of the first choice in the response.

    Raises:
        RuntimeError: If the first choice carries no text content. Failing
            here points at the offending request instead of letting ``None``
            leak into later prompt formatting and the final join.
    """
    response = client.chat.completions.create(
        model=openai_model,
        messages=[
            {"role": "system", "content": "You are a subtitle correction and translation assistant."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.2,
    )
    choice = response.choices[0]
    content = choice.message.content
    if content is None:
        raise RuntimeError(
            f"The model returned no text content (finish_reason={choice.finish_reason!r})"
        )
    return content

# Process batches
# Each batch goes through two model calls: first a correction pass in the
# original language, then a translation of the corrected text into English.
corrected_orig, translated_en = [], []

for batch_num, batch in enumerate(batch_blocks(blocks), start=1):
    print(f"Processing batch {batch_num}...")

    # Pass 1: correct the subtitles in the source language.
    corrected = gpt_call(orig_prompt_template.format(batch='\n\n'.join(batch)))
    corrected_orig.append(corrected)

    # Pass 2: translate the corrected output (not the raw batch) to English.
    translated_en.append(gpt_call(en_prompt_template.format(batch=corrected)))

# Write outputs
# Each result list is written to its own file in the episode folder, with the
# per-batch outputs separated by a blank line.
for out_name, chunks in (
    (corrected_subtitles_name, corrected_orig),
    (translated_subtitles_name, translated_en),
):
    with open(os.path.join(drive_path, episode_path, out_name), "w", encoding="utf-8") as f:
        f.write("\n\n".join(chunks))
