# **Translate an MKV subtitle from English to European Portuguese**

### Breakdown of the Code:

1. **Environment Setup:**
   - **Loading Environment Variables:** Handles the loading of environment variables from a `.env` file.
   - **OpenAI Client Initialization:** Sets up the OpenAI client with the API key.

2. **Translation Function:**
   - **`translate_text`:** Translates text using the OpenAI `gpt-4-turbo` chat model; API errors are not caught and propagate to the caller.

3. **SRT Parsing and Writing:**
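   - **`parse_srt`:** Parses an SRT file into a list of `(index, timestamp, text)` subtitle segments, skipping cues that consist only of a bracketed annotation such as `[music]`.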
   - **`write_srt`:** Writes subtitle segments to a new SRT file.

4. **Subtitle Extraction and Track Information:**
   - **`extract_subtitles`:** Extracts subtitles from MKV files using `mkvextract`.
   - **`get_subtitle_track_details`:** Retrieves details about subtitle tracks from an MKV file using `mkvinfo`.

5. **User Interaction:**
   - **`get_user_confirmation`:** Prompts the user for a yes/no confirmation using a Tkinter messagebox.
   - **`select_mkv_file`:** Opens a file dialog to let the user select an MKV file.

6. **Main Function:**
   - **`main`:** Coordinates the overall process, including file selection, subtitle extraction, user prompts, and translation.

In [1]:
#!pip install --upgrade httpx
#!pip install transformers torch pysrt tqdm sacremoses nest_asyncio

In [2]:
import os
import re
import subprocess
import tkinter as tk
from tkinter import filedialog, messagebox, Tk
from dotenv import load_dotenv
import openai
from tqdm.auto import tqdm
from IPython.display import display, Markdown, HTML
from termcolor import colored

# --- Environment Setup ---
# Load variables from a .env file, then read the OpenAI key from the
# process environment.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if OPENAI_API_KEY:
    # Store the key on the openai module, which the translation call uses.
    openai.api_key = OPENAI_API_KEY
else:
    # Fail fast: nothing below can work without a key.
    raise ValueError("API key not found. Set the OPENAI_API_KEY environment variable in your .env file.")

# --- Translation Function ---
def translate_text(text, source_lang="en", target_lang="pt-PT"):
    """Translate ``text`` with the OpenAI chat completions API.

    The request uses the ``gpt-4-turbo`` model with ``temperature=0`` and a
    500-token cap on the reply.

    Args:
        text: The text to translate.
        source_lang: Language code of the input; inserted into the system
            prompt.
        target_lang: Language code of the output; inserted into the system
            prompt.

    Returns:
        The model's reply with surrounding whitespace stripped. Empty or
        whitespace-only input is returned unchanged without calling the API.
        If the reply carries no content, the original ``text`` is returned so
        the caller never receives ``None``.

    Raises:
        Exceptions raised by the ``openai`` client are not caught here and
        propagate to the caller.
    """
    # Nothing to translate: skip the (billable) request entirely.
    if not text or not text.strip():
        return text

    response = openai.chat.completions.create(
        model="gpt-4-turbo",
        messages=[
            {"role": "system", "content": f"You are a translator. Translate from {source_lang} to {target_lang}."},
            {"role": "user", "content": text}
        ],
        temperature=0,
        max_tokens=500
    )
    content = response.choices[0].message.content
    # ``content`` may be None; calling .strip() on it would raise.
    if not content:
        return text
    return content.strip()

# --- SRT Parsing and Writing ---

In [4]:
def parse_srt(file_path):
    """Parse an SRT file into a list of subtitle segments.

    Args:
        file_path: Path of the UTF-8 encoded SRT file to read.

    Returns:
        A list of ``(index, timestamp, text)`` tuples of strings, in file
        order. ``text`` keeps its internal line breaks. Cues with fewer than
        three lines are skipped, as are cues whose whole text is a single
        bracketed annotation such as ``[music]``.
    """
    # 'utf-8-sig' drops a leading byte-order mark if present; with plain
    # 'utf-8' the BOM would end up glued to the first cue's index.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        content = file.read()

    subtitles = []
    # Cues are separated by one or more blank (or whitespace-only) lines.
    # Splitting on exactly '\n\n' would leave a leading empty line in the
    # next cue whenever the file has extra blank lines.
    for part in re.split(r'\n\s*\n', content.strip()):
        lines = part.split('\n')
        if len(lines) < 3:
            continue
        index = lines[0].strip()
        timestamp = lines[1].strip()
        text = '\n'.join(lines[2:])
        # Drop cues that are only a bracketed annotation.
        if not re.match(r'^\[.*\]$', text.strip()):
            subtitles.append((index, timestamp, text))
    return subtitles

def write_srt(subtitles, file_path):
    """Write subtitle segments to ``file_path`` in SRT layout.

    Args:
        subtitles: Iterable of ``(index, timestamp, text)`` triples.
        file_path: Destination path; the file is created or overwritten and
            written as UTF-8.
    """
    with open(file_path, 'w', encoding='utf-8') as out:
        # Each cue is index, timestamp and text on their own lines, followed
        # by one blank line.
        out.writelines(
            f"{cue_index}\n{cue_time}\n{cue_text}\n\n"
            for cue_index, cue_time, cue_text in subtitles
        )

# --- Display Function ---

In [6]:
def display_translations(original, translated):
    """Show an original/translated text pair as styled HTML in the notebook.

    Both texts are HTML-escaped before being inserted into the markup, so
    characters such as ``<``, ``>`` and ``&`` (and any tags present in the
    subtitle or in the model's reply) are shown literally instead of being
    interpreted by the browser.

    Args:
        original: The source text (rendered in blue).
        translated: The translated text (rendered in green).
    """
    import html  # standard library; only this function needs it

    safe_original = html.escape(str(original))
    safe_translated = html.escape(str(translated))
    html_content = f"""
    <div style='color: blue; font-weight: bold;'>Original: {safe_original}</div>
    <div style='color: green; font-weight: bold;'>Translated: {safe_translated}</div>
    """
    display(HTML(html_content))

# --- Subtitle Extraction and Track Information ---

In [8]:
def extract_subtitles(file_path, output_dir, track_id):
    """Extract one track from an MKV file with ``mkvextract``.

    Args:
        file_path: Path of the MKV file.
        output_dir: Directory in which the extracted file is written.
        track_id: Track ID handed to ``mkvextract``; also used in the output
            file name ``subtitle_track_<track_id>.srt``.

    Returns:
        The path of the extracted file, or ``None`` if extraction failed.
        Failures are reported in the notebook output instead of being raised.
    """
    srt_path = os.path.join(output_dir, f'subtitle_track_{track_id}.srt')
    command = ['/usr/local/bin/mkvextract', 'tracks', file_path, f'{track_id}:{srt_path}']
    try:
        subprocess.run(command, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # CalledProcessError: mkvextract exited with a non-zero status.
        # OSError: the binary is missing or cannot be executed.
        display(Markdown(f"**Failed to extract subtitles:** {e}"))
        return None
    return srt_path

def _parse_mkvinfo_output(lines):
    """Pull the title and the subtitle track details out of mkvinfo output lines."""
    title = "Unknown Title"
    track_details = []
    track_info = {}
    for i, line in enumerate(lines):
        if "Title:" in line:
            # Split once so that titles containing ':' are kept whole.
            title = line.split("Title:", 1)[1].strip()
        if "Track number" in line:
            # mkvinfo prints the track number and, in parentheses, the ID
            # to use with mkvextract ("track ID for mkvmerge & mkvextract:
            # N"). Prefer that ID; fall back to the plain number when the
            # parenthesised part is absent.
            id_match = (
                re.search(r'track ID for mkvmerge & mkvextract:\s*(\d+)', line)
                or re.search(r'Track number:\s*(\d+)', line)
            )
            track_info = {"id": int(id_match.group(1))} if id_match else {}
        if "Track type: subtitles" in line:
            track_info["type"] = "subtitles"
            # Look at the next few lines only. Slicing cannot run past the
            # end of the output the way indexing would.
            for follow in lines[i + 1:i + 10]:
                if "Track number" in follow:
                    # Start of the next track: its fields are not ours.
                    break
                if "Language:" in follow:
                    track_info["language"] = follow.split("Language:", 1)[1].strip()
                if "Codec ID:" in follow:
                    track_info["codec"] = follow.split("Codec ID:", 1)[1].strip()
            # Callers index track['id'], so skip tracks whose ID is unknown.
            if "id" in track_info:
                track_details.append(track_info)
            track_info = {}
    return title, track_details


def get_subtitle_track_details(file_path):
    """Collect the title and subtitle track details of an MKV file.

    Runs ``mkvinfo`` on the file and scans its text output.

    Args:
        file_path: Path of the MKV file to inspect.

    Returns:
        A ``(title, track_details, raw_output)`` tuple. ``title`` is
        ``"Unknown Title"`` when no title line is found. ``track_details`` is
        a list of dicts with the keys ``"id"`` and ``"type"`` and, when
        present in the output, ``"language"`` and ``"codec"``. ``raw_output``
        is mkvinfo's stdout. If the required binaries are missing or mkvinfo
        fails, the error is shown in the notebook output and
        ``("Unknown Title", [], "")`` is returned.
    """
    mkvinfo_path = '/usr/local/bin/mkvinfo'
    mkvextract_path = '/usr/local/bin/mkvextract'

    # Both tools must be present and executable before doing any work.
    for tool_path in (mkvinfo_path, mkvextract_path):
        if not os.path.isfile(tool_path) or not os.access(tool_path, os.X_OK):
            display(Markdown(f"**Error:** '{tool_path}' is not found or not executable."))
            return "Unknown Title", [], ""

    try:
        result = subprocess.run([mkvinfo_path, file_path], capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        display(Markdown(f"**Failed to get MKV file information:** {e}"))
        return "Unknown Title", [], ""

    title, track_details = _parse_mkvinfo_output(result.stdout.splitlines())
    return title, track_details, result.stdout

# --- User Interaction ---

In [10]:
def get_user_confirmation(message):
    """Ask the user a yes/no question in a Tkinter message box.

    Args:
        message: The question shown in the dialog.

    Returns:
        The result of ``messagebox.askyesno``: ``True`` for "yes",
        ``False`` for "no".
    """
    root = tk.Tk()
    root.withdraw()  # hide the empty root window; only the dialog is wanted
    try:
        return messagebox.askyesno("Confirmation", message)
    finally:
        # Destroy the hidden root so it is not left behind after the dialog
        # closes.
        root.destroy()

def select_mkv_file():
    """Let the user pick an MKV file through a file-open dialog.

    Returns:
        Whatever ``filedialog.askopenfilename`` returns for the user's choice.
    """
    hidden_root = tk.Tk()
    hidden_root.withdraw()  # only the dialog should be visible
    dialog_options = {
        "title": "Select MKV file",
        "filetypes": [("MKV files", "*.mkv"), ("All files", "*.*")],
    }
    chosen_path = filedialog.askopenfilename(**dialog_options)
    hidden_root.destroy()
    return chosen_path

# --- Main Function ---

In [12]:
def main():
    """Run the whole workflow: pick a file, extract, translate, write.

    Steps:
        1. Ask the user for an MKV file.
        2. Read the file's subtitle track details.
        3. Extract the first English (``eng``) subtitle track that extracts
           successfully into an ``Extracted_Subtitles`` folder next to the
           MKV file.
        4. If a Portuguese (``por``) track already exists, ask whether to
           continue.
        5. Translate every parsed segment and write the result to
           ``subtitle_track_eng_pt-PT.srt`` in the same folder.

    Progress and problems are reported in the notebook output; the function
    returns ``None`` in every case.
    """
    file_path = select_mkv_file()
    if not file_path:
        display(Markdown("**No file selected.**"))
        return

    output_dir = os.path.join(os.path.dirname(file_path), 'Extracted_Subtitles')
    os.makedirs(output_dir, exist_ok=True)

    # The raw mkvinfo output (third element) is not needed here.
    movie_title, subtitle_tracks, _ = get_subtitle_track_details(file_path)
    if not subtitle_tracks:
        display(Markdown(f"**No subtitle tracks found in the MKV file '{movie_title}'.**"))
        return

    display(Markdown(f"**Movie Title:** {movie_title}"))

    english_found = False
    srt_path = None
    for track in subtitle_tracks:
        if track.get('language') != 'eng':
            continue
        english_found = True
        srt_path = extract_subtitles(file_path, output_dir, track['id'])
        if srt_path:
            display(Markdown(f"**English subtitle extracted to:** `{srt_path}`"))
            break

    if not english_found:
        display(Markdown("**English subtitle not found**"))
        return

    # An English track exists but every extraction attempt failed; without
    # this guard parse_srt would be called with None.
    if not srt_path:
        display(Markdown("**English subtitle track found but could not be extracted.**"))
        return

    portuguese_found = any(track.get('language') == 'por' for track in subtitle_tracks)
    if portuguese_found:
        proceed = get_user_confirmation("Portuguese subtitles found. Do you still want to proceed with translation?")
        if not proceed:
            display(Markdown("**Translation aborted by user.**"))
            return

    translated_srt_path = os.path.join(output_dir, 'subtitle_track_eng_pt-PT.srt')
    subtitles = parse_srt(srt_path)
    display(Markdown(f"**Parsed {len(subtitles)} subtitle segments.**"))

    translated_subtitles = []
    for index, timestamp, text in tqdm(subtitles, desc="Translating Subtitles"):
        translated_text = translate_text(text)
        # Only show pairs where the translation differs from the source.
        if text != translated_text:
            display_translations(text, translated_text)
        translated_subtitles.append((index, timestamp, translated_text))

    write_srt(translated_subtitles, translated_srt_path)
    display(Markdown(f"**Translated subtitles written to {translated_srt_path}**"))

# Run the main function
if __name__ == "__main__":
    main()

**Movie Title:** House.of.the.Dragon.S01E10.720p.HMAX.WEBRip.x264-GalaxyTV

Extracting track 3 with the CodecID 'S_TEXT/UTF8' to the file '/Users/f.nuno/Downloads/House.of.the.Dragon.S01.COMPLETE.720p.HMAX.WEBRip.x264-GalaxyTV[TGx]/Extracted_Subtitles/subtitle_track_3.srt'. Container format: SRT text subtitles
Progress: 100%


**English subtitle extracted to:** `/Users/f.nuno/Downloads/House.of.the.Dragon.S01.COMPLETE.720p.HMAX.WEBRip.x264-GalaxyTV[TGx]/Extracted_Subtitles/subtitle_track_3.srt`

**Parsed 499 subtitle segments.**

Translating Subtitles:   0%|          | 0/499 [00:00<?, ?it/s]

**Translated subtitles written to /Users/f.nuno/Downloads/House.of.the.Dragon.S01.COMPLETE.720p.HMAX.WEBRip.x264-GalaxyTV[TGx]/Extracted_Subtitles/subtitle_track_eng_pt-PT.srt**