speech_to_text_en >>

In [1]:
import os
import tempfile
import warnings

import torch
from moviepy.editor import VideoFileClip
from transformers import pipeline

# Silence every Python warning for the remainder of the session.
warnings.filterwarnings("ignore")

# Choose the inference device and the matching numeric precision in one place:
# first CUDA device with float16 when CUDA is available, otherwise CPU with float32.
if torch.cuda.is_available():
    _DEVICE, _DTYPE = "cuda:0", torch.float16
else:
    _DEVICE, _DTYPE = "cpu", torch.float32

# Build the Whisper speech-recognition pipeline a single time at cell level so
# that every later call to speech_to_text_en() reuses the same loaded model.
print("üéôÔ∏è Loading Whisper STT model...")
_stt_pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-large-v3",
    torch_dtype=_DTYPE,
    device=_DEVICE,
)
print("‚úÖ Whisper model loaded")


def speech_to_text_en(audio_path: str, language: str = "english") -> str:
    """
    Transcribe the speech of an audio or video file to text with Whisper.

    If ``audio_path`` ends with a known video extension (.mp4, .mov, .avi,
    .mkv) the audio track is first written to a temporary 16 kHz, 16-bit PCM
    WAV file and that file is transcribed. According to the original author's
    note, going through a file on disk avoids a ``TypeError`` ("arrays to
    stack...") in the pipeline. Any other path is passed to the pipeline as is.

    Args:
        audio_path: Path to an audio or video file.
        language: Spoken language forwarded to Whisper's generation step.
            Defaults to "english".

    Returns:
        The transcribed text, or "" when the video has no audio track.

    Raises:
        Exception: Any error raised during extraction or transcription is
            printed and then re-raised unchanged.
    """
    # Both stay None until the corresponding resource actually exists, so the
    # cleanup in `finally` only touches what was really created.
    video = None
    temp_audio = None

    try:
        # 1. For video containers, extract the audio track to a temporary WAV.
        if audio_path.lower().endswith(('.mp4', '.mov', '.avi', '.mkv')):
            print(f"üé¨ Extracting audio from: {audio_path}")
            video = VideoFileClip(audio_path)

            # A video without an audio track has nothing to transcribe.
            if video.audio is None:
                return ""

            # A unique temp file avoids clashes with other runs and does not
            # depend on the current working directory being writable.
            fd, temp_audio = tempfile.mkstemp(prefix="whisper_input_", suffix=".wav")
            os.close(fd)

            video.audio.write_audiofile(
                temp_audio,
                fps=16000,
                nbytes=2,
                codec='pcm_s16le',
                verbose=False,
                logger=None,
            )

            # Release the video reader before the (long) transcription step.
            video.close()
            video = None
            path_to_process = temp_audio
        else:
            path_to_process = audio_path

        # 2. Hand the path to the pipeline, which decodes the file itself.
        print("üéôÔ∏è Transcribing...")
        result = _stt_pipe(
            path_to_process,
            chunk_length_s=30,
            generate_kwargs={"language": language, "return_timestamps": True},
        )
        return result["text"]

    except Exception as e:
        print(f"‚ùå Error during STT: {e}")
        raise

    finally:
        # 3. Always release the clip and delete the temporary WAV, whether the
        #    function returned normally, returned early, or raised.
        if video is not None:
            video.close()
        if temp_audio is not None and os.path.exists(temp_audio):
            os.remove(temp_audio)

üéôÔ∏è Loading Whisper STT model...


`torch_dtype` is deprecated! Use `dtype` instead!
Device set to use cuda:0


‚úÖ Whisper model loaded


text_translation >>

In [2]:
"""‡πÅ‡∏õ‡∏• Text ‡∏†‡∏≤‡∏©‡∏≤‡∏≠‡∏±‡∏á‡∏Å‡∏§‡∏©‡πÄ‡∏õ‡πá‡∏ô Text ‡∏†‡∏≤‡∏©‡∏≤‡πÑ‡∏ó‡∏¢"""

"""‡πÅ‡∏õ‡∏•‡∏á Text ‡πÑ‡∏ó‡∏¢ ‡πÄ‡∏õ‡πá‡∏ô‡πÄ‡∏™‡∏µ‡∏¢‡∏á‡πÑ‡∏ó‡∏¢"""

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Hugging Face checkpoint of the translation model.
model_id = "scb10x/typhoon-translate-4b"

# Load the tokenizer and the causal-LM weights for that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map={"": 0},  # place the whole model on device 0 (forces GPU use)
)
  
def text_translation(ENG_text_path, max_new_tokens=512):
    """
    Translate English text into Thai with the cell-level translation model.

    Args:
        ENG_text_path: The English text to translate. Despite its name this is
            the text itself, not a path to a file.
        max_new_tokens: Upper bound on the number of generated tokens. Longer
            translations are cut off at this limit. Defaults to 512.

    Returns:
        The Thai translation as a string. Returns "" without calling the model
        when the input is ``None``, empty, or only whitespace.
    """
    text = ENG_text_path

    # Nothing to translate (e.g. the video had no speech): do not ask the model
    # to "translate" an empty prompt, which would produce made-up text.
    if text is None or not str(text).strip():
        return ""

    # The system prompt tells the model what to do; the original author notes
    # that the model expects this prompt for accurate results.
    messages = [
        {"role": "system", "content": "Translate the following text into Thai."},
        {"role": "user", "content": text},
    ]

    # Render the chat into token ids and move them to the model's device.
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # Greedy decoding: sampling is disabled, so the sampling-only parameters
    # are explicitly cleared.
    outputs = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=False,
        temperature=None,
        top_p=None,
    )

    # The output contains the prompt followed by the answer; decode only the
    # newly generated part.
    prompt_length = len(input_ids[0])
    TH_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return TH_text


`torch_dtype` is deprecated! Use `dtype` instead!
The following generation flags are not valid and may be ignored: ['cache_implementation']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Loading checkpoint shards: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:18<00:00,  9.42s/it]


text_to_speech_TH >>

In [3]:
# -----------------------------
# MMS-TTS (Standard TTS)
# -----------------------------
import torch
import scipy.io.wavfile as wavfile
from transformers import VitsModel, AutoTokenizer

# Checkpoint shared by the tokenizer and the VITS speech model below.
_mms_model_name = "facebook/mms-tts-tha"

# Load both pieces once at cell level so text_to_speech_TH() can reuse them.
print("üîä Loading MMS-TTS-THAI model...")
_mms_tokenizer = AutoTokenizer.from_pretrained(_mms_model_name)
_mms_model = VitsModel.from_pretrained(_mms_model_name)
print("‚úÖ MMS-TTS model loaded")



def text_to_speech_TH(text: str, output_path: str = "thai_mms.wav"):
    """
    Synthesize speech for ``text`` with the cell-level MMS-TTS model and save
    it as a WAV file.

    Errors are reported by printing a message instead of raising, so a failed
    synthesis does not abort the calling cell. The return value tells the
    caller whether the file was written.

    Args:
        text: Text to speak. Must contain at least one non-whitespace character.
        output_path: Destination WAV file. Defaults to "thai_mms.wav".

    Returns:
        ``output_path`` when the file was written, ``None`` when synthesis
        failed (including when ``text`` is empty).
    """
    try:
        # Reject empty input up front with a clear message rather than letting
        # the tokenizer/model fail on it in a less obvious way.
        if not text or not text.strip():
            raise ValueError("no text to synthesize (input is empty)")

        inputs = _mms_tokenizer(text, return_tensors="pt")

        # Inference only: no gradients are needed.
        with torch.no_grad():
            waveform = _mms_model(**inputs).waveform

        sample_rate = _mms_model.config.sampling_rate

        # Write the first (only) item of the batch to disk.
        wavfile.write(
            output_path,
            rate=sample_rate,
            data=waveform[0].cpu().numpy(),
        )
        print(f"‚úÖ ‡∏ö‡∏±‡∏ô‡∏ó‡∏∂‡∏Å‡πÄ‡∏™‡∏µ‡∏¢‡∏á‡∏†‡∏≤‡∏©‡∏≤‡πÑ‡∏ó‡∏¢‡∏™‡∏≥‡πÄ‡∏£‡πá‡∏à‡∏ó‡∏µ‡πà: {output_path}")
        return output_path

    except Exception as e:
        # Best-effort by design: report the problem and signal failure through
        # the return value instead of propagating the exception.
        print(f"‚ùå ‡πÄ‡∏Å‡∏¥‡∏î‡∏Ç‡πâ‡∏≠‡∏ú‡∏¥‡∏î‡∏û‡∏•‡∏≤‡∏î: {e}")
        return None

üîä Loading MMS-TTS-THAI model...
‚úÖ MMS-TTS model loaded


In [4]:
import os
from moviepy.editor import VideoFileClip, AudioFileClip
from moviepy.audio.fx.all import audio_loop

def video_sound_editor(new_sound_path, video_path, output_path):
    """
    Replace the audio track of a video with a new audio file and export it.

    The new audio is fitted to the video's duration:
    - shorter than the video: the audio is looped until the video ends;
    - otherwise: the audio is trimmed to the video's duration.

    Errors are printed rather than raised. All clips that were opened are
    closed in every case so the underlying files are not left locked.

    Args:
        new_sound_path: Path to the audio file to put on the video.
        video_path: Path to the source video.
        output_path: Path of the video file to write.

    Returns:
        ``True`` when the video was exported, ``False`` when an error occurred.
    """
    # Start as None so the cleanup below only closes what was actually opened.
    video = new_audio = final_audio = final_video = None

    try:
        # 1. Load the source clips.
        video = VideoFileClip(video_path)
        new_audio = AudioFileClip(new_sound_path)

        # 2. Make the audio exactly as long as the video.
        if new_audio.duration < video.duration:
            print("üîÑ Audio is shorter than video. Looping audio...")
            final_audio = audio_loop(new_audio, duration=video.duration)
        else:
            print("‚úÇÔ∏è Audio is longer than video. Trimming audio...")
            final_audio = new_audio.subclip(0, video.duration)

        # 3. Attach the fitted audio to the original video.
        final_video = video.set_audio(final_audio)

        # 4. Export, keeping the source frame rate.
        print(f"üíæ Exporting video to: {output_path}")
        final_video.write_videofile(
            output_path,
            fps=video.fps,
            codec="libx264",
            audio_codec="aac",
            audio=True,  # make sure the audio track is written
            temp_audiofile='temp-audio-final.m4a',
            remove_temp=True,
        )

        print("‚úÖ Video processing completed successfully!")
        return True

    except Exception as e:
        print(f"‚ùå Error in video_sound_editor: {e}")
        return False

    finally:
        # 5. Release every clip, on success and on failure alike. A failure to
        #    close one clip must not prevent closing the others.
        for clip in (video, new_audio, final_audio, final_video):
            if clip is None:
                continue
            try:
                clip.close()
            except Exception as close_error:
                print(f"Warning: could not close clip: {close_error}")

In [6]:
import customtkinter as ctk
from tkinter import filedialog
import os

# Configure the CustomTkinter theme so the dialogs look modern.
ctk.set_appearance_mode("System")  # follow the operating system setting (Light/Dark)
ctk.set_default_color_theme("blue")

def show_modern_msg(title, message):
    """
    Show a modal, always-on-top message window with a single confirm button.

    The window is 350x150 pixels and centred on the screen. The call blocks
    until the user closes the window.

    Args:
        title: Text for the window title bar.
        message: Text shown in the body of the window.
    """
    # Fixed window size. Used both for the geometry string and for centring,
    # so centring does not depend on the window having been laid out yet.
    width, height = 350, 150

    msg_window = ctk.CTkToplevel()
    msg_window.title(title)
    msg_window.attributes('-topmost', True)

    # Centre the window on the screen. Tk expects "WIDTHxHEIGHT+X+Y".
    x = (msg_window.winfo_screenwidth() // 2) - (width // 2)
    y = (msg_window.winfo_screenheight() // 2) - (height // 2)
    msg_window.geometry(f"{width}x{height}+{x}+{y}")

    label = ctk.CTkLabel(msg_window, text=message, font=("Leelawadee UI", 14), wraplength=300)
    label.pack(expand=True, padx=20, pady=20)

    btn = ctk.CTkButton(msg_window, text="‡∏ï‡∏Å‡∏•‡∏á", command=msg_window.destroy, width=100)
    btn.pack(pady=(0, 20))

    # Make the window modal: take input focus and wait until it is closed.
    msg_window.grab_set()
    msg_window.wait_window()

def processing_pipline():
    """
    Run the whole dubbing workflow with GUI prompts.

    Steps: ask for a source video, transcribe its speech, translate the text
    to Thai, synthesize Thai speech into a temporary WAV next to the video,
    ask where to save the result, and write the video with the new audio.

    The function stops early (after informing the user) when no video is
    chosen, when no speech could be transcribed, or when no speech audio was
    produced. The hidden root window is always destroyed and the temporary WAV
    is always removed, also on early exit or error.
    """
    root = ctk.CTk()
    root.withdraw()  # only the dialogs should be visible, not the root window
    sound_path = None

    try:
        # 1. Ask for the source video.
        show_modern_msg("‡πÄ‡∏ï‡∏£‡∏µ‡∏¢‡∏°‡∏û‡∏£‡πâ‡∏≠‡∏°", "‡∏Å‡∏£‡∏∏‡∏ì‡∏≤‡πÄ‡∏•‡∏∑‡∏≠‡∏Å‡πÑ‡∏ü‡∏•‡πå‡∏ß‡∏¥‡∏î‡∏µ‡πÇ‡∏≠‡∏ï‡πâ‡∏ô‡∏â‡∏ö‡∏±‡∏ö‡∏Ñ‡∏£‡∏±‡∏ö ‚ú®")

        original_video_path = filedialog.askopenfilename(
            title="‡πÄ‡∏•‡∏∑‡∏≠‡∏Å‡πÑ‡∏ü‡∏•‡πå‡∏ß‡∏¥‡∏î‡∏µ‡πÇ‡∏≠‡∏ï‡πâ‡∏ô‡∏â‡∏ö‡∏±‡∏ö",
            # Same video extensions that speech_to_text_en() recognises.
            filetypes=[("Video", "*.mp4 *.mov *.avi *.mkv")]
        )

        if not original_video_path:
            return

        # The synthesized speech is stored temporarily next to the video.
        base_dir = os.path.dirname(original_video_path)
        sound_path = os.path.join(base_dir, "translated_audio.wav")

        # Remove a leftover file from an earlier run so the existence check
        # after synthesis reflects this run only.
        if os.path.exists(sound_path):
            os.remove(sound_path)

        # 2. Speech -> English text.
        eng_text = speech_to_text_en(original_video_path)
        if not eng_text or not eng_text.strip():
            show_modern_msg("Error", "No speech could be transcribed from the selected video.")
            return

        # 3. English text -> Thai text -> Thai speech.
        th_text = text_translation(eng_text)
        text_to_speech_TH(th_text, sound_path)

        # The speech step reports errors by printing only, so verify that the
        # audio file really exists before going on.
        if not os.path.exists(sound_path):
            show_modern_msg("Error", "The Thai speech audio could not be created.")
            return

        # 4. Ask where to save the result.
        show_modern_msg("‡∏õ‡∏£‡∏∞‡∏°‡∏ß‡∏•‡∏ú‡∏•‡πÄ‡∏™‡∏£‡πá‡∏à‡πÅ‡∏•‡πâ‡∏ß", "‡πÄ‡∏£‡∏≤‡πÄ‡∏ï‡∏£‡∏µ‡∏¢‡∏°‡πÄ‡∏™‡∏µ‡∏¢‡∏á‡∏û‡∏≤‡∏Å‡∏¢‡πå‡πÄ‡∏™‡∏£‡πá‡∏à‡πÅ‡∏•‡πâ‡∏ß‡∏Ñ‡∏£‡∏±‡∏ö\n‡∏Ç‡∏±‡πâ‡∏ô‡∏ï‡∏≠‡∏ô‡∏™‡∏∏‡∏î‡∏ó‡πâ‡∏≤‡∏¢: ‡πÄ‡∏•‡∏∑‡∏≠‡∏Å‡∏ó‡∏µ‡πà‡πÄ‡∏Å‡πá‡∏ö‡πÑ‡∏ü‡∏•‡πå‡∏ß‡∏¥‡∏î‡∏µ‡πÇ‡∏≠")

        final_output_path = filedialog.asksaveasfilename(
            title="‡∏ö‡∏±‡∏ô‡∏ó‡∏∂‡∏Å‡∏ú‡∏•‡∏•‡∏±‡∏û‡∏ò‡πå",
            defaultextension=".mp4",
            initialfile="video_TH.mp4"
        )

        if final_output_path:
            # 5. Put the new audio on the video. The editor also reports errors
            #    by printing only, so confirm the output file is there before
            #    announcing success.
            video_sound_editor(sound_path, original_video_path, final_output_path)
            if os.path.exists(final_output_path):
                show_modern_msg("‡∏™‡∏≥‡πÄ‡∏£‡πá‡∏à!", "‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏ß‡∏¥‡∏î‡∏µ‡πÇ‡∏≠‡∏û‡∏≤‡∏Å‡∏¢‡πå‡πÑ‡∏ó‡∏¢‡πÄ‡∏£‡∏µ‡∏¢‡∏ö‡∏£‡πâ‡∏≠‡∏¢‡πÅ‡∏•‡πâ‡∏ß‡∏Ñ‡∏£‡∏±‡∏ö üéâ")
            else:
                show_modern_msg("Error", "The output video could not be created.")

    finally:
        # Always clean up: delete the temporary audio and destroy the hidden
        # root window, including on early return, cancel, or exception.
        if sound_path is not None and os.path.exists(sound_path):
            os.remove(sound_path)
        root.destroy()

In [7]:
# Run the whole workflow. This opens GUI dialogs and calls the functions
# defined in the cells above, so those cells must have been executed first.
processing_pipline()

üé¨ Extracting audio from: C:/Users/Napat/Downloads/danmachi.mp4




üéôÔ∏è Transcribing...


The following generation flags are not valid and may be ignored: ['top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


‚úÖ ‡∏ö‡∏±‡∏ô‡∏ó‡∏∂‡∏Å‡πÄ‡∏™‡∏µ‡∏¢‡∏á‡∏†‡∏≤‡∏©‡∏≤‡πÑ‡∏ó‡∏¢‡∏™‡∏≥‡πÄ‡∏£‡πá‡∏à‡∏ó‡∏µ‡πà: C:/Users/Napat/Downloads\translated_audio.wav
‚úÇÔ∏è Audio is longer than video. Trimming audio...
üíæ Exporting video to: C:/Users/Napat/Downloads/video_TH.mp4
Moviepy - Building video C:/Users/Napat/Downloads/video_TH.mp4.
MoviePy - Writing audio in temp-audio-final.m4a


                                                                   

MoviePy - Done.
Moviepy - Writing video C:/Users/Napat/Downloads/video_TH.mp4



                                                               

Moviepy - Done !
Moviepy - video ready C:/Users/Napat/Downloads/video_TH.mp4
‚úÖ Video processing completed successfully!
