In [None]:
import time
import math
import ffmpeg
import nltk
import os
# nltk.download('popular') # only need to do this once

from faster_whisper import WhisperModel
from deep_translator import GoogleTranslator

In [None]:
# OPTIONAL: DOWNLOAD THE INPUT VIDEO BY URL (uncomment this cell to use it)
# import requests
# r = requests.get('https://05.mp4')
# f = open('input.mp4', 'wb')
# for chunk in r.iter_content(chunk_size=255):
#     if chunk: 
#         f.write(chunk)
# f.close()

In [None]:
# def convert_to_mp4(mkv_file):
#     name, ext = os.path.splitext(mkv_file)
#     out_name = name + ".mp4"
#     ffmpeg.input(mkv_file).output(out_name).run()
#     print("Finished converting {}".format(mkv_file))

# convert_to_mp4("input.mkv")

In [None]:
# Video file that the rest of the notebook transcribes and subtitles.
input_video = "input.mp4"
# File name without its extension; used to derive the names of the generated
# audio, subtitle and output files. splitext removes only the real extension,
# whatever it is, instead of every ".mp4" substring.
input_video_name = os.path.splitext(input_video)[0]

In [None]:
def extract_audio():
    """Run ffmpeg on `input_video` to produce `audio-<input_video_name>.wav`.

    An existing file with the same name is overwritten.

    Returns:
        str: Name of the audio file that was written.
    """
    wav_name = f"audio-{input_video_name}.wav"
    # Same input -> output -> run pipeline, expressed with the fluent API.
    ffmpeg.input(input_video).output(wav_name).run(overwrite_output=True)
    return wav_name

In [None]:
def transcribe(audio):
    """Transcribe an audio file with the "small" Whisper model on CPU.

    Prints the detected language and every transcribed segment.

    Args:
        audio: Path of the audio file handed to `WhisperModel.transcribe`.

    Returns:
        tuple: `(language, segments)` where `language` is the language
        reported by the model and `segments` is a list of segment objects
        exposing `.start`, `.end` and `.text`.
    """
    model = WhisperModel("small", device="cpu")
    segments, info = model.transcribe(audio)
    # Read the field by name: positional indexing (`info[0]`) only works while
    # the info object happens to be a tuple.
    language = info.language
    print("Transcription language", language)
    # Materialise the segments so they can be both printed here and returned.
    segments = list(segments)
    for segment in segments:
        print("[%.2fs -> %.2fs] %s" %
              (segment.start, segment.end, segment.text))
    return language, segments

In [None]:
def format_time(seconds):
    """Format a duration in seconds as an SRT timestamp `HH:MM:SS,mmm`.

    The value is rounded to whole milliseconds first and only then split into
    fields, so a fraction such as 59.9996 s carries over into the next second
    instead of producing an invalid `,1000` millisecond field. Every field is
    zero-padded (zero is rendered as `00:00:00,000`).

    Args:
        seconds: Non-negative offset in seconds (int or float). Negative
            values are clamped to zero because SRT cannot express them.

    Returns:
        str: The formatted timestamp.
    """
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    whole_seconds, milliseconds = divmod(remainder_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"

In [None]:
def generate_subtitle_file(language, segments):
    """Write the transcribed segments to `sub-<input_video_name>.<language>.srt`.

    A cue whose start rounds to 0 ms and whose end is at least 3 s is moved so
    that it starts 2 s before its end. The shift is computed on the numeric
    times; editing the formatted timestamp with `str.replace` would also
    rewrite any other field containing the same digits. The 3 s threshold
    applies to the whole end time, not only to its seconds component.

    Args:
        language: Language tag embedded in the file name.
        segments: Iterable of objects with `.start` and `.end` (seconds) and
            `.text`.

    Returns:
        str: Name of the subtitle file that was written.
    """
    subtitle_file = f"sub-{input_video_name}.{language}.srt"
    cues = []
    for index, segment in enumerate(segments, start=1):
        start_seconds = segment.start
        # fix start sub: do not show a cue from the very first frame.
        if round(start_seconds * 1000) == 0 and segment.end >= 3:
            start_seconds = segment.end - 2
        cues.append(
            f"{index} \n"
            f"{format_time(start_seconds)} --> {format_time(segment.end)} \n"
            f"{segment.text} \n"
            "\n"
        )

    # Explicit UTF-8 so the file does not depend on the platform default
    # encoding; the context manager closes it even if the write fails.
    with open(subtitle_file, "w", encoding="utf-8") as f:
        f.write("".join(cues))

    return subtitle_file

In [None]:
def add_subtitle_to_video(soft_subtitle, subtitle_file, subtitle_language):
    """Attach a subtitle file to `input_video`, writing `output-<input_video_name>.mp4`.

    Args:
        soft_subtitle: If true, the streams are copied and the subtitles are
            added as a separate `mov_text` track carrying language and title
            metadata. Otherwise the subtitles are rendered into the picture
            with ffmpeg's `subtitles` video filter.
        subtitle_file: Path of the .srt file to add.
        subtitle_language: Value for the subtitle track's `language` tag
            (only used for soft subtitles).

    Returns:
        str: Name of the video file that was written.
    """
    video_input_stream = ffmpeg.input(input_video)
    output_video = f"output-{input_video_name}.mp4"

    if soft_subtitle:
        subtitle_input_stream = ffmpeg.input(subtitle_file)
        subtitle_track_title = subtitle_file.replace(".srt", "")
        stream = ffmpeg.output(
            video_input_stream,
            subtitle_input_stream,
            output_video,
            **{
                "c": "copy",
                "c:s": "mov_text",
                # The two metadata entries need distinct dict keys, otherwise
                # the second silently replaces the first. `s:s:0` addresses the
                # first subtitle stream and `s:s` every subtitle stream; the
                # output has a single subtitle stream, so both reach it.
                "metadata:s:s:0": f"language={subtitle_language}",
                "metadata:s:s": f"title={subtitle_track_title}",
            },
        )
    else:
        stream = ffmpeg.output(video_input_stream, output_video,
                               vf=f"subtitles={subtitle_file}")

    ffmpeg.run(stream, overwrite_output=True)
    return output_video

In [None]:
# GENERATE .SRT
# Pipeline: video -> audio file -> (language, segments) -> subtitle file.
extracted_audio = extract_audio()
language, segments = transcribe(extracted_audio)
subtitle_file = generate_subtitle_file(language, segments)

In [None]:
# TRANSLATE
# Only the text of each cue is sent to the translator. Cue numbers and timing
# lines are copied through unchanged, and cues stay separated by blank lines,
# so the translated file keeps the structure of the source subtitle file.
# NOTE: this cell rebinds `subtitle_file`; re-running it without re-running the
# generation cell first would translate the already translated file.
with open(subtitle_file, "r", encoding="utf-8") as f:
    text = f.read().strip()

# Group the non-blank lines into cues (a blank line ends a cue).
cues = []
cue_lines = []
for line in text.splitlines():
    line = line.strip()
    if line:
        cue_lines.append(line)
    elif cue_lines:
        cues.append(cue_lines)
        cue_lines = []
if cue_lines:
    cues.append(cue_lines)

# One translator instance is reused for the whole file.
translator = GoogleTranslator(source='english', target='portuguese')

translated_cues = []
for cue_lines in cues:
    header = cue_lines[:2]
    cue_text = " ".join(cue_lines[2:])
    is_timed_cue = len(cue_lines) >= 2 and "-->" in cue_lines[1]
    if is_timed_cue and cue_text:
        # Keep the source text if the translator returns an empty result.
        translated = translator.translate(cue_text) or cue_text
        translated_cues.append("\n".join(header + [translated]))
    else:
        # Nothing to translate (or not a regular cue): copy it as-is.
        translated_cues.append("\n".join(cue_lines))

full_translated = "\n\n".join(translated_cues) + "\n"

subtitle_file = f"sub-{input_video_name}.pt.srt"
# Explicit UTF-8: the translated text contains accented characters.
with open(subtitle_file, "w", encoding="utf-8") as f:
    f.write(full_translated)

In [None]:
# BUILD FINAL VIDEO
# Add the subtitle file as a soft (selectable) track. The language tag is
# given as the ISO 639-2 code for Portuguese rather than the English name.
add_subtitle_to_video(
    soft_subtitle=True,
    subtitle_file=subtitle_file,
    subtitle_language='por'
)