<a href="https://colab.research.google.com/github/Elijah266/videocast/blob/main/podcast_short_subtitles.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Upload the input files through the Colab file picker: a short mp3 to
# transcribe. Later cells also look in the working directory for a *.png
# (still image for the video) and copy any *.ttf fonts found there.
# NOTE(review): assumes files.upload() saves into the current working
# directory, which is where the later glob("*.mp3") / glob("*.png") look — confirm.
from google.colab import files
upload_files = files.upload()

In [None]:
# Python packages imported by the transcription cell.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install faster_whisper
%pip install srt
%pip install budoux
# Fonts: the Takao package is installed for the TakaoPGothic font requested in
# the subtitle burn-in step; any *.ttf files in the working directory are
# copied next to the system fonts, then the font cache is rebuilt.
!apt install -y fonts-takao
!cp *.ttf "/usr/share/fonts/truetype/"
!fc-cache -fv
# cuBLAS runtime (presumably required by faster_whisper on the GPU — confirm).
# -y keeps the install non-interactive, consistent with the fonts-takao line.
!apt install -y libcublas11

In [None]:
# Add line breaks to subtitles
def combine_text_until_max_length(data, max_length):
    """Greedily pack text chunks into newline-terminated lines.

    Chunks are appended to the current line for as long as the line stays
    within ``max_length`` characters; otherwise the current line is closed
    and the chunk starts a new one. Chunks are never split, so a single
    chunk longer than ``max_length`` ends up alone on an over-long line.

    Args:
        data: Sequence of strings in display order (may be empty or None).
        max_length: Maximum number of characters allowed on one line.

    Returns:
        str: The packed text, where every line (including the last one) is
        terminated by a newline character. An empty string is returned when
        there is nothing to pack, so the result is always a ``str``.
    """
    if not data:
        return ""

    lines = []
    current = ""

    for chunk in data:
        if len(current) + len(chunk) <= max_length:
            current += chunk
            continue
        # Close the current line only if it holds text; otherwise an
        # over-long first chunk would produce a leading blank line.
        if current:
            lines.append(current)
        current = chunk

    if current:
        lines.append(current)

    return "".join(line + "\n" for line in lines)

In [None]:
# Transcribe and create .srt subtitle file
import budoux
from faster_whisper import WhisperModel
from datetime import timedelta
from srt import Subtitle
import srt
import glob

# Pick the first uploaded mp3 from the working directory.
# The list is deliberately not called `files`, so the `google.colab.files`
# module imported by the upload cell is not shadowed.
mp3_files = glob.glob("*.mp3")
if not mp3_files:
    raise FileNotFoundError("No .mp3 file found in the working directory; upload one first.")
file_path = mp3_files[0]
# Remove exactly the ".mp3" suffix. str.rstrip(".mp3") strips any trailing run
# of the characters '.', 'm', 'p', '3' and would turn "temp.mp3" into "te".
file_name = file_path[: -len(".mp3")]

# Pick the first uploaded png; the video cells use it as the still picture.
image_files = glob.glob("*.png")
if not image_files:
    raise FileNotFoundError("No .png file found in the working directory; upload one first.")
image_file = image_files[0]

model_size = "large-v3"
# Maximum number of characters on one subtitle line.
MAX_CHARS_PER_LINE = 25

model = WhisperModel(model_size, device="cuda", compute_type="float16")
segments, info = model.transcribe(file_path, beam_size=10)
print("Detected language '%s' with probability %f" % (info.language, info.language_probability))

# BudouX splits Japanese text into phrase chunks so that line breaks fall
# between chunks instead of in the middle of one.
parser = budoux.load_default_japanese_parser()

subs = []
for index, segment in enumerate(segments, start=1):
    print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
    # Replace the Japanese comma with a space and drop the full stop.
    segment_text = segment.text.replace("、", " ").replace("。", "")
    word_list = parser.parse(segment_text)
    text = combine_text_until_max_length(word_list, MAX_CHARS_PER_LINE)
    subs.append(
        Subtitle(
            index=index,
            start=timedelta(seconds=segment.start),
            end=timedelta(seconds=segment.end),
            content=text,
            proprietary="",
        )
    )

# Write the subtitles next to the audio file as "<name>.srt".
with open(file_name + ".srt", mode="w", encoding="utf-8") as f:
    f.write(srt.compose(subs))

In [None]:
# Create embedded subtitle file .ass from the generated .srt.
# -y overwrites an existing output so the cell can be re-run without ffmpeg
# stopping at its overwrite prompt; the quotes keep file names that contain
# spaces as a single argument.
!ffmpeg -y -i "{file_name}.srt" "{file_name}.ass"

In [None]:
# Create a single picture video file .mp4 without subtitles:
# the image is looped at 30000/1001 fps under the mp3 audio, and -shortest
# ends the output when the shorter input ends.
# -y overwrites an existing output so the cell can be re-run; the quotes keep
# file names that contain spaces as a single argument.
!ffmpeg -y \
    -loop 1 \
    -r 30000/1001 \
    -i "{image_file}" -i "{file_name}.mp3" \
    -vcodec libx264 \
    -acodec aac -strict experimental -ab 320k -ac 2 -ar 48000 \
    -pix_fmt yuv420p \
    -shortest \
    "{file_name}.mp4"

In [None]:
# Create a short video file .mp4 with subtitles burned in from the .ass file.
# -y overwrites an existing output so the cell can be re-run. The whole .ass
# path sits inside the filter's single quotes so that characters such as ','
# in the file name are not read as filtergraph syntax.
!ffmpeg -y \
    -i "{file_name}.mp4" \
    -vf "subtitles='{file_name}.ass':'force_style=FontName=TakaoPGothic,Fontsize=22,Bold=3,PrimaryColour=&H00CAF8F9,SecondaryColour=&H00FFFFFF,OutlineColour=&H00313131,Outline=2'" \
    "{file_name}_subtitles.mp4"

In [None]:
# Download the subtitled video "<file_name>_subtitles.mp4" to the local machine.
# Re-importing guarantees that `files` is the Colab helper module here, even
# if an earlier cell reused the name for something else.
from google.colab import files
download_files = files.download(file_name + "_subtitles.mp4")