<a href="https://colab.research.google.com/github/Ilya2raev/subtitles_generator/blob/master/Subtitles_generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from IPython.display import clear_output
!pip install git+https://github.com/openai/whisper.git -q
!pip install faster-whisper -q
!git clone https://git.ffmpeg.org/ffmpeg.git ffmpeg -q
!pip install pysubs2
!apt install libcublas11

!pip install moviepy
!pip install imageio==2.25.1
!apt install imagemagick
!pip install pysrt==1.1.2
!cat /etc/ImageMagick-6/policy.xml | sed 's/none/read,write/g'> /etc/ImageMagick-6/policy.xml
clear_output()

In [2]:
import ctypes
import gc
import os
import sys
import warnings

from faster_whisper import WhisperModel
import torch
import pysubs2
import pysrt
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
from google.colab import drive
# Make Google Drive available under /content/drive (cfg.video_path points there).
drive.mount(mountpoint='/content/drive')
# Silence every Python warning for the rest of the session.
warnings.filterwarnings(action='ignore')
# Wipe whatever the statements above printed.
clear_output()

In [4]:
# Display the CUDA version string reported by the installed PyTorch build
# (presumably a sanity check of the GPU runtime — confirm intent).
torch.version.cuda

'12.1'

In [5]:
class cfg:
    """Tunable settings shared by the transcription and rendering cells."""

    # --- Speech recognition ---
    # Passed to WhisperModel as `model_size_or_path`.
    whisper_model = "large-v2"
    # Passed to `model.transcribe` as its `language` argument.
    target_language = "en"

    # --- Input ---
    # Video that is transcribed and later re-rendered with subtitles;
    # the output file name is derived from this path.
    video_path = "/content/drive/MyDrive/Subtitles_generator/video.mp4"

    # --- Subtitle appearance (forwarded to moviepy's TextClip) ---
    bg_color = "transparent"
    fontsize = 16
    font = "Arial"
    color = "yellow"

## 1. ASR (automatic speech recognition)

In [None]:
# Load the Whisper checkpoint chosen in cfg and transcribe the video's audio.
model = WhisperModel(model_size_or_path=cfg.whisper_model)
segments, _ = model.transcribe(
    audio=cfg.video_path,
    language=cfg.target_language,
)

# Keep only start time, end time and text of every segment, as plain dicts.
results = [
    dict(start=segment.start, end=segment.end, text=segment.text)
    for segment in segments
]

# The model is not needed past this point: drop it and try to give memory
# back (CUDA cache, Python garbage, and the C allocator via malloc_trim).
del model
torch.cuda.empty_cache()
gc.collect()
ctypes.CDLL("libc.so.6").malloc_trim(0)
clear_output()

In [7]:
# Turn the list of Whisper segment dicts into a pysubs2 subtitle object.
subs = pysubs2.load_from_whisper(results)
# Write to the absolute path that the rendering step reads back
# ('/content/script.srt') instead of a path relative to the current working
# directory, so the two stay in sync even if the working directory changes.
subs.save('/content/script.srt')

## 2. Inserting the subtitles into the video

In [8]:
# Absolute path of the SRT subtitle file that is later opened with pysrt.
srtfilename = "/content/script.srt"

In [9]:
def time_to_seconds(time_obj):
    """Convert a timestamp object into a number of seconds.

    Args:
        time_obj: Object exposing ``hours``, ``minutes``, ``seconds`` and
            ``milliseconds`` attributes (here: the start/end of a subtitle).

    Returns:
        Total seconds, with the milliseconds as the fractional part.
    """
    whole_seconds = time_obj.hours * 3600 + time_obj.minutes * 60 + time_obj.seconds
    fractional_part = time_obj.milliseconds / 1000
    return whole_seconds + fractional_part

def create_subtitle_clips(subtitles, videosize, debug=False):
    """Build one positioned text clip per subtitle entry.

    Args:
        subtitles: Iterable of subtitle items exposing ``start``, ``end``
            (convertible by ``time_to_seconds``) and ``text``.
        videosize: ``(width, height)`` of the target video.
        debug: Currently unused; kept so existing callers keep working.

    Returns:
        List of text clips, each with its start time, duration and on-screen
        position set, in the same order as ``subtitles``.
    """
    # Layout depends only on the video size, so compute it once up front
    # instead of on every loop iteration.
    video_width, video_height = videosize
    caption_size = (video_width * 3 / 4, None)  # 3/4 of the width, auto height
    text_position = ('center', video_height * 4 / 5)  # centred, 4/5 down the frame

    subtitle_clips = []
    for subtitle in subtitles:
        start_time = time_to_seconds(subtitle.start)
        duration = time_to_seconds(subtitle.end) - start_time

        text_clip = TextClip(subtitle.text, fontsize=cfg.fontsize,
                             font=cfg.font, color=cfg.color,
                             bg_color=cfg.bg_color,
                             size=caption_size,
                             method='caption')
        text_clip = text_clip.set_start(start_time).set_duration(duration)
        subtitle_clips.append(text_clip.set_position(text_position))

    return subtitle_clips

In [None]:
# Load the source video; it is closed in the `finally` below so the underlying
# reader is released even if rendering fails.
video = VideoFileClip(cfg.video_path)
try:
    # Load the SRT file
    subtitles = pysrt.open(srtfilename)

    # Derive "<input without extension>_subtitled.mp4". os.path.splitext only
    # strips the final extension, so this also works when ".mp4" occurs
    # elsewhere in the path or the input has a different extension
    # (str.split(".mp4") fails to unpack in those cases).
    video_stem = os.path.splitext(cfg.video_path)[0]
    output_video_file = video_stem + '_subtitled' + ".mp4"

    print("Output file name: ", output_video_file)

    # Create subtitle clips
    subtitle_clips = create_subtitle_clips(subtitles, video.size)

    # Add subtitles to the video
    final_video = CompositeVideoClip([video] + subtitle_clips)

    # Write output video file
    final_video.write_videofile(output_video_file)
finally:
    video.close()