<a href="https://colab.research.google.com/github/ramsrigouthamg/Supertranslate.ai/blob/main/Burn_Subtitles_Into_Video/Burn_Subtitles_to_video_using_Moviepy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## How to permanently add or burn subtitles to a video using Moviepy

The mp4 and srt files used in this repository are available at: https://github.com/ramsrigouthamg/Supertranslate.ai

Author: [Ramsri Goutham](https://twitter.com/ramsri_goutham) 

In [3]:
from bs4 import BeautifulSoup
from bisect import bisect_left
from calendar import month_name

import requests

# link = "https://www.churchofjesuschrist.org/study/general-conference/2016/04/choices?lang=deu"

def get_download_link(link):
    """Build the media-CDN download URL for a General Conference talk page.

    The talk page is fetched and parsed to work out (a) which session the
    talk belongs to and its position inside that session, and (b) the
    speaker's name, which are then combined into the CDN file name.

    Parameters
    ----------
    link : str
        Talk URL of the form
        ``https://www.churchofjesuschrist.org/study/general-conference/<YYYY>/<MM>/<talk>?lang=<code>``.

    Returns
    -------
    str
        URL of the 1080p German (``deu``) mp4 on ``media2.ldscdn.org``.

    Raises
    ------
    ValueError
        If the URL does not have the expected shape, the month is not
        ``04`` or ``10``, or the page lacks the links / byline needed to
        derive the file name.
    requests.HTTPError
        If the talk page cannot be fetched successfully.
    """
    _, link_suffix = link.split(".org", 1)
    conf_info, talk_lang_info = link_suffix.rsplit("/", 1)
    # `lang` keeps the whole query string (e.g. "lang=deu"); the talk slug is unused.
    _, lang = talk_lang_info.rsplit("?", 1)

    # Validate the month before doing any network work.
    month_code = conf_info[-2:]
    month_names = {"04": "april", "10": "october"}
    if month_code not in month_names:
        raise ValueError(
            'Unsupported conference month "' + month_code + '" in URL; expected "04" or "10"'
        )
    month = month_names[month_code]
    year = conf_info[-7:-3]

    # Example target:
    # https://media2.ldscdn.org/assets/general-conference/april-2016-general-conference/2016-04-4010-thomas-s-monson-1080p-deu.mp4?download=true
    r = requests.get(link, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html.parser", multi_valued_attributes=None)

    all_links = [x.get('href') for x in soup.find_all('a')]
    talk_id = _talk_code(all_links, conf_info, lang, link_suffix)

    byline = soup.find(class_="byline")
    if byline is None:
        raise ValueError("Could not find the speaker byline on the talk page")
    speaker = _speaker_slug(byline.get_text())

    # NOTE(review): the language suffix is fixed to "deu" regardless of the
    # ?lang= value in `link` - confirm before using other languages.
    download_link = (
        "https://media2.ldscdn.org/assets/general-conference/"
        + month + "-" + str(year) + "-general-conference/"
        + str(year) + "-" + month_code + "-" + talk_id + "-" + speaker
        + "-1080p-deu.mp4?download=true"
    )
    return download_link


def _talk_code(all_links, conf_info, lang, link_suffix):
    """Return the numeric talk code used in the CDN file name.

    The code is ``<session number>0<position in session>0``, where both
    numbers are derived from the order of the hrefs in ``all_links``.
    """
    sessions = ["priesthood-session", "womens-session", "saturday-morning-session", "saturday-afternoon-session", "saturday-evening-session", "sunday-morning-session", "sunday-afternoon-session", "sunday-evening-session"]

    # Positions of the session links that actually exist for this conference.
    session_indices = []
    for s in sessions:
        try:
            session_indices.append(all_links.index(conf_info + '/' + s + '?' + lang))
        except ValueError:
            # This conference simply has no such session.
            continue
    session_indices.sort()
    if not session_indices:
        raise ValueError("No session links found on the talk page")

    try:
        my_index = all_links.index(link_suffix)
    except ValueError:
        raise ValueError("Talk link " + link_suffix + " not found on the talk page") from None

    # The talk belongs to the last session link that precedes it.
    talk_session_number = bisect_left(session_indices, my_index) - 1
    if talk_session_number < 0:
        # Without this check, index -1 would silently select the last session.
        raise ValueError("Talk link appears before any session link on the page")

    second = my_index - session_indices[talk_session_number]

    # NOTE(review): yields more than four digits when `second` >= 10 - confirm CDN naming.
    return str(talk_session_number) + "0" + str(second) + "0"


def _speaker_slug(byline_text):
    """Turn the byline text into the lower-case, dash-separated speaker name."""
    # Drop everything up to the first space (presumably the "By"/"Von" prefix).
    speaker = byline_text.split(" ", 1)[1]
    speaker = speaker.split('\n')[0]
    speaker = speaker.replace(u'\xa0', u' ')  # non-breaking spaces -> plain spaces
    speaker = '-'.join(speaker.split(' '))
    return speaker.replace(".", '').lower()

In [4]:
import os
from getpass import getpass

import assemblyai as aai
import requests

# Never commit API keys to a notebook: read the key from the environment and
# fall back to an interactive prompt. Any key that was previously committed
# here should be treated as leaked and revoked.
aai.settings.api_key = os.environ.get("ASSEMBLYAI_API_KEY") or getpass("AssemblyAI API key: ")

# language set as german (de)
# https://www.assemblyai.com/docs/Concepts/supported_languages
config = aai.TranscriptionConfig(language_code="de")
transcriber = aai.Transcriber(config = config)

### EDIT THIS ###
link = "https://www.churchofjesuschrist.org/study/general-conference/2016/04/i-am-a-child-of-god?lang=deu"
### EDIT THIS ###

# generate download link
download_link = get_download_link(link)

# download the video; the subtitle burn-in step reads it from "video.mp4".
# Stream to disk so the whole file is never held in memory.
with requests.get(download_link, allow_redirects=True, stream=True, timeout=60) as response:
    response.raise_for_status()
    with open("video.mp4", "wb") as video_file:
        for chunk in response.iter_content(chunk_size=1024 * 1024):
            video_file.write(chunk)

# transcribe video (AssemblyAI fetches the media directly from the URL)
transcript = transcriber.transcribe(download_link)
if transcript.status == aai.TranscriptStatus.error:
    raise RuntimeError("Transcription failed: " + str(transcript.error))

print(transcript.text)

# export the srt file; write UTF-8 explicitly so umlauts do not depend on the
# platform's default encoding (read the file back as UTF-8 as well).
with open("srt_export.srt", "w", encoding="utf-8") as srt_file:
    srt_file.write(transcript.export_subtitles_srt())

Zu unserer grundlegendsten Lehre gehört das Wissen, dass wir Kinder eines lebendigen Gottes sind. Deshalb ist einer seiner heiligsten Namen Vater oder Vater im Himmel. Diese Lehre ist von Propheten in allen Zeitaltern unmissverständlich verkündet worden. Als Mose vom Satan versucht wurde, wies er ihn mit den Worten zurück, Wer bist du? Denn siehe, ich bin ein Sohn Gottes. Der Psalmist verkündete dem Volk Israel, Ihr alle seid Söhne des Höchsten. Paulus erklärte den Athenern auf dem Areopak, sie seien von Gottes Art. Joseph Smith und Signe Rigdon sahen den Vater und den Sohn in einer Vision. Eine himmlische Stimme verkündete ihnen, dass die Bewohner der Welten für Gott gezeugte Söhne und Töchter sind. 1995 haben 15 lebende Apostel und Propheten bestätigt, alle Menschen sind als Abbild Gottes erschaffen. Jeder Mensch ist ein geliebter Geizsohn oder eine geliebte Geiztochter himmlischer Eltern. Präsident Thomas S. Monson hat bezeugt, wir sind Söhne und Töchter des lebendigen Gottes. Man k

In [5]:
# Inputs for the subtitle burn-in step: the SRT subtitles and the source video.
# Both are looked up relative to the working directory; edit if they live elsewhere.
srtfilename, mp4filename = "srt_export.srt", "video.mp4"

In [6]:
import sys
import pysrt
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
from moviepy.video.fx.all import crop
from moviepy.video.tools.subtitles import SubtitlesClip

def crop_video(mp4filename):
    """Load a video and crop it to a centred vertical strip.

    1280x720 input is cropped to 406 px wide and 1920x1080 input to 608 px
    wide (roughly a 9:16 portrait frame); any other resolution keeps its
    full width. The height is never changed.

    Parameters
    ----------
    mp4filename : str
        Path of the video file to load.

    Returns
    -------
    The cropped moviepy clip.
    """
    clip = VideoFileClip(mp4filename)
    width, height = clip.size

    # Target widths for the resolutions we know; fall back to "no crop".
    portrait_widths = {(1280, 720): 406, (1920, 1080): 608}
    target_width = portrait_widths.get((width, height), width)

    return crop(clip, width=target_width, height=height, x_center=width//2)

# # SubtitlesClip object generated with lambda function
# generator = lambda txt: TextClip(txt, font='Arial', fontsize=24, color='white')
# sub = SubtitlesClip("srt_export.srt", generator)



def time_to_seconds(time_obj):
    """Convert a timestamp with hours/minutes/seconds/milliseconds fields to seconds.

    Parameters
    ----------
    time_obj : object exposing ``hours``, ``minutes``, ``seconds`` and
        ``milliseconds`` attributes (e.g. the start/end of a pysrt subtitle).

    Returns
    -------
    float
        Total time in seconds, including the millisecond fraction.
    """
    whole_seconds = time_obj.hours * 3600 + time_obj.minutes * 60 + time_obj.seconds
    return whole_seconds + time_obj.milliseconds / 1000


def create_subtitle_clips(subtitles, videosize,fontsize=30, font='Arial', color='white', debug = False):
    """Build one positioned, timed text clip per subtitle entry.

    Parameters
    ----------
    subtitles : iterable of entries exposing ``start``, ``end`` and ``text``.
    videosize : (width, height) of the video the clips will be overlaid on.
    fontsize, font, color : text styling passed through to ``TextClip``.
    debug : accepted for compatibility; not used by this function.

    Returns
    -------
    list
        Text clips on a black background, 3/4 of the video width, centred
        horizontally and placed at 60% of the video height.
    """
    clips = []

    for entry in subtitles:
        begin = time_to_seconds(entry.start)
        length = time_to_seconds(entry.end) - begin

        frame_width, frame_height = videosize

        # 'caption' mode wraps the text to the given width; height is automatic.
        caption = TextClip(
            entry.text,
            fontsize=fontsize,
            font=font,
            color=color,
            bg_color = 'black',
            size=(frame_width*3/4, None),
            method='caption',
        )
        caption = caption.set_start(begin).set_duration(length)

        placement = ('center', frame_height * .6)
        clips.append(caption.set_position(placement))

    return clips


# Process video: crop to the portrait strip first so subtitles are sized
# against the final frame.
video = crop_video(mp4filename)

# Load the SRT file. It must be decoded as UTF-8: decoding UTF-8 text as
# ISO-8859-1 turns German umlauts into mojibake (e.g. "ö" -> "Ã¶") that
# would then be burned into the video.
subtitles = pysrt.open(srtfilename, encoding="utf-8")

# Create subtitle clips
subtitle_clips = create_subtitle_clips(subtitles,video.size)

# Add subtitles to the video (the video is the bottom layer)
final_video = CompositeVideoClip([video] + subtitle_clips)

# Write output video file
final_video.write_videofile('video_subtitled.mp4')




Output file name:  video_subtitled.mp4
Moviepy - Building video video_subtitled.mp4.
MoviePy - Writing audio in video_subtitledTEMP_MPY_wvf_snd.mp3


                                                                                                                       

MoviePy - Done.
Moviepy - Writing video video_subtitled.mp4



                                                                                                                       

Moviepy - Done !
Moviepy - video ready video_subtitled.mp4


