## Generate a complete subtitled video from just a link ##

# DEFINE FUNCTIONS

In [7]:
import sys
import pysrt
import requests
import copy

import assemblyai as aai

from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
from moviepy.video.fx.all import crop
from moviepy.video.tools.subtitles import SubtitlesClip

from bs4 import BeautifulSoup
from bisect import bisect_left
from calendar import month_name

# Use our local copy of ffmpeg, so we don't use the default linux binary
# import os
# os.environ["IMAGEIO_FFMPEG_EXE"] = "/usr/bin/ffmpeg"

# Place files in this path or modify the paths to point to where the files are
srtfilename = "srt_export.srt"


def _srt_timestamp(ms):
    """Format a millisecond offset as an SRT ``HH:MM:SS,mmm`` timestamp."""
    ms = int(ms)
    hours = ms // (1000 * 60 * 60)
    minutes = ms // (1000 * 60) % 60
    seconds = ms // 1000 % 60
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms % 1000:03d}"


def _find_split_point(text):
    """Return the index of the space at which to halve ``text``, or None.

    Prefers the first space at or after the midpoint; if there is none, falls
    back to the last space before the midpoint. The chosen space is never the
    first or last character, so both halves are guaranteed to be non-empty.
    """
    mid = len(text) // 2
    cut = text.find(' ', mid, len(text) - 1)
    if cut == -1:
        cut = text.rfind(' ', 1, mid)
    return None if cut == -1 else cut


def gen_SRT_from_sentences(sentences, start_quote = '', end_quote = ''):
    """Write ``sentences`` to ``srtfilename`` as SRT captions and locate a quote.

    Any sentence whose text is longer than 64 characters is split in two at a
    space near its middle (repeatedly, until every piece is short enough or
    cannot be split any further); both halves share the original time span,
    divided at its midpoint.

    Args:
        sentences: iterable of objects exposing ``text``, ``start`` and ``end``
            (times in milliseconds). The iterable itself is not modified.
        start_quote: text marking the first caption of the wanted excerpt;
            ``''`` disables the search.
        end_quote: text marking the last caption of the wanted excerpt;
            ``''`` disables the search.

    Returns:
        ``(start_time, end_time)`` in milliseconds. ``start_time`` is the start
        of the first caption containing ``start_quote``; ``end_time`` is the end
        of the first *later* caption containing ``end_quote``. Either is
        ``None`` when not found. As soon as the end quote is found the function
        returns, so captions after that point are not written to the file.
    """
    # Reversed private copy: the next sentence is popped from the tail and split
    # halves are pushed back onto it, which keeps both operations O(1) and
    # leaves the caller's list untouched.
    pending = list(sentences)[::-1]

    start_time = None
    end_time = None
    caption_number = 0

    # NOTE(review): the file is written with the platform default encoding,
    # while auto_subtitle() reads it back as iso-8859-1 -- confirm that non-ASCII
    # characters (e.g. umlauts) survive the round trip on the target platform.
    with open(srtfilename, "w") as f:
        while pending:
            s = pending.pop()

            # split sentence if too long
            if len(s.text) > 64:
                cut = _find_split_point(s.text)
                if cut is not None:
                    # TODO: figure out the start/end times in a fancy way by checking the values
                    split_time = (s.start + s.end) // 2

                    first_split = copy.deepcopy(s)
                    second_split = copy.deepcopy(s)
                    first_split.text = s.text[:cut]
                    second_split.text = s.text[cut + 1:]
                    first_split.end = split_time
                    second_split.start = split_time

                    # Push the second half first so the first half is handled
                    # next; each half is re-checked and may be split again.
                    pending.append(second_split)
                    pending.append(first_split)
                    continue
                # No usable space: fall through and write the long text as is.

            # times are measured in milliseconds
            f.write(str(caption_number) + '\n'
                    + _srt_timestamp(s.start) + ' --> ' + _srt_timestamp(s.end) + '\n')
            f.write(s.text)
            # two new lines separate subtitles
            f.write('\n\n')

            # The end quote is only looked for once a start has been recorded,
            # and is checked before the start quote, so it must appear in a
            # later caption than the one that contained the start quote.
            if start_time is not None and end_quote != "":
                if end_quote in s.text:
                    end_time = s.end
                    # found the end time, we can quit now (the file is closed
                    # by the surrounding ``with`` block)
                    return (start_time, end_time)

            if start_quote != "" and start_time is None:
                if start_quote in s.text:
                    start_time = s.start

            # Increment number for captions
            caption_number += 1

    return (start_time, end_time)
        

# crop video
def crop_video(mp4filename):
    """Open a video and crop it horizontally to a centred 9:16 portrait strip.

    The target width is 9/16 of the video height, rounded up to the next even
    number (this yields 406 for 1280x720 and 608 for 1920x1080, the two sizes
    that used to be special-cased). The full height is kept, and a video that
    is already narrower than the target width is left at its own width.

    Args:
        mp4filename: path or URL passed straight to ``VideoFileClip``.

    Returns:
        The cropped clip.
    """
    video = VideoFileClip(mp4filename)
    (w, h) = video.size

    # ceil(h * 9 / 16) using integer arithmetic, then bumped to an even value
    # (presumably because video encoders prefer even frame dimensions).
    nw = -(-h * 9 // 16)
    nw += nw % 2
    nw = min(w, nw)

    return crop(video, width=nw, height=h, x_center=w//2)


def time_to_seconds(time_obj):
    """Convert a clock-style time object to a number of seconds.

    Args:
        time_obj: object exposing ``hours``, ``minutes``, ``seconds`` and
            ``milliseconds`` attributes.

    Returns:
        The total as seconds, with the milliseconds as the fractional part.
    """
    whole_seconds = time_obj.hours * 3600 + time_obj.minutes * 60 + time_obj.seconds
    fraction = time_obj.milliseconds / 1000
    return whole_seconds + fraction


def create_subtitle_clips(subtitles, videosize,fontsize=32, font='Arial', color='white', debug = False):
    """Build one positioned, timed ``TextClip`` per subtitle entry.

    Args:
        subtitles: iterable of entries exposing ``start``, ``end`` (both passed
            to ``time_to_seconds``) and ``text``.
        videosize: ``(width, height)`` of the video the captions go on.
        fontsize, font, color: forwarded to ``TextClip``.
        debug: accepted but not used by this function.

    Returns:
        A list of text clips on a black background, each three quarters of the
        video width, horizontally centred, with its top at 60% of the video
        height, starting and lasting as long as its subtitle entry.
    """
    def build_clip(entry):
        begins_at = time_to_seconds(entry.start)
        ends_at = time_to_seconds(entry.end)
        shown_for = ends_at - begins_at

        frame_width, frame_height = videosize

        caption = TextClip(
            entry.text,
            fontsize=fontsize,
            font=font,
            color=color,
            bg_color='black',
            size=(frame_width*3/4, None),
            method='caption',
        )
        caption = caption.set_start(begins_at).set_duration(shown_for)
        return caption.set_position(('center', frame_height * .6))

    return [build_clip(entry) for entry in subtitles]


def gen_dl(month, year, conf_info, talk_id, speaker):
    """Assemble the candidate download URL for a talk's 1080p German video.

    Args:
        month: month name used in the folder part of the URL.
        year: conference year; converted with ``str``.
        conf_info: string whose last two characters are used as the month number.
        talk_id: talk code inserted after the month number.
        speaker: speaker slug inserted after the talk code.

    Returns:
        The URL as a string, ending in ``?download=true``.
    """
    base = "https://media2.ldscdn.org/assets/general-conference/"
    folder = month + "-" + str(year) + "-general-conference/"
    file_name = str(year) + "-" + conf_info[-2:] + "-" + talk_id + "-" + speaker + "-1080p-deu.mp4"
    return base + folder + file_name + "?download=true"


# link = "https://www.churchofjesuschrist.org/study/general-conference/2016/04/choices?lang=deu"
# https://www.churchofjesuschrist.org/study/general-conference/2023/04/58nelson?lang=eng
def get_download_link(link):
    """Work out the direct video download URL for a conference talk page.

    The talk page URL is parsed for the conference year/month and the talk
    slug, the page is fetched to read the speaker byline, and candidate URLs
    built by ``gen_dl`` are probed with HEAD requests until one answers OK.
    The four-character talk code has the shape ``<a>0<b>0``; an initial guess
    for ``a`` and ``b`` is derived from the URL or the page, and all 100
    combinations are then tried starting from that guess.

    Args:
        link: talk page URL, e.g.
            ``https://www.churchofjesuschrist.org/study/general-conference/2020/04/35eyring?lang=deu``

    Returns:
        The first candidate download URL that responds with an OK status.

    Raises:
        ValueError: if the URL cannot be parsed, its month is not ``04`` or
            ``10``, or the page lacks the information needed (talk link,
            session links, byline).
        RuntimeError: if none of the candidate URLs exists.
    """
    # --- Parse the URL first, so bad input fails before any network access ---
    _, separator, link_suffix = link.partition(".org")
    if not separator:
        raise ValueError("Link does not contain '.org': " + repr(link))
    conf_info, talk_lang_info = link_suffix.rsplit("/", 1)
    talk, lang = talk_lang_info.rsplit("?", 1)

    month_names = {"04": "april", "10": "october"}
    month_code = conf_info[-2:]
    if month_code not in month_names:
        raise ValueError("ERROR, weird month in URL: " + repr(month_code))
    month = month_names[month_code]
    year = conf_info[-7:-3]

    # --- Fetch the talk page ---
    r = requests.get(link)
    soup = BeautifulSoup(r.content, "html.parser", multi_valued_attributes=None)

    ## Figure out the starting guess for the two variable digits of the code
    if talk[:2].isdigit():
        # modern urls have the code in the url (e.g. "35eyring")
        talk_session_number, second = int(talk[0]), int(talk[1])
    else:
        all_links = [x.get('href') for x in soup.find_all('a')]

        # get reference indices of the sessions
        sessions = ["priesthood-session", "womens-session", "saturday-morning-session", "saturday-afternoon-session", "saturday-evening-session", "sunday-morning-session", "sunday-afternoon-session", "sunday-evening-session"]
        session_indices = []
        for s in sessions:
            try:
                session_indices.append(all_links.index(conf_info + '/' + s + '?' + lang))
            except ValueError:
                # this conference has no link for that session
                continue
        if not session_indices:
            raise ValueError("No session links found on the page for " + repr(link))
        session_indices.sort()

        # find the index of our talk (ValueError if the page does not link to it)
        my_index = all_links.index(link_suffix)

        # which session number is this? (determines first digit, 0-indexed)
        talk_session_number = bisect_left(session_indices, my_index) - 1

        # second number: position of the talk after its session link
        second = my_index - session_indices[talk_session_number]

    # --- Get the name of the speaker for the link ---
    byline = soup.find(class_="byline")
    if byline is None:
        raise ValueError("No speaker byline found on the page for " + repr(link))
    speaker = byline.get_text()
    # remove weird chars at beginning of string
    while speaker and not speaker[0].isalpha():
        speaker = speaker[1:]
    if not speaker:
        raise ValueError("Speaker byline is empty on the page for " + repr(link))

    title, speaker = speaker.split(" ", 1)
    if title != "Elder" and title != "Präsident":
        # title is actually their first name
        speaker = title + " " + speaker
    speaker = speaker.split('\n')[0]
    speaker = speaker.replace(u'\xa0', u' ')  # non-breaking spaces -> plain spaces

    speaker = '-'.join(speaker.split(' '))
    speaker = speaker.replace(".", '').lower()

    # --- Probe candidate download links, starting from the guessed digits ---
    for i in range(10):
        for j in range(10):
            # modify the two numbers (0-9)
            talk_id = str((talk_session_number + i) % 10) + "0" + str((second + j) % 10) + "0"

            download_link = gen_dl(month, year, conf_info, talk_id, speaker)
            print("Trying link: " + download_link)
            r = requests.head(download_link)
            if r.status_code == requests.codes.ok:
                return download_link

    # no valid download link could be found
    raise RuntimeError("No valid download link could be constructed")


def auto_subtitle(link = "", id="", verbose=True, crop=True, start_quote = "", end_quote = "", language_code = "de"):
    """Download a talk, transcribe it, burn in subtitles and export the video.

    Steps: resolve the download URL with ``get_download_link``; obtain an
    AssemblyAI transcript (fetched by ``id`` when given, otherwise created from
    the download URL); write the sentences to ``srtfilename``; load the video
    (cropped via ``crop_video`` when ``crop`` is true); overlay the subtitle
    clips; trim to the quote when both quote boundaries were found; and write
    ``<talk-slug>_subtitled.mp4``.

    Args:
        link: talk page URL. Always required, because the video is located
            from it even when a transcript ``id`` is supplied.
        id: existing AssemblyAI transcript ID; ``""`` to transcribe afresh.
            (The name shadows the ``id`` builtin inside this function.)
        verbose: print progress messages.
        crop: crop the video with ``crop_video``. (The name shadows the
            imported moviepy ``crop`` function inside this function only.)
        start_quote, end_quote: text marking the excerpt to keep; ``""`` to
            keep the whole talk.
        language_code: language passed to the transcription config.

    Returns:
        The final composited (and possibly trimmed) video clip.

    Raises:
        ValueError: if ``link`` is empty.
    """
    import os  # local import: only needed for the API-key override below

    if link == "" and id == "":
        raise ValueError("Please provide a link or ID to use")
    if link == "":
        raise ValueError("A talk link is required to locate the video, even when a transcript ID is given")

    print("Starting to Subtitle")
    # SECURITY: an API key is hard-coded here and therefore lives in version
    # control. Prefer setting the ASSEMBLYAI_API_KEY environment variable; the
    # literal is kept only as a backward-compatible fallback and should be
    # rotated and removed.
    aai.settings.api_key = os.environ.get("ASSEMBLYAI_API_KEY", "128f6a73223443e78c8f67e9cdc3b059")
    # language defaults to german (de)
    # https://www.assemblyai.com/docs/Concepts/supported_languages
    config = aai.TranscriptionConfig(
        language_code=language_code,
        word_boost = ["David A. Bednar", "Bednar", "Erretter", "Erretters"],
        boost_param="high")

    config.set_custom_spelling(
      {
        "Bednar": ["Bettner"],
      }
    )

    # get download link
    download_link = get_download_link(link)
    if verbose:
        print("Download link generated: " + download_link)

    # if id, use the provided ID
    if id != "":
        transcript = aai.Transcript.get_by_id(id)
        if verbose:
            print("Transcription acquired by ID")
    # otherwise handle the link
    else:
        transcriber = aai.Transcriber(config=config)
        if verbose:
            print("Subtitling the talk found at: " + link)

        transcript = transcriber.transcribe(download_link)
        if verbose:
            print("Transcription acquired by transcribe method call")
            print("ID: ", transcript.id)

    # export the srt file from our transcription
    sentences = transcript.get_sentences()

    # convert sentences into srt
    start_time, end_time = gen_SRT_from_sentences(sentences, start_quote, end_quote)
    if verbose:
        print("SRT file generated")

        print("Start Time: " + str(start_time))
        print("End Time: " + str(end_time))

    # Load the video, cropped if requested. Without the else branch ``video``
    # would be undefined whenever crop is False.
    if crop:
        if verbose:
            print("Cropping video to 9:16 aspect ratio")
        video = crop_video(download_link)
    else:
        video = VideoFileClip(download_link)

    # Load SRT file
    if verbose:
        print("Opening SRT File")
    subtitles = pysrt.open(srtfilename, encoding="iso-8859-1")

    # Create subtitle clips
    if verbose:
        print("Generating Subtitle Clips")
    subtitle_clips = create_subtitle_clips(subtitles,video.size)

    # Add subtitles to the video
    if verbose:
        print("Adding subtitles to video")
    video = CompositeVideoClip([video] + subtitle_clips)

    # Trim video (quote times are in milliseconds, trim_video wants seconds)
    if start_time is not None and end_time is not None:
        video = trim_video(video, start_time/1000, end_time/1000)

    if verbose:
        print("Final Video completed. Exporting...")

    # Name the output after the talk slug in the link, e.g. "35eyring".
    try:
        output_name = link.rsplit("/",1)[1].split("?",1)[0] + '_subtitled.mp4'
    except IndexError:
        output_name = "video_subtitled.mp4"

    # Write output video file. Best effort: if writing under the derived name
    # fails, retry under a fixed name. ``Exception`` (not a bare except) so
    # that Ctrl-C / kernel interrupt aborts instead of restarting the export.
    try:
        video.write_videofile(output_name)
    except Exception as err:
        print("Could not write " + output_name + " (" + str(err) + "); retrying as video_subtitled.mp4")
        video.write_videofile("video_subtitled.mp4")

    print("Done. Final length of video: " + str(video.duration))

    return video



In [8]:
from moviepy.editor import *

def trim_video(video, start, end, speedup = True, max_duration = 90, export = False):
    """Trims the video to the start and end timestamps, given in seconds.

    Args:
        video: clip to trim; must provide ``subclip``, ``duration`` and ``fx``
            (and ``write_videofile`` when ``export`` is true).
        start: start of the excerpt in seconds; ``None`` means 0.
        end: end of the excerpt in seconds; ``None`` means the clip's duration.
        speedup: when true (the default), an excerpt longer than
            ``max_duration`` is sped up so that it lasts ``max_duration``.
        max_duration: longest allowed excerpt in seconds before speeding up.
        export: when true, also write the result to disk, as
            ``<name>_trimmed.mp4`` if the input clip has a ``filename``
            attribute and as ``video_trimmed.mp4`` otherwise.

    Returns:
        The trimmed (and possibly sped-up) clip.
    """
    # Remember where the clip came from (if it knows) for the export name.
    source_name = getattr(video, "filename", None)

    if start is None:
        start = 0
    if end is None:
        end = video.duration
    video = video.subclip(start, end)

    # if the video is longer than max_duration seconds, we'll have to speed it up
    d = video.duration
    print("Trimmed video at timestamps " + str(start) + " - " + str(end))
    print("New length of video is " + str(d) + " seconds")
    if speedup and d > max_duration:
        # how much must we speed up to reach max_duration?
        factor = d / max_duration
        # apply speed
        video = video.fx( vfx.speedx, factor)
        print("Needed to speed up by a factor of " + str(factor))
        print("Was " + str(d) + ", now is " + str(video.duration))

    # export video
    if export:
        if source_name:
            output_name = source_name.split('.mp4')[0] + "_trimmed.mp4"
        else:
            output_name = "video_trimmed.mp4"
        video.write_videofile(output_name)

    return video
        

In [None]:
### EDIT THIS ###
link = "https://www.churchofjesuschrist.org/study/general-conference/2020/04/35eyring?lang=deu"
### EDIT THIS ###

# Leave empty to create a new transcription instead of reusing an existing one.
id = ""

# The exported clip is trimmed to run from the caption containing the first
# phrase to the caption containing the second.
start_quote, end_quote = "Der Herr hat das alles", "bereit zu sein"

result = auto_subtitle(
    link=link,
    id=id,
    start_quote=start_quote,
    end_quote=end_quote,
)
# result.ipython_display()
#https://media2.ldscdn.org/assets/general-conference/october-2020-general-conference/2020-10-2030-steven-j-lund-1080p-deu.mp4?download=true

Starting to Subtitle
Trying link: https://media2.ldscdn.org/assets/general-conference/april-2020-general-conference/2020-04-3050-henry-b-eyring-1080p-deu.mp4?download=true
Download link generated: https://media2.ldscdn.org/assets/general-conference/april-2020-general-conference/2020-04-3050-henry-b-eyring-1080p-deu.mp4?download=true
Subtitling the talk found at: https://www.churchofjesuschrist.org/study/general-conference/2020/04/35eyring?lang=deu
Transcription acquired by transcribe method call
ID:  6ja94ki8i2-e6a0-4832-be0d-30e7c2aa17bb
SRT file generated
Start Time: 674984
End Time: 754939
Cropping video to 9:16 aspect ratio
Opening SRT File
Generating Subtitle Clips
Adding subtitles to video
Trimmed video at timestamps 674.984 - 754.939
New length of video is 79.95499999999993 seconds
Final Video completed. Exporting...
Moviepy - Building video 35eyring_subtitled.mp4.
MoviePy - Writing audio in 35eyring_subtitledTEMP_MPY_wvf_snd.mp3


                                                                                                                       

MoviePy - Done.
Moviepy - Writing video 35eyring_subtitled.mp4



                                                                                                                       

Moviepy - Done !
Moviepy - video ready 35eyring_subtitled.mp4
Done. Final length of video: 79.95499999999993
Moviepy - Building video __temp__.mp4.
MoviePy - Writing audio in __temp__TEMP_MPY_wvf_snd.mp3


                                                                                                                       

MoviePy - Done.
Moviepy - Writing video __temp__.mp4



t:  40%|██████████████████████████▍                                       | 962/2396 [00:16<00:21, 67.22it/s, now=None]

In [None]:
from ipywidgets import *
from IPython.display import display

# Build a form for auto_subtitle from the initial values below, with a
# button that triggers the call, and show it.
im = interact_manual(
    auto_subtitle,
    link="",
    id="",
    start_quote="",
    end_quote="",
    crop=True,
    verbose=True,
    language_code="de",
)

display(im)

In [None]:
from ipywidgets import *
from IPython.display import display



# Output area used by the click handler for its status message.
output = widgets.Output()
# Button that starts the reel creation.
button = widgets.Button(description="Create Reel")


def on_button_clicked(b):
    """Click handler: run ``auto_subtitle`` with the values from the form widgets.

    Reads the module-level widgets ``link``, ``id``, ``start``, ``end``,
    ``cropv`` and ``la``. Empty or missing text values are passed on as ``""``,
    which is the value ``auto_subtitle`` compares against for "not provided".

    Args:
        b: the button that was clicked (unused).
    """
    with output:
        print("Creating Reel...")

    # Normalise the text inputs: None or '' both become ''.
    talk_link = link.value or ""
    transcript_id = id.value or ""
    quote_start = start.value or ""
    quote_end = end.value or ""

    auto_subtitle(
        link=talk_link,
        id=transcript_id,
        start_quote=quote_start,
        end_quote=quote_end,
        crop=cropv.value,
        language_code=la.value,
    )


# Run on_button_clicked whenever the button is pressed.
button.on_click(on_button_clicked)

# Shared style for the widgets below.
d = {'description_width': 'initial'}


def _wide_text(placeholder, description):
    """Create an enabled text box (value=None) at least 700px wide."""
    return widgets.Text(
        value=None,
        placeholder=placeholder,
        description=description,
        disabled=False,
        layout=Layout(min_width='700px'),
        style=d,
    )


link = _wide_text(
    'https://www.churchofjesuschrist.org/study/general-conference/2020/10/23lund?lang=deu',
    'Conference Talk Link:',
)

id = _wide_text(
    '6jxqr57zbv-dbb0-4f81-92ff-67360da09bae',
    'Transcription ID (optional):',
)

start = _wide_text('decisions determine', 'Start of Quote (optional):')

end = _wide_text('destiny', 'End of Quote (optional):')

cropv = widgets.Checkbox(
    value=True,
    description='Crop to 9:16 aspect ratio',
    disabled=False,
    indent=False,
)

la = widgets.Combobox(
    value='de',
    placeholder='',
    options=['de', 'en'],
    description='Language Code:',
    ensure_option=True,
    disabled=False,
    style=d,
)

# Form layout: link, then language + crop side by side, then the text inputs.
display(link, HBox([la, cropv]), id, start, end)


# Created here but not displayed or updated in this cell.
progress = widgets.IntProgress(value=4, max=10, description = "Clip progress")
display(button, output)