In [1]:
from __future__ import unicode_literals
import argparse
import os
import re
from itertools import starmap
import multiprocessing

import pysrt
import imageio
import youtube_dl
import chardet
import nltk
#imageio.plugins.ffmpeg.download()
nltk.download('punkt')

from moviepy.editor import VideoFileClip, concatenate_videoclips
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
from sumy.summarizers.lsa import LsaSummarizer


#imageio.plugins.ffmpeg.download()

[nltk_data] Error loading punkt: <urlopen error [WinError 10060] A
[nltk_data]     connection attempt failed because the connected party
[nltk_data]     did not properly respond after a period of time, or
[nltk_data]     established connection failed because connected host
[nltk_data]     has failed to respond>


In [2]:
def summarize(srt_file, n_sentences, language="english"):
    """ Generate segmented summary

    Args:
        srt_file(str) : The name of the SRT FILE
        n_sentences(int): No of sentences
        language(str) : Language of subtitles (default to English)

    Returns:
        list: segment of subtitles

    """
    parser = PlaintextParser.from_string(
        srt_to_txt(srt_file), Tokenizer(language))
    stemmer = Stemmer(language)
    summarizer = LsaSummarizer(stemmer)
    summarizer.stop_words = get_stop_words(language)
    segment = []
    for sentence in summarizer(parser.document, n_sentences):
        index = int(re.findall("\(([0-9]+)\)", str(sentence))[0])
        item = srt_file[index]
        segment.append(srt_segment_to_range(item))
    return segment


In [3]:
def srt_to_txt(srt_file):
    """ Extract text from subtitles file

    Args:
        srt_file(str): The name of the SRT FILE

    Returns:
        str: extracted text from subtitles file

    """
    text = ''
    for index, item in enumerate(srt_file):
        if item.text.startswith("["):
            continue
        text += "(%d) " % index
        text += item.text.replace("\n", "").strip("...").replace(
                                     ".", "").replace("?", "").replace("!", "")
        text += ". "
    return text

In [4]:
def srt_segment_to_range(item):
    """ Handling of srt segments to time range

    Args:
        item():

    Returns:
        int: starting segment
        int: ending segment of srt

    """
    start_segment = item.start.hours * 60 * 60 + item.start.minutes * \
        60 + item.start.seconds + item.start.milliseconds / 1000.0
    end_segment = item.end.hours * 60 * 60 + item.end.minutes * \
        60 + item.end.seconds + item.end.milliseconds / 1000.0
    return start_segment, end_segment


In [5]:
def time_regions(regions):
    """ Duration of segments

    Args:
        regions():

    Returns:
        float: duration of segments

    """
    return sum(starmap(lambda start, end: end - start, regions))


def find_summary_regions(srt_filename, duration=30, language="english"):
    """ Find important sections

    Args:
        srt_filename(str): Name of the SRT FILE
        duration(int): Time duration
        language(str): Language of subtitles (default to English)

    Returns:
        list: segment of subtitles as "summary"

    """
    srt_file = pysrt.open(srt_filename)

    enc = chardet.detect(open(srt_filename, "rb").read())['encoding']
    srt_file = pysrt.open(srt_filename, encoding=enc)

    # generate average subtitle duration
    subtitle_duration = time_regions(
        map(srt_segment_to_range, srt_file)) / len(srt_file)
    # compute number of sentences in the summary file
    n_sentences = duration / subtitle_duration
    summary = summarize(srt_file, n_sentences, language)
    total_time = time_regions(summary)
    too_short = total_time < duration
    if too_short:
        while total_time < duration:
            n_sentences += 1
            summary = summarize(srt_file, n_sentences, language)
            total_time = time_regions(summary)
    else:
        while total_time > duration:
            n_sentences -= 1
            summary = summarize(srt_file, n_sentences, language)
            total_time = time_regions(summary)
    return summary


In [6]:
"""
from moviepy.editor import VideoFileClip
from moviepy.video.compositing.concatenate import concatenate_videoclips

def create_summary(filename, regions):
    input_video = VideoFileClip(filename)
    
    subclips = []
    last_end = 0
    
    for (start, end) in regions:
        # Ensure that t_start is within the valid range
        t_start = min(end, input_video.duration)
        
        subclip = input_video.subclip(t_start, end)
        subclips.append(subclip)
        last_end = end
    
    # Concatenate the subclips to create the summary
    summary = concatenate_videoclips(subclips)
    
    return summary, input_video.fps if input_video else 30   # Return the summary and the frame rate
""


SyntaxError: EOF while scanning triple-quoted string literal (3588453425.py, line 23)

In [7]:
""""
from moviepy.editor import VideoFileClip
from moviepy.video.compositing.concatenate import concatenate_videoclips

def create_summary(filename, regions, fps=30):
    input_video = VideoFileClip(filename)
    
    subclips = []
    last_end = 0
    
    for (start, end) in regions:
        # Ensure that t_start is within the valid range
        t_start = min(end, input_video.duration)
        
        subclip = input_video.subclip(t_start, end)
        subclips.append(subclip)
        last_end = end
    
    # Concatenate the subclips to create the summary
    summary = concatenate_videoclips(subclips)
    
    return summary, fps  # Return the summary and the frame rate""
""


In [12]:
def create_summary(filename, regions):
    """ Join segments

    Args:
        filename(str): filename
        regions():
    Returns:
        VideoFileClip: joined subclips in segment

    """
    subclips = []
    input_video = VideoFileClip(filename)
    last_end = 0
    for (start, end) in regions:
        subclip = input_video.subclip(start, end)
        subclips.append(subclip)
        last_end = end
    return concatenate_videoclips(subclips)


In [15]:

def get_summary(filename="2.mp4", subtitles="2.srt"):
    """Abstract function

    Args:
        filename(str): Name of the Video file (defaults to "1.mp4")
        subtitles(str): Name of the subtitle file (defaults to "1.srt")

    Returns:
        True

    """
    regions = find_summary_regions(subtitles, 60, "english")
    summary = create_summary(filename, regions)
    base, ext = os.path.splitext(filename)
    output = "{0}_1.mp4".format(base)
    summary.to_videofile(
                output,
                codec="libx264",
                temp_audiofile="temp.m4a", remove_temp=True, audio_codec="aac")
    return True



In [8]:
""""
def get_summary(filename="2.mp4", subtitles="2.srt"):
    """Abstract function

    Args:
        filename(str): Name of the Video file (defaults to "1.mp4")
        subtitles(str): Name of the subtitle file (defaults to "1.srt")

    Returns:
        True if successful, False otherwise

    """
    regions = find_summary_regions(subtitles, 60, "english")
    
    # Explicitly specify the desired frame rate (e.g., 30)
    summary, fps = create_summary(filename, regions, fps=30)

    if summary is None or fps is None:
        print("Error: Unable to create summary or fps is None.")
        return False

    base, ext = os.path.splitext(filename)
    output = "{0}_1.mp4".format(base)
    
    summary.to_videofile(
        output,
        codec="libx264",
        temp_audiofile="temp.m4a",
        remove_temp=True,
        audio_codec="aac",
        fps=fps
    )
    
    return True
    """


In [9]:
def download_video_srt(subs):
    """ Downloads specified Youtube video's subtitles as a vtt/srt file.

    Args:
        subs(str): Full url of Youtube video

    Returns:
        True


    The video will be downloaded as 1.mp4 and its subtitles as 1.(lang).srt
    Both, the video and its subtitles, will be downloaded to the same location
    as that of this script (sum.py)

    """
    ydl_opts = {
        'format': 'best',
        'outtmpl': '1.%(ext)s',
        'subtitlesformat': 'srt',
        'writeautomaticsub': True,
        # 'allsubtitles': True # Get all subtitles
    }

    movie_filename = ""
    subtitle_filename = ""
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        # ydl.download([subs])
        result = ydl.extract_info("{}".format(url), download=True)
        movie_filename = ydl.prepare_filename(result)
        subtitle_info = result.get("requested_subtitles")
        subtitle_language = subtitle_info.keys()[0]
        subtitle_ext = subtitle_info.get(subtitle_language).get("ext")
        subtitle_filename = movie_filename.replace(".mp4", ".%s.%s" %
                                                   (subtitle_language,
                                                    subtitle_ext))
    return movie_filename, subtitle_filename


In [10]:
if __name__ == '__main__':
    parser = argparse.ArgumentParser("Watch videos quickly")
    parser.add_argument('-i', '--video-file', help="Input video file")
    parser.add_argument('-s', '--subtitles-file',
                        help="Input subtitle file (srt)")
    parser.add_argument('-u', '--url', help="Video url", type=str)
    parser.add_argument('-k', '--keep-original-file',
                        help="Keep original movie & subtitle file",
                        action="store_true", default=False)

    args = parser.parse_args()

    url = args.url
    keep_original_file = args.keep_original_file

    if not url:
        # proceed with general summarization
        get_summary(args.video_file, args.subtitles_file)

    else:
        # download video with subtitles
        movie_filename, subtitle_filename = download_video_srt(url)
        summary_retrieval_process = multiprocessing.Process(target=get_summary, args=(movie_filename, subtitle_filename))
        summary_retrieval_process.start()
        summary_retrieval_process.join()
        if not keep_original_file:
            os.remove(movie_filename)
            os.remove(subtitle_filename)
            print("[sum.py] Remove the original files")


usage: Watch videos quickly [-h] [-i VIDEO_FILE] [-s SUBTITLES_FILE] [-u URL] [-k]
Watch videos quickly: error: unrecognized arguments: -f C:\Users\Pallavi\AppData\Roaming\jupyter\runtime\kernel-8cfd5932-5854-48df-98d2-3af3804436ce.json


SystemExit: 2

In [16]:
import argparse
import os
import multiprocessing


# Define the argument parser
class Args:
    parser = argparse.ArgumentParser(description="Watch videos quickly")

args = Args()
args.video_file = "2.mp4"
args.subtitles_file = "2.srt"
args.url = None  # Set to None if you want to use local files
args.keep_original_file = False  # Set to True if you want to keep the original files

url = args.url
keep_original_file = args.keep_original_file

if not url:
    # proceed with general summarization
    get_summary(args.video_file, args.subtitles_file)
else:
    # download video with subtitles
    movie_filename, subtitle_filename = download_video_srt(url)
    summary_retrieval_process = multiprocessing.Process(target=get_summary, args=(movie_filename, subtitle_filename))
    summary_retrieval_process.start()
    summary_retrieval_process.join()
    if not keep_original_file:
        os.remove(movie_filename)
        os.remove(subtitle_filename)
        print("[sum.py] Remove the original files")


Moviepy - Building video 2_1.mp4.
MoviePy - Writing audio in temp.m4a


                                                                                                                       

MoviePy - Done.
Moviepy - Writing video 2_1.mp4



                                                                                                                       

Moviepy - Done !
Moviepy - video ready 2_1.mp4


In [None]:
pip install imageio[ffmpeg]
