In [None]:
from moviepy.editor import VideoFileClip
import re
import os

In [None]:
def _srt_timestamp_to_seconds(hours, minutes, seconds, millis):
    """Convert the captured fields of an SRT timestamp (HH:MM:SS,mmm) to seconds."""
    return int(hours) * 3600 + int(minutes) * 60 + int(seconds) + int(millis) / 1000


def split_movie_by_subtitles(srt_file, movie_file, output_directory):
    """Cut a movie into one video clip (and one WAV file) per SRT subtitle.

    For every subtitle block that contains a ``start --> end`` timing line, the
    matching part of the movie is written to ``output_directory`` as
    ``"<n>. <text>.mp4"`` (libx264) and, when the clip has an audio track, as
    ``"<n>. <text>.wav"``. ``<n>`` is the 1-based position of the block in the
    file and ``<text>`` is the subtitle text reduced to letters and whitespace.

    Args:
        srt_file: Path to the SRT subtitle file. It is read as UTF-8; a leading
            BOM is ignored and undecodable bytes are replaced.
        movie_file: Path to the video file, opened with ``VideoFileClip``.
        output_directory: Directory that receives the clips; created if missing.

    Blocks without a timing line are ignored. Subtitles whose time range is
    empty after clamping to the movie duration are skipped with a message.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(output_directory, exist_ok=True)

    # 'utf-8-sig' drops a BOM if present; errors='replace' keeps a stray byte
    # from aborting the whole run (replacement chars are filtered out below).
    with open(srt_file, 'r', encoding='utf-8-sig', errors='replace') as file:
        srt_content = file.read()

    # Subtitle blocks are separated by one or more blank lines.
    subtitle_blocks = re.split(r'\n\s*\n', srt_content)

    # Compiled once instead of once per block.
    time_pattern = re.compile(r'(\d+):(\d+):(\d+),(\d+)\s*-->\s*(\d+):(\d+):(\d+),(\d+)')

    video = VideoFileClip(movie_file)
    try:
        duration = video.duration

        for index, subtitle_block in enumerate(subtitle_blocks, start=1):
            matches = time_pattern.search(subtitle_block)
            if not matches:
                continue

            fields = matches.groups()
            # Clamp the requested range to the movie itself.
            start_time = max(_srt_timestamp_to_seconds(*fields[:4]), 0)
            end_time = min(_srt_timestamp_to_seconds(*fields[4:]), duration)

            # A subtitle that starts at/after the end of the movie (or has a
            # zero-length range) cannot be cut; skip it rather than abort.
            if end_time <= start_time:
                print(f"Subtitle {index} skipped: {start_time} to {end_time} seconds is not a valid range.")
                continue

            # Text is everything after the timing line; this also works when
            # the numeric index line is missing. Multi-line text is joined.
            subtitle_lines = subtitle_block[matches.end():].split('\n')[1:]
            subtitle_text = ' '.join(subtitle_lines)
            # Keep only letters and whitespace so the text is safe in a filename.
            subtitle_text = ''.join(char for char in subtitle_text if char.isalpha() or char.isspace())

            # Create clip filenames based on the subtitle
            clip_filename = str(index) + ". " + subtitle_text + ".mp4"
            wav_filename = str(index) + ". " + subtitle_text + ".wav"

            # Extract the clip
            clip = video.subclip(start_time, end_time)
            clip.write_videofile(os.path.join(output_directory, clip_filename), codec="libx264")
            # Movies without an audio track expose audio as None.
            if clip.audio is not None:
                clip.audio.write_audiofile(os.path.join(output_directory, wav_filename))

            print(f"Clip {clip_filename} extracted from {start_time} to {end_time} seconds.")
    finally:
        # Release the underlying reader(s) even if an export fails part-way.
        video.close()


In [None]:
# Run the splitter: subtitle track, source movie, and the folder for the clips.
split_movie_by_subtitles(
    srt_file="Iron.Man.3.5.1.EnSub.2013BD1080p_SharePirate.srt",
    movie_file="Iron.Man.3.2013.BluRay 1080p.mkv",
    output_directory="output",
)