In [None]:
# Create an MP3 audio file for each timestamped subtitle block in the SRT file
import re
import os
from openai import OpenAI

client = OpenAI()

def process_srt_file(srt_file_path, encoding="utf-8-sig"):
    """Synthesize one MP3 file per subtitle block of an SRT file.

    The SRT file is split into subtitle blocks; the text of each block is
    sent to the text-to-speech endpoint and the audio is written to
    ``subAudio/subtitle_block_<n>.mp3``, where ``<n>`` is the 1-based
    position of the block in the file.

    Args:
        srt_file_path: Path to the ``.srt`` file to read.
        encoding: Text encoding used to read the file. The default
            ``"utf-8-sig"`` reads UTF-8 with or without a byte-order mark
            instead of depending on the platform's default encoding.
    """
    with open(srt_file_path, "r", encoding=encoding) as srt_file:
        srt_content = srt_file.read()

    # One subtitle block: index line, timecode line, then the text. The text
    # ends at the next blank line OR at the end of the file, so the final
    # block is still matched when the file does not end with a blank line.
    subtitle_pattern = re.compile(
        r"(\d+)\n(\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3})\n(.+?)(?:\n\n|\n*\Z)",
        re.DOTALL,
    )

    # Each match is a (block number, timecode line, text) tuple.
    subtitles = subtitle_pattern.findall(srt_content)
    output_folder = "subAudio"
    os.makedirs(output_folder, exist_ok=True)

    for idx, (_block_number, _timecodes, raw_text) in enumerate(subtitles, start=1):
        subtitle_text = raw_text.strip()
        print(f"Processing subtitle {idx}: {subtitle_text}")
        response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=subtitle_text
        )
        output_filename = os.path.join(output_folder, f"subtitle_block_{idx}.mp3")
        response.stream_to_file(output_filename)
        print(f"Subtitle block {idx} saved as {output_filename}")

# Subtitle file whose blocks are turned into speech, one MP3 per block.
srt_file_path = "output_subtitles.srt"
process_srt_file(srt_file_path=srt_file_path)


In [None]:
#this code cuts the video into small clips based on the timestamp from srt

import re
import os
from moviepy.video.io.VideoFileClip import VideoFileClip

def parse_srt_file(srt_file_path, encoding="utf-8-sig"):
    """Parse the timecodes and texts from the SRT file.

    Args:
        srt_file_path: Path to the ``.srt`` file to read.
        encoding: Text encoding used to read the file. The default
            ``"utf-8-sig"`` reads UTF-8 with or without a byte-order mark
            instead of depending on the platform's default encoding.

    Returns:
        A list of ``(block_number, start_timecode, end_timecode, text)``
        tuples of strings, in file order. Multi-line subtitle text keeps its
        internal newlines; the newline(s) terminating a block are excluded.
    """
    with open(srt_file_path, "r", encoding=encoding) as srt_file:
        srt_content = srt_file.read()

    # Regex to extract block number, timecodes, and subtitle text. The text
    # ends at the next blank line OR at the end of the file, so the final
    # block is still matched when the file does not end with a blank line.
    subtitle_pattern = re.compile(
        r"(\d+)\n(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})\n(.+?)(?:\n\n|\n*\Z)",
        re.DOTALL,
    )

    return subtitle_pattern.findall(srt_content)

def convert_timecode_to_seconds(timecode):
    """Return the number of seconds represented by an SRT timecode.

    The timecode must look like ``HH:MM:SS,mmm``: three colon-separated
    fields, the last of which holds seconds and milliseconds separated by a
    comma. The millisecond part is added as a fraction of a second.
    """
    hh, mm, sec_and_ms = timecode.split(":")
    ss, ms = sec_and_ms.split(",")
    whole_seconds = 3600 * int(hh) + 60 * int(mm) + int(ss)
    return whole_seconds + int(ms) / 1000

def cut_video_with_moviepy(video_file_path, srt_file_path):
    """Cut the video into parts based on the SRT timecodes using moviepy.

    One clip is written per subtitle block to
    ``video_clips/video_clip_<n>.mp4``, where ``<n>`` is the 1-based position
    of the block in the SRT file.

    Args:
        video_file_path: Path of the source video to cut.
        srt_file_path: Path of the SRT file whose timecodes define the cuts.
    """
    subtitles = parse_srt_file(srt_file_path)
    output_folder = "video_clips"
    os.makedirs(output_folder, exist_ok=True)

    video = VideoFileClip(video_file_path)
    try:
        for idx, (_block_number, start_time, end_time, _text) in enumerate(subtitles, start=1):
            # Convert timecodes to seconds
            start_seconds = convert_timecode_to_seconds(start_time)
            end_seconds = convert_timecode_to_seconds(end_time)

            # A subtitle may end slightly after the last frame; never ask for
            # footage beyond the end of the source video.
            if video.duration is not None:
                end_seconds = min(end_seconds, video.duration)

            video_clip = video.subclip(start_seconds, end_seconds)

            output_filename = os.path.join(output_folder, f"video_clip_{idx}.mp4")

            video_clip.write_videofile(output_filename, codec="libx264")

            print(f"Clip {idx} saved: {output_filename}")
    finally:
        # Release the source video's reader even if writing a clip fails.
        video.close()

# Inputs for this step: the subtitle file whose timecodes define the cuts,
# and the source video to be cut.
srt_file_path = "output_subtitles.srt"
video_file_path = "./videos/input.mp4"

# Split the source video according to the SRT timecodes.
cut_video_with_moviepy(video_file_path=video_file_path, srt_file_path=srt_file_path)


In [None]:
# Speed up or slow down each video clip so its duration matches its audio clip
from moviepy.editor import VideoFileClip, AudioFileClip
from moviepy.video.fx.all import speedx
import os

# Define directories
video_dir = 'video_clips'
audio_dir = 'subAudio'
output_dir = 'truncated_videos'

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Get sorted list of video and audio files. Clips and audio are paired by
# their position in these two sorted lists.
video_files = sorted(f for f in os.listdir(video_dir) if f.endswith('.mp4'))
audio_files = sorted(f for f in os.listdir(audio_dir) if f.endswith('.mp3'))

# zip() would silently drop the unmatched tail; a count mismatch means the
# positional pairing cannot be trusted, so stop instead.
if len(video_files) != len(audio_files):
    raise ValueError(
        f"Found {len(video_files)} video clips in {video_dir!r} but "
        f"{len(audio_files)} audio files in {audio_dir!r}; they must match."
    )

for video_file, audio_file in zip(video_files, audio_files):
    video_path = os.path.join(video_dir, video_file)
    audio_path = os.path.join(audio_dir, audio_file)

    video_clip = VideoFileClip(video_path)
    try:
        audio_clip = AudioFileClip(audio_path)
        try:
            video_duration = video_clip.duration
            audio_duration = audio_clip.duration

            if not audio_duration:
                raise ValueError(f"Audio file {audio_path!r} has no duration")

            # Calculate speed factor: > 1 speeds the video up (video longer
            # than audio), < 1 slows it down (video shorter than audio), so
            # the adjusted video lasts as long as the audio.
            speed_factor = video_duration / audio_duration
            adjusted_video = speedx(video_clip, factor=speed_factor)

            final_clip = adjusted_video.set_audio(audio_clip)

            output_path = os.path.join(output_dir, f'truncated_{video_file}')

            final_clip.write_videofile(output_path, codec='libx264', audio_codec='aac')
        finally:
            audio_clip.close()
    finally:
        # Release the readers of this pair before opening the next one.
        video_clip.close()


In [None]:
# Combine each audio file with its matching video clip
from moviepy.editor import VideoFileClip, AudioFileClip
import os

# Paths to the folders. The audio folder name must match the 'subAudio'
# folder created by the text-to-speech step exactly: on case-sensitive
# filesystems 'subaudio' is a different (non-existent) directory.
audio_folder = 'subAudio'
video_folder = 'truncated_videos'
output_folder = 'output_clips'

os.makedirs(output_folder, exist_ok=True)

audio_files = sorted(f for f in os.listdir(audio_folder) if f.endswith('.mp3'))
video_files = sorted(f for f in os.listdir(video_folder) if f.endswith('.mp4'))

# Files are paired by position in the sorted lists, so the counts must agree.
if len(audio_files) != len(video_files):
    raise ValueError("The number of audio files and video files do not match!")

for audio_file, video_file in zip(audio_files, video_files):
    audio_path = os.path.join(audio_folder, audio_file)
    video_path = os.path.join(video_folder, video_file)
    output_path = os.path.join(output_folder, f'combined_{video_file}')

    audio_clip = AudioFileClip(audio_path)
    try:
        video_clip = VideoFileClip(video_path)
        try:
            video_with_audio = video_clip.set_audio(audio_clip)

            video_with_audio.write_videofile(output_path, codec='libx264')
        finally:
            video_clip.close()
    finally:
        # Release the readers of this pair before opening the next one.
        audio_clip.close()

    print(f'Combined {audio_file} with {video_file} into {output_path}')


In [11]:
# Merge all clips together into a single video
from moviepy.editor import VideoFileClip, concatenate_videoclips
import os
import re

def extract_number(filename):
    """Return the first run of digits in ``filename`` as an int, or 0 if none.

    Used as a sort key so that files are ordered by their numeric index
    rather than alphabetically.
    """
    digits = re.search(r'(\d+)', filename)
    if digits is None:
        return 0
    return int(digits.group(0))

folder_path = 'output_clips'

# Order the clips by the number embedded in their file names.
file_list = [f for f in os.listdir(folder_path) if f.endswith('.mp4')]
file_list.sort(key=extract_number)

if not file_list:
    raise ValueError(f"No .mp4 files found in {folder_path!r}; nothing to merge.")

clips = []
try:
    for filename in file_list:
        clips.append(VideoFileClip(os.path.join(folder_path, filename)))

    # Combine clips
    final_clip = concatenate_videoclips(clips, method="compose")

    # Write the final output
    final_clip.write_videofile('output_combined.mp4', codec='libx264')
finally:
    # Release every opened source clip, even if opening a later file or
    # writing the output fails.
    for clip in clips:
        clip.close()


Moviepy - Building video output_combined.mp4.
MoviePy - Writing audio in output_combinedTEMP_MPY_wvf_snd.mp3


                                                                      

MoviePy - Done.
Moviepy - Writing video output_combined.mp4



                                                                

Moviepy - Done !
Moviepy - video ready output_combined.mp4
