In [1]:
# !pip install moviepy

In [2]:
import os
input_file = "video/thanawyat_al_nasim/Thanawyat_Al_Nasim_1_KSA.mp4"

video_directory = os.path.join(os.path.dirname(input_file), "output") #Local output directory for split files
gcs_bucket_name = "fab_public_bucket"  # GCS Bucket to use for temp folder
gcs_temp_working_dir = "video/temp"  # Path to temp folder in gcs bucket
split_size = "500M"

In [3]:
import subprocess

def split_mkv(input_file, output_file, split_size):
  """Splits an MKV file into smaller chunks using mkvmerge.

  Args:
    input_file: Path to the input MKV file.
    output_file: Base name for the output files.
    split_size: Size of each output file in bytes (b), kilobytes (k), 
                megabytes (m), or gigabytes (g).
  """

  try:
    cmd = [
        "mkvmerge",
        "--split", f"size:{split_size}",
        "-o", f"{output_file}.mp4",
        input_file
    ]
    subprocess.run(cmd, check=True)
    print(f"Successfully split {input_file} into {split_size} chunks.")
  except subprocess.CalledProcessError as e:
    print(f"Error splitting the file: {e}")

# Example usage:

parts = input_file.split('/')
output_file = '/'.join(parts[:-1]) + "/output/" + parts[-1].split('.')[0]  # String manipulation


split_mkv(input_file, output_file, split_size)

mkvmerge v52.0.0 ('Secret For The Mad') 64-bit
'video/thanawyat_al_nasim/Thanawyat_Al_Nasim_1_KSA.mp4': Using the demultiplexer for the format 'QuickTime/MP4'.
'video/thanawyat_al_nasim/Thanawyat_Al_Nasim_1_KSA.mp4' track 0: Using the output module for the format 'AVC/H.264'.
'video/thanawyat_al_nasim/Thanawyat_Al_Nasim_1_KSA.mp4' track 1: Using the output module for the format 'AAC'.
The file 'video/thanawyat_al_nasim/output/Thanawyat_Al_Nasim_1_KSA-001.mp4' has been opened for writing.
'video/thanawyat_al_nasim/Thanawyat_Al_Nasim_1_KSA.mp4' track 0: Extracted the aspect ratio information from the MPEG-4 layer 10 (AVC) video data and set the display dimensions to 3840/2160.
Progress: 15%
The cue entries (the index) are being written...
The file 'video/thanawyat_al_nasim/output/Thanawyat_Al_Nasim_1_KSA-002.mp4' has been opened for writing.
Progress: 31%
The cue entries (the index) are being written...
The file 'video/thanawyat_al_nasim/output/Thanawyat_Al_Nasim_1_KSA-003.mp4' has been 

In [4]:
from google.cloud import storage
import os

def copy_local_folder_to_gcs(local_folder, bucket_name, gcs_folder):
    """Copies all files from a local folder to a GCS bucket folder.

    Args:
        local_folder: The path to the local folder.
        bucket_name: The name of the GCS bucket.
        gcs_folder: The name of the folder within the GCS bucket (can be empty).
    """

    # Initialize the GCS client
    storage_client = storage.Client()

    # Get the bucket
    bucket = storage_client.bucket(bucket_name)

    blobs = bucket.list_blobs(prefix=gcs_folder)
    for blob in blobs:
        blob.delete()
        
    # Iterate through all files in the local folder
    for local_file in os.listdir(local_folder):
        local_file_path = os.path.join(local_folder, local_file)

        # Ensure it's a file, not a subdirectory
        if os.path.isfile(local_file_path):
            # Construct the GCS object path
            gcs_blob_name = os.path.join(gcs_folder, local_file) if gcs_folder else local_file

            # Create a blob object
            blob = bucket.blob(gcs_blob_name)

            # Upload the file
            try:
                blob.upload_from_filename(local_file_path)
                print(f"File {local_file_path} uploaded to gs://{bucket_name}/{gcs_blob_name}")
            except Exception as e:
                print(f"Error uploading {local_file_path}: {e}")

copy_local_folder_to_gcs(video_directory, gcs_bucket_name, gcs_temp_working_dir)

File video/thanawyat_al_nasim/output/Thanawyat_Al_Nasim_1_KSA-007.mp4 uploaded to gs://fab_public_bucket/video/temp/Thanawyat_Al_Nasim_1_KSA-007.mp4
File video/thanawyat_al_nasim/output/Thanawyat_Al_Nasim_1_KSA-002.mp4 uploaded to gs://fab_public_bucket/video/temp/Thanawyat_Al_Nasim_1_KSA-002.mp4
File video/thanawyat_al_nasim/output/Thanawyat_Al_Nasim_1_KSA-001.mp4 uploaded to gs://fab_public_bucket/video/temp/Thanawyat_Al_Nasim_1_KSA-001.mp4
File video/thanawyat_al_nasim/output/Thanawyat_Al_Nasim_1_KSA-003.mp4 uploaded to gs://fab_public_bucket/video/temp/Thanawyat_Al_Nasim_1_KSA-003.mp4
File video/thanawyat_al_nasim/output/Thanawyat_Al_Nasim_1_KSA-005.mp4 uploaded to gs://fab_public_bucket/video/temp/Thanawyat_Al_Nasim_1_KSA-005.mp4
File video/thanawyat_al_nasim/output/Thanawyat_Al_Nasim_1_KSA-006.mp4 uploaded to gs://fab_public_bucket/video/temp/Thanawyat_Al_Nasim_1_KSA-006.mp4
File video/thanawyat_al_nasim/output/Thanawyat_Al_Nasim_1_KSA-004.mp4 uploaded to gs://fab_public_bucket/v

In [5]:
import os

def list_files_in_directory(directory):
  """Lists all files in a directory and returns them as a list.

  Args:
    directory: The path to the directory.

  Returns:
    A list of strings, where each string is the full path to a file 
    in the directory. Returns an empty list if the directory is empty 
    or if the directory does not exist.
  """
  try:
    file_list = []
    for filename in os.listdir(directory):
      full_path = os.path.join(directory, filename)
      if os.path.isfile(full_path):  # Only add files, not subdirectories
        file_list.append(full_path)
    return file_list
  except FileNotFoundError:
    print(f"Directory '{directory}' not found.")
    return []
print(list_files_in_directory(video_directory))

['video/thanawyat_al_nasim/output/Thanawyat_Al_Nasim_1_KSA-007.mp4', 'video/thanawyat_al_nasim/output/Thanawyat_Al_Nasim_1_KSA-002.mp4', 'video/thanawyat_al_nasim/output/Thanawyat_Al_Nasim_1_KSA-001.mp4', 'video/thanawyat_al_nasim/output/Thanawyat_Al_Nasim_1_KSA-003.mp4', 'video/thanawyat_al_nasim/output/Thanawyat_Al_Nasim_1_KSA-005.mp4', 'video/thanawyat_al_nasim/output/Thanawyat_Al_Nasim_1_KSA-006.mp4', 'video/thanawyat_al_nasim/output/Thanawyat_Al_Nasim_1_KSA-004.mp4']


In [6]:
import random


def get_random_region():
    """Returns a random region from the provided list of regions.
    Args:
        regions_list: A list of strings, where each string is a region name.
    Returns:
        A randomly selected region string from the list.
    """
    
    regions_string = "europe-west4, europe-west9, europe-west2, europe-west3, europe-west1, europe-west6, europe-southwest1, europe-west8, europe-north1, europe-central2, us-east5, us-south1, us-central1, us-west4, us-east1, us-east4, us-west1"
    regions_list = [region.strip() for region in regions_string.split(',')]

    return random.choice(regions_list)


# Example usage:
# random_region = get_random_region(regions_list)
# print(f"Randomly selected region: {random_region}")

In [7]:
# import base64
import vertexai
from vertexai.generative_models import GenerativeModel, Part, SafetySetting

def generate(user_prompt, system_prompt, random_region, video_uri):
    vertexai.init(project="testfab-362608", location=random_region)

    final_response = ''
    model = GenerativeModel(
        "gemini-1.5-pro-001",
        system_instruction=[system_prompt]
    )
    
    video1 = Part.from_uri(
    mime_type="video/mp4",
    uri=video_uri,)
    
    generation_config = {
        "max_output_tokens": 8192,
        "temperature": 1,
        "top_p": 0.95,
        "response_mime_type": "application/json"
    }
    
    safety_settings = [
    SafetySetting(
        category=SafetySetting.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        threshold=SafetySetting.HarmBlockThreshold.OFF
        ),
        SafetySetting(
            category=SafetySetting.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
            threshold=SafetySetting.HarmBlockThreshold.OFF
        ),
        SafetySetting(
            category=SafetySetting.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            threshold=SafetySetting.HarmBlockThreshold.OFF
        ),
        SafetySetting(
            category=SafetySetting.HarmCategory.HARM_CATEGORY_HARASSMENT,
            threshold=SafetySetting.HarmBlockThreshold.OFF
        ),
    ]
    
    response = model.generate_content(
        [video1, user_prompt],
        generation_config=generation_config,
        safety_settings=safety_settings,
        stream=False,
    )

    return response.text

user_prompt = """<INSTRUCTIONS> 
Analyze the input video to identify all the key moments, themes, and emotional arcs. 
Recommend a list of top 4 key relevant video frame clips timestamps from the given input video thoughout the complete video. 
The maximum duration (end_timestamp - start_timestamp) of video frame clip will exclusively be below 10 seconds.
The last recommendation should be at the very end of the video.
</INSTRUCTIONS> 

<OUTPUT Format> 
JSON
[
{
start_timestamp: \"mm:ss\",
end_timestamp: \"mm:ss\",
reasoning: \"\"
}
]
</OUTPUT Format>"""

system_prompt = """You are a expert in content creation and generation. You never miss any key frames in the video which can be used for youtube shorts video generation. You possess a deep understanding of visual storytelling, pacing, and audience engagement techniques. Your goal is to distill the essence of a video into a concise and captivating highlight reel."""


files = list_files_in_directory(video_directory)
final_res = {}

for file in files:
    region = get_random_region()
    filename = file.split("/")[-1]
    video_uri = f"gs://{gcs_bucket_name}/{gcs_temp_working_dir}/{filename}"
    print(f"Region chosen: {region}")
    res=generate(user_prompt, system_prompt, region,video_uri)
    final_res[filename] = res

Region chosen: us-central1
Region chosen: us-east5
Region chosen: europe-west2
Region chosen: europe-west8
Region chosen: us-east5
Region chosen: us-east1
Region chosen: us-south1


In [8]:
print(final_res)

{'Thanawyat_Al_Nasim_1_KSA-007.mp4': '[{"start_timestamp": "00:00", "end_timestamp": "00:04", "reasoning": "The video starts with a group of four young men walking down a street at night. The men are dressed in white and they are each carrying a stick. The scene sets the tone for the rest of the video, which is dark and intense. The use of slow motion and close-ups creates a sense of suspense. The editing is fast-paced and the music is dramatic. All of these elements work together to create a powerful and engaging opening sequence."}, {"start_timestamp": "00:04", "end_timestamp": "00:19", "reasoning": " This section of the video is a montage of close-up shots of the four young men. The shots are intercut with each other, creating a sense of rhythm and energy. The men\'s expressions are intense and focused, suggesting that they are on a mission. The use of slow motion and the dramatic music add to the sense of suspense and anticipation."}, {"start_timestamp": "00:19", "end_timestamp": "

In [9]:
from google import genai
from google.genai import types
import base64

def generate(user_prompt):
  client = genai.Client(
      vertexai=True,
      project="testfab-362608",
      location="us-central1"
  )

  res=[]

  text1 = types.Part.from_text(user_prompt)

  model = "gemini-exp-1206"
  contents = [
    types.Content(
      role="user",
      parts=[
        text1
      ]
    )
  ]
  generate_content_config = types.GenerateContentConfig(
    temperature = 1,
    top_p = 1,
    seed = 0,
    max_output_tokens = 8192,
    response_modalities = ["TEXT"],
    safety_settings = [types.SafetySetting(
      category="HARM_CATEGORY_HATE_SPEECH",
      threshold="BLOCK_MEDIUM_AND_ABOVE"
    ),types.SafetySetting(
      category="HARM_CATEGORY_DANGEROUS_CONTENT",
      threshold="BLOCK_MEDIUM_AND_ABOVE"
    ),types.SafetySetting(
      category="HARM_CATEGORY_SEXUALLY_EXPLICIT",
      threshold="BLOCK_MEDIUM_AND_ABOVE"
    ),types.SafetySetting(
      category="HARM_CATEGORY_HARASSMENT",
      threshold="BLOCK_MEDIUM_AND_ABOVE"
    )],
    response_mime_type = "application/json",
  )

  answer=''
  for chunk in client.models.generate_content_stream(
    model = model,
    contents = contents,
    config = generate_content_config,
    ):
    res.append(chunk)
    
  return res

user_prompt = """<Instruction>
You have a JSON content that presents timestamp for highlights from multiple videos which are the results of the split of one large video. 
Choose the best, most spectacular highlight from the JSON content while following this constraint:
- At minimum one highlight per file.
- Maximum duration for one highlight is 10 seconds.
- Total cumulated duration (duration per highlight is calculated: end_timestamp - start_timestamp) for all the highlights is exactly 60 seconds.
- The final highlights chosen have to tell a story
</Instruction>

<JSON_Content>
"""+str(final_res)+"""
</JSON_Content>

<Output_Format>
[
{'filename':'',
'start_timestamp': "mm:ss",
'end_timestamp': "mm:ss"},
{'filename':'',
'start_timestamp': 'mm:ss',
'end_timestamp': 'mm:ss'},
...]
</Output_Format>"""

system_prompt = """You are a expert in content creation and generation. You never miss any key frames in the video which can be used for youtube shorts video generation. You possess a deep understanding of visual storytelling, pacing, and audience engagement techniques. Your goal is to distill the essence of a video into a concise and captivating highlight reel."""


results = generate(user_prompt)
final_highlight=''
for res in results:
 final_highlight = final_highlight + res.candidates[0].content.parts[0].text


print(final_highlight)
# res[0].candidates[0].content.parts[0].text

[
  {
    "filename": "Thanawyat_Al_Nasim_1_KSA-001.mp4",
    "start_timestamp": "01:25",
    "end_timestamp": "01:35"
  },
  {
    "filename": "Thanawyat_Al_Nasim_1_KSA-002.mp4",
    "start_timestamp": "06:00",
    "end_timestamp": "06:10"
  },
  {
    "filename": "Thanawyat_Al_Nasim_1_KSA-003.mp4",
    "start_timestamp": "01:29",
    "end_timestamp": "01:36"
  },
  {
    "filename": "Thanawyat_Al_Nasim_1_KSA-004.mp4",
    "start_timestamp": "01:49",
    "end_timestamp": "01:59"
  },
  {
    "filename": "Thanawyat_Al_Nasim_1_KSA-005.mp4",
    "start_timestamp": "02:49",
    "end_timestamp": "02:59"
  },
  {
    "filename": "Thanawyat_Al_Nasim_1_KSA-006.mp4",
    "start_timestamp": "01:51",
    "end_timestamp": "01:58"
  },
  {
    "filename": "Thanawyat_Al_Nasim_1_KSA-007.mp4",
    "start_timestamp": "00:19",
    "end_timestamp": "00:24"
  }
]


In [10]:
import json

def extract_tuples(json_string):
  """
  Extracts tuples from a JSON string and appends them to a list.

  Args:
    json_string: A string in JSON format containing highlight data.

  Returns:
    A list of tuples, where each tuple represents a highlight 
    with (filename, start_timestamp, end_timestamp).
  """
  data = json.loads(json_string)
  highlights_list = []

  for highlight in data:  # No need to use .get("highlights", []) here since the list is at the top level
    filename = highlight.get("filename")
    start_timestamp = highlight.get("start_timestamp")
    end_timestamp = highlight.get("end_timestamp")

    if filename and start_timestamp and end_timestamp:
      highlights_list.append((filename, start_timestamp, end_timestamp))

  return highlights_list

# Example Usage:
json_string = final_highlight
split_for_video = extract_tuples(json_string)
print(split_for_video)

[('Thanawyat_Al_Nasim_1_KSA-001.mp4', '01:25', '01:35'), ('Thanawyat_Al_Nasim_1_KSA-002.mp4', '06:00', '06:10'), ('Thanawyat_Al_Nasim_1_KSA-003.mp4', '01:29', '01:36'), ('Thanawyat_Al_Nasim_1_KSA-004.mp4', '01:49', '01:59'), ('Thanawyat_Al_Nasim_1_KSA-005.mp4', '02:49', '02:59'), ('Thanawyat_Al_Nasim_1_KSA-006.mp4', '01:51', '01:58'), ('Thanawyat_Al_Nasim_1_KSA-007.mp4', '00:19', '00:24')]


In [11]:
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
from moviepy import VideoFileClip
import os

def cut_video(input_file, output_file, start_time, end_time):
  """
  Cuts a video file from start_time to end_time using moviepy.

  Args:
    input_file: Path to the input video file.
    output_file: Path to save the output (cut) video file.
    start_time: Start time in "mm:ss" format (e.g., "01:30").
    end_time: End time in "mm:ss" format (e.g., "02:15").
  """

  def time_to_seconds(time_str):
      """Converts a time string in mm:ss format to seconds."""
      minutes, seconds = map(int, time_str.split(':'))
      return minutes * 60 + seconds

  start_seconds = time_to_seconds(start_time)
  print(start_seconds)
  end_seconds = time_to_seconds(end_time)
  print(end_seconds)
  
  # Method 1 (More Precise, but can be slower for large files or many cuts)
  # Uses VideoFileClip for loading, which allows for frame-accurate cutting
  # More suitable for editing workflows where you might be doing other
  # manipulations with the clip beyond just cutting.
  try:
      with VideoFileClip(input_file) as video:
          subclip = video.subclip(start_seconds, end_seconds)
          subclip.write_videofile(output_file)
  except Exception as e:
      print(f"Error using VideoFileClip: {e}")
      print("Falling back to ffmpeg_extract_subclip")
  
      # Method 2 (Faster, but may not be frame-accurate)
      # Uses ffmpeg_extract_subclip which is a direct call to ffmpeg
      # Much faster for simple cutting, especially with large files.
      # Precision depends on keyframe placement in the original video.
      try:
          ffmpeg_extract_subclip(input_file, start_seconds, end_seconds, targetname=output_file)
      except Exception as e:
          print(f"Error using ffmpeg_extract_subclip: {e}")
          

# Example Usage:
# input_video = "input.mp4"
# output_video = "output.mp4"
# start_time = "00:10"
# end_time = "00:30"

# cut_video(input_video, output_video, start_time, end_time)



# video clips
# from moviepy import *
 
# # loading video dsa gfg intro video
# clip = VideoFileClip("video/hajjan/output/Hajjan_KSA-001.mp4")
 
# # getting subclip as video is large
# chunk = clip.subclipped(55, 100)
# chunk.write_videofile("video/hajjan/output/chunk/Hajjan_KSA-001_1.mp4")
# clip.close()
# showing clip
# clip.ipython_display(width = 480)

chunk_directory = os.path.join(video_directory, 'chunk')
try:
    os.makedirs(chunk_directory)
    print(f"Directory '{chunk_directory}' created successfully.")
except FileExistsError:
    print(f"Directory '{chunk_directory}' already exists.")

for split in split_for_video:
    video_source = video_directory+"/"+split[0]
    video_destination = chunk_directory+"/"+split[0].split(".")[0]+"-"+split[1]+"_"+split[2]+".mp4"
    clip = VideoFileClip(video_source)
    chunk = clip.subclipped(split[1], split[2])
    chunk.write_videofile(video_destination)
    chunk.close()
    clip.close()

    
    
    # cut_video(video_source ,video_destination,split[1],split[2])
    


Directory 'video/thanawyat_al_nasim/output/chunk' created successfully.
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-23T10:17:24.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [3840, 2160], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '14763821', 'DURATION-eng': '00:04:44.600000000', 'NUMBER_OF_FRAMES-eng': '7115', 'NUMBER_OF_BYTES-eng': '525222934', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-23 10:17:24', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': None, 'metadata': {'Metadata': '', 'BPS-eng': '65303

                                                                   

MoviePy - Done.
MoviePy - Writing video video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-001-01:25_01:35.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-001-01:25_01:35.mp4
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-23T10:17:24.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [3840, 2160], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '9963033', 'DURATION-eng': '00:07:01.560000000', 'NUMBER_OF_FRAMES-eng': '10539', 'NUMBER_OF_BYTES-eng': '525002027', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-23 10:17:24', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': No

                                                                   

MoviePy - Done.
MoviePy - Writing video video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-002-06:00_06:10.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-002-06:00_06:10.mp4
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-23T10:17:24.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [3840, 2160], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '11334031', 'DURATION-eng': '00:06:08.480000000', 'NUMBER_OF_FRAMES-eng': '9212', 'NUMBER_OF_BYTES-eng': '522045490', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-23 10:17:24', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': No

                                                                   

MoviePy - Done.
MoviePy - Writing video video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-003-01:29_01:36.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-003-01:29_01:36.mp4
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-23T10:17:24.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [3840, 2160], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '10533874', 'DURATION-eng': '00:06:35.640000000', 'NUMBER_OF_FRAMES-eng': '9891', 'NUMBER_OF_BYTES-eng': '520952752', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-23 10:17:24', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': No

                                                                   

MoviePy - Done.
MoviePy - Writing video video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-004-01:49_01:59.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-004-01:49_01:59.mp4
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-23T10:17:24.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [3840, 2160], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '10357419', 'DURATION-eng': '00:06:42.360000000', 'NUMBER_OF_FRAMES-eng': '10059', 'NUMBER_OF_BYTES-eng': '520926413', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-23 10:17:24', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': N

                                                                   

MoviePy - Done.
MoviePy - Writing video video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-005-02:49_02:59.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-005-02:49_02:59.mp4
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-23T10:17:24.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [3840, 2160], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '10346907', 'DURATION-eng': '00:06:59.400000000', 'NUMBER_OF_FRAMES-eng': '10485', 'NUMBER_OF_BYTES-eng': '542436623', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-23 10:17:24', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': N

                                                                   

MoviePy - Done.
MoviePy - Writing video video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-006-01:51_01:58.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-006-01:51_01:58.mp4
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-23T10:17:24.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [3840, 2160], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '3324997', 'DURATION-eng': '00:02:38.520000000', 'NUMBER_OF_FRAMES-eng': '3963', 'NUMBER_OF_BYTES-eng': '65884825', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-23 10:17:24', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': None

                                                                   

MoviePy - Done.
MoviePy - Writing video video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-007-00:19_00:24.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-007-00:19_00:24.mp4


In [12]:
import re
import os
import datetime

def get_sorted_files(folder_path):
    """
    Returns a list of .mp4 files in the given folder, sorted by their index and start time.
    The index is assumed to be in the middle of the filename, e.g., 001 for Hajjan_KSA-001-07:43_07:55.mp4.
    The start time is extracted from the filename in the format MM:SS.
    """
    files = [f for f in os.listdir(folder_path) if f.endswith('.mp4')]

    # Sort files by index and then start time (using MM:SS format)
    files.sort(key=lambda f: (int(re.search(r'-(\d{3})-', f).group(1)),
                              datetime.datetime.strptime(re.search(r'-(\d{2}:\d{2})_', f).group(1), '%M:%S')))

    # Join the folder path and file name to get the full path
    full_paths = [os.path.join(folder_path, file) for file in files]
    return full_paths
order_file_list = get_sorted_files(chunk_directory)
order_file_list

['video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-001-01:25_01:35.mp4',
 'video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-002-06:00_06:10.mp4',
 'video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-003-01:29_01:36.mp4',
 'video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-004-01:49_01:59.mp4',
 'video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-005-02:49_02:59.mp4',
 'video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-006-01:51_01:58.mp4',
 'video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA-007-00:19_00:24.mp4']

In [13]:
import subprocess
import os

def merge_mp4_files(input_files, output_file):
  """
  Merges multiple MP4 files into a single MKV file using mkvmerge.

  Args:
    input_files: A list of paths to the MP4 files to merge, in the desired order.
    output_file: The path to save the output MKV file.
  """

  if not input_files:
      raise ValueError("Input file list cannot be empty.")

  if not all(os.path.exists(file) for file in input_files):
      raise FileNotFoundError("One or more input files do not exist.")

  if not output_file.endswith(".mkv"):
      output_file += ".mkv"

  # Build the mkvmerge command
  command = ["mkvmerge", "-o", output_file]

  # Add the first input file
  command.append(input_files[0])

  # Add the rest of the input files with +
  for file in input_files[1:]:
      command.append("+")
      command.append(file)
  
  try:
      # Run mkvmerge
      subprocess.run(command, check=True, capture_output=True, text=True)
      print(f"Successfully merged files into: {output_file}")
  
  except subprocess.CalledProcessError as e:
      print(f"Error during merging:")
      print(f"  Return code: {e.returncode}")
      print(f"  Stdout: {e.stdout}")
      print(f"  Stderr: {e.stderr}")
      raise

# Example Usage:

# input_file = "video/hajjan/Hajjan_KSA.mp4"

base_name, ext = os.path.splitext(input_file)
temp_fullpath_outputfile = base_name + "_trailer" + ext
print(f"temp_fullpath_outputfile: {temp_fullpath_outputfile}")
# merge_mp4_files(order_file_list, 
fullpath_outputdir = chunk_directory
print(f"fullpath_outputdir:{fullpath_outputdir}")
# output_file = "merged_video.mkv"
fullpath_outputfile = fullpath_outputdir+"/"+os.path.basename(temp_fullpath_outputfile)
print(fullpath_outputfile)
merge_mp4_files(order_file_list, fullpath_outputfile)

temp_fullpath_outputfile: video/thanawyat_al_nasim/Thanawyat_Al_Nasim_1_KSA_trailer.mp4
fullpath_outputdir:video/thanawyat_al_nasim/output/chunk
video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA_trailer.mp4
Successfully merged files into: video/thanawyat_al_nasim/output/chunk/Thanawyat_Al_Nasim_1_KSA_trailer.mp4.mkv
