In [13]:
# !pip install moviepy

In [14]:
import os
input_file = "video/rsl/RSL1_GCC.mp4"

video_directory = os.path.join(os.path.dirname(input_file), "output") #Local output directory for split files
gcs_bucket_name = "fab_public_bucket"  # GCS Bucket to use for temp folder
gcs_temp_working_dir = "video/temp_sport"  # Path to temp folder in gcs bucket
split_size = "500M"

In [15]:
import subprocess

def split_mkv(input_file, output_file, split_size):
  """Splits an MKV file into smaller chunks using mkvmerge.

  Args:
    input_file: Path to the input MKV file.
    output_file: Base name for the output files.
    split_size: Size of each output file in bytes (b), kilobytes (k), 
                megabytes (m), or gigabytes (g).
  """

  try:
    cmd = [
        "mkvmerge",
        "--split", f"size:{split_size}",
        "-o", f"{output_file}.mp4",
        input_file
    ]
    subprocess.run(cmd, check=True)
    print(f"Successfully split {input_file} into {split_size} chunks.")
  except subprocess.CalledProcessError as e:
    print(f"Error splitting the file: {e}")

# Example usage:

parts = input_file.split('/')
output_file = '/'.join(parts[:-1]) + "/output/" + parts[-1].split('.')[0]  # String manipulation


split_mkv(input_file, output_file, split_size)

mkvmerge v52.0.0 ('Secret For The Mad') 64-bit
'video/rsl/RSL1_GCC.mp4': Using the demultiplexer for the format 'QuickTime/MP4'.
'video/rsl/RSL1_GCC.mp4' track 0: Using the output module for the format 'AVC/H.264'.
'video/rsl/RSL1_GCC.mp4' track 1: Using the output module for the format 'AAC'.
The file 'video/rsl/output/RSL1_GCC-001.mp4' has been opened for writing.
'video/rsl/RSL1_GCC.mp4' track 0: Extracted the aspect ratio information from the MPEG-4 layer 10 (AVC) video data and set the display dimensions to 1920/1080.
Progress: 8%
The cue entries (the index) are being written...
The file 'video/rsl/output/RSL1_GCC-002.mp4' has been opened for writing.
Progress: 16%
The cue entries (the index) are being written...
The file 'video/rsl/output/RSL1_GCC-003.mp4' has been opened for writing.
Progress: 24%
The cue entries (the index) are being written...
The file 'video/rsl/output/RSL1_GCC-004.mp4' has been opened for writing.
Progress: 32%
The cue entries (the index) are being written..

In [16]:
from google.cloud import storage
import os

def copy_local_folder_to_gcs(local_folder, bucket_name, gcs_folder):
    """Copies all files from a local folder to a GCS bucket folder.

    Args:
        local_folder: The path to the local folder.
        bucket_name: The name of the GCS bucket.
        gcs_folder: The name of the folder within the GCS bucket (can be empty).
    """

    # Initialize the GCS client
    storage_client = storage.Client()

    # Get the bucket
    bucket = storage_client.bucket(bucket_name)

    blobs = bucket.list_blobs(prefix=gcs_folder)
    for blob in blobs:
        blob.delete()
        
    # Iterate through all files in the local folder
    for local_file in os.listdir(local_folder):
        local_file_path = os.path.join(local_folder, local_file)

        # Ensure it's a file, not a subdirectory
        if os.path.isfile(local_file_path):
            # Construct the GCS object path
            gcs_blob_name = os.path.join(gcs_folder, local_file) if gcs_folder else local_file

            # Create a blob object
            blob = bucket.blob(gcs_blob_name)

            # Upload the file
            try:
                blob.upload_from_filename(local_file_path)
                print(f"File {local_file_path} uploaded to gs://{bucket_name}/{gcs_blob_name}")
            except Exception as e:
                print(f"Error uploading {local_file_path}: {e}")

copy_local_folder_to_gcs(video_directory, gcs_bucket_name, gcs_temp_working_dir)

File video/rsl/output/RSL1_GCC-002.mp4 uploaded to gs://fab_public_bucket/video/temp_sport/RSL1_GCC-002.mp4
File video/rsl/output/RSL1_GCC-011.mp4 uploaded to gs://fab_public_bucket/video/temp_sport/RSL1_GCC-011.mp4
File video/rsl/output/RSL1_GCC-006.mp4 uploaded to gs://fab_public_bucket/video/temp_sport/RSL1_GCC-006.mp4
File video/rsl/output/RSL1_GCC-009.mp4 uploaded to gs://fab_public_bucket/video/temp_sport/RSL1_GCC-009.mp4
File video/rsl/output/RSL1_GCC-008.mp4 uploaded to gs://fab_public_bucket/video/temp_sport/RSL1_GCC-008.mp4
File video/rsl/output/RSL1_GCC-004.mp4 uploaded to gs://fab_public_bucket/video/temp_sport/RSL1_GCC-004.mp4
File video/rsl/output/RSL1_GCC-001.mp4 uploaded to gs://fab_public_bucket/video/temp_sport/RSL1_GCC-001.mp4
File video/rsl/output/RSL1_GCC-013.mp4 uploaded to gs://fab_public_bucket/video/temp_sport/RSL1_GCC-013.mp4
File video/rsl/output/RSL1_GCC-010.mp4 uploaded to gs://fab_public_bucket/video/temp_sport/RSL1_GCC-010.mp4
File video/rsl/output/RSL1_G

In [17]:
import os

def list_files_in_directory(directory):
  """Lists all files in a directory and returns them as a list.

  Args:
    directory: The path to the directory.

  Returns:
    A list of strings, where each string is the full path to a file 
    in the directory. Returns an empty list if the directory is empty 
    or if the directory does not exist.
  """
  try:
    file_list = []
    for filename in os.listdir(directory):
      full_path = os.path.join(directory, filename)
      if os.path.isfile(full_path):  # Only add files, not subdirectories
        file_list.append(full_path)
    return file_list
  except FileNotFoundError:
    print(f"Directory '{directory}' not found.")
    return []
print(list_files_in_directory(video_directory))

['video/rsl/output/RSL1_GCC-002.mp4', 'video/rsl/output/RSL1_GCC-011.mp4', 'video/rsl/output/RSL1_GCC-006.mp4', 'video/rsl/output/RSL1_GCC-003.mp4', 'video/rsl/output/RSL1_GCC-005.mp4', 'video/rsl/output/RSL1_GCC-007.mp4', 'video/rsl/output/RSL1_GCC-009.mp4', 'video/rsl/output/RSL1_GCC-008.mp4', 'video/rsl/output/RSL1_GCC-004.mp4', 'video/rsl/output/RSL1_GCC-001.mp4', 'video/rsl/output/RSL1_GCC-013.mp4', 'video/rsl/output/RSL1_GCC-010.mp4', 'video/rsl/output/RSL1_GCC-012.mp4']


In [18]:
import random


def get_random_region():
    """Returns a random region from the provided list of regions.
    Args:
        regions_list: A list of strings, where each string is a region name.
    Returns:
        A randomly selected region string from the list.
    """
    
    regions_string = "europe-west4, europe-west9, europe-west2, europe-west3, europe-west1, europe-west6, europe-southwest1, europe-west8, europe-north1, europe-central2, us-east5, us-south1, us-central1, us-west4, us-east1, us-east4, us-west1"
    regions_list = [region.strip() for region in regions_string.split(',')]

    return random.choice(regions_list)


# Example usage:
# random_region = get_random_region(regions_list)
# print(f"Randomly selected region: {random_region}")

In [19]:
# import base64
import vertexai
from vertexai.generative_models import GenerativeModel, Part, SafetySetting

def generate(user_prompt, system_prompt, random_region, video_uri):
    vertexai.init(project="testfab-362608", location=random_region)

    final_response = ''
    model = GenerativeModel(
        "gemini-1.5-pro-001",
        system_instruction=[system_prompt]
    )
    
    video1 = Part.from_uri(
    mime_type="video/mp4",
    uri=video_uri,)
    
    generation_config = {
        "max_output_tokens": 8192,
        "temperature": 1,
        "top_p": 0.95,
        "response_mime_type": "application/json"
    }
    
    safety_settings = [
    SafetySetting(
        category=SafetySetting.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        threshold=SafetySetting.HarmBlockThreshold.OFF
        ),
        SafetySetting(
            category=SafetySetting.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
            threshold=SafetySetting.HarmBlockThreshold.OFF
        ),
        SafetySetting(
            category=SafetySetting.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            threshold=SafetySetting.HarmBlockThreshold.OFF
        ),
        SafetySetting(
            category=SafetySetting.HarmCategory.HARM_CATEGORY_HARASSMENT,
            threshold=SafetySetting.HarmBlockThreshold.OFF
        ),
    ]
    
    response = model.generate_content(
        [video1, user_prompt],
        generation_config=generation_config,
        safety_settings=safety_settings,
        stream=False,
    )

    return response.text

user_prompt = """<INSTRUCTION>
Analyze the provided football (soccer) game video and identify the single most impactful and engaging moment. This moment should be characterized by a combination of factors, including but not limited to:

High-stakes plays: Identify moments with a significant potential impact on the game's outcome (e.g., goal-scoring opportunities, crucial defensive stops).

Exciting action: Look for sequences involving rapid player movements, skillful dribbling, inventive passes, or intense physical battles for possession.

Goal scoring events: Highlight any goals scored, analyzing the build-up, the execution of the shot, and the immediate reaction of players and fans. Goal scoring events are from far the most important enven and have to apear in the highlight.

Near misses: Identify moments when a goal almost occurred, such as shots hitting the post, spectacular saves by the goalkeeper, or last-second defensive clearances.

Controversial incidents: Focus on any controversial plays, big fouls, potential penalties, or referee decisions that significantly impacted the flow or outcome of the game, even if they did not result in a goal.

Game-Changing Moments: Consider events that altered the momentum of the game or dramatically shifted the probability of either team winning.

From your analysis, provide the exact start and end timestamps (in the format MM:SS) of the most outstanding moment, and briefly (1-2 sentences) explain why you chose that specific moment based on the criteria listed above. Justify the inclusion of the chosen incident.

A sequence will be minimum 10 seconds.
</INSTRUCTION>


<OUTPUT Format> 
JSON
[
{
start_timestamp: \"mm:ss\",
end_timestamp: \"mm:ss\",
reasoning: \"\"
}
]
</OUTPUT Format>"""



# user_prompt = """<INSTRUCTION>
# Analyze the attached football video and identify the precise timestamps of successful goals. For each goal, provide:

# Goal Timestamp: The exact moment the ball crosses the goal line, resulting in a score.
# Start of Action: The timestamp approximately 8 seconds before the Goal Timestamp. This is the start of the goal scoring sequence.
# End of Action: The timestamp approximately 1 second after the Goal Timestamp.
# Focus specifically on goals where the ball definitively enters the net and the score is officially updated. Ignore near misses, shots on target that are saved, fouls, and other in-game actions.
# It may happen there are no goals, the return "no goals" in "reasoning".
# </INSTRUCTION>


# <OUTPUT Format> 
# JSON
# [
# {
# start_timestamp: \"mm:ss\",
# end_timestamp: \"mm:ss\",
# reasoning: \"\"
# }
# ]
# </OUTPUT Format>"""



system_prompt = """You are a expert in content creation and generation. You never miss any key frames in the video which can be used for youtube shorts video generation. You possess a deep understanding of visual storytelling, pacing, and audience engagement techniques. Your goal is to distill the essence of a video into a concise and captivating highlight reel."""


files = list_files_in_directory(video_directory)
final_res = {}

for file in files:
    region = get_random_region()
    filename = file.split("/")[-1]
    video_uri = f"gs://{gcs_bucket_name}/{gcs_temp_working_dir}/{filename}"
    print(f"Region chosen: {region}")
    res=generate(user_prompt, system_prompt, region,video_uri)
    final_res[filename] = res

Region chosen: europe-central2
Region chosen: us-west4
Region chosen: europe-west1
Region chosen: us-west1
Region chosen: us-east4
Region chosen: europe-central2
Region chosen: us-east1
Region chosen: europe-west3
Region chosen: europe-central2
Region chosen: us-west1
Region chosen: europe-west3
Region chosen: us-east5
Region chosen: europe-west9


In [20]:
from google import genai
from google.genai import types
import base64

def generate(user_prompt):
  client = genai.Client(
      vertexai=True,
      project="testfab-362608",
      location="us-central1"
  )

  res=[]

  text1 = types.Part.from_text(user_prompt)

  model = "gemini-exp-1206"
  contents = [
    types.Content(
      role="user",
      parts=[
        text1
      ]
    )
  ]
  generate_content_config = types.GenerateContentConfig(
    temperature = 1,
    top_p = 1,
    seed = 0,
    max_output_tokens = 8192,
    response_modalities = ["TEXT"],
    safety_settings = [types.SafetySetting(
      category="HARM_CATEGORY_HATE_SPEECH",
      threshold="BLOCK_MEDIUM_AND_ABOVE"
    ),types.SafetySetting(
      category="HARM_CATEGORY_DANGEROUS_CONTENT",
      threshold="BLOCK_MEDIUM_AND_ABOVE"
    ),types.SafetySetting(
      category="HARM_CATEGORY_SEXUALLY_EXPLICIT",
      threshold="BLOCK_MEDIUM_AND_ABOVE"
    ),types.SafetySetting(
      category="HARM_CATEGORY_HARASSMENT",
      threshold="BLOCK_MEDIUM_AND_ABOVE"
    )],
    response_mime_type = "application/json",
  )

  answer=''
  for chunk in client.models.generate_content_stream(
    model = model,
    contents = contents,
    config = generate_content_config,
    ):
    res.append(chunk)
    
  return res

user_prompt = """<Instruction>
You have a JSON content that presents timestamp for goal found in multiple videos which are the results of the split of one large video. 
Output these different JSON docuements in one array following the provided format.
</Instruction>

<JSON_Content>
"""+str(final_res)+"""
</JSON_Content>

<Output_Format>
[
{'filename':'',
'start_timestamp': "mm:ss",
'end_timestamp': "mm:ss"},
{'filename':'',
'start_timestamp': 'mm:ss',
'end_timestamp': 'mm:ss'},
...]
</Output_Format>"""

system_prompt = """You are a expert in content creation and generation. You never miss any key frames in the video which can be used for youtube shorts video generation. You possess a deep understanding of visual storytelling, pacing, and audience engagement techniques. Your goal is to distill the essence of a video into a concise and captivating highlight reel."""


results = generate(user_prompt)
final_highlight=''
for res in results:
 final_highlight = final_highlight + res.candidates[0].content.parts[0].text


print(final_highlight)
# res[0].candidates[0].content.parts[0].text

[
  {
    "filename": "RSL1_GCC-002.mp4",
    "start_timestamp": "00:00",
    "end_timestamp": "00:17"
  },
  {
    "filename": "RSL1_GCC-011.mp4",
    "start_timestamp": "05:22",
    "end_timestamp": "06:16"
  },
  {
    "filename": "RSL1_GCC-006.mp4",
    "start_timestamp": "00:27",
    "end_timestamp": "00:48"
  },
  {
    "filename": "RSL1_GCC-003.mp4",
    "start_timestamp": "06:39",
    "end_timestamp": "06:50"
  },
  {
    "filename": "RSL1_GCC-005.mp4",
    "start_timestamp": "01:16",
    "end_timestamp": "02:57"
  },
  {
    "filename": "RSL1_GCC-007.mp4",
    "start_timestamp": "00:07",
    "end_timestamp": "00:18"
  },
  {
    "filename": "RSL1_GCC-009.mp4",
    "start_timestamp": "00:39",
    "end_timestamp": "01:09"
  },
  {
    "filename": "RSL1_GCC-008.mp4",
    "start_timestamp": "00:00",
    "end_timestamp": "00:12"
  },
  {
    "filename": "RSL1_GCC-004.mp4",
    "start_timestamp": "01:04",
    "end_timestamp": "01:23"
  },
  {
    "filename": "RSL1_GCC-001.mp4",
    

In [21]:
import json

def extract_tuples(json_string):
  """
  Extracts tuples from a JSON string and appends them to a list.

  Args:
    json_string: A string in JSON format containing highlight data.

  Returns:
    A list of tuples, where each tuple represents a highlight 
    with (filename, start_timestamp, end_timestamp).
  """
  data = json.loads(json_string)
  highlights_list = []

  for highlight in data:  # No need to use .get("highlights", []) here since the list is at the top level
    filename = highlight.get("filename")
    start_timestamp = highlight.get("start_timestamp")
    end_timestamp = highlight.get("end_timestamp")

    if filename and start_timestamp and end_timestamp:
      highlights_list.append((filename, start_timestamp, end_timestamp))

  return highlights_list

# Example Usage:
json_string = final_highlight
split_for_video = extract_tuples(json_string)
print(split_for_video)

[('RSL1_GCC-002.mp4', '00:00', '00:17'), ('RSL1_GCC-011.mp4', '05:22', '06:16'), ('RSL1_GCC-006.mp4', '00:27', '00:48'), ('RSL1_GCC-003.mp4', '06:39', '06:50'), ('RSL1_GCC-005.mp4', '01:16', '02:57'), ('RSL1_GCC-007.mp4', '00:07', '00:18'), ('RSL1_GCC-009.mp4', '00:39', '01:09'), ('RSL1_GCC-008.mp4', '00:00', '00:12'), ('RSL1_GCC-004.mp4', '01:04', '01:23'), ('RSL1_GCC-001.mp4', '00:28', '01:10'), ('RSL1_GCC-013.mp4', '00:29', '00:40'), ('RSL1_GCC-010.mp4', '00:57', '01:19'), ('RSL1_GCC-012.mp4', '06:00', '06:12')]


In [22]:
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
from moviepy import VideoFileClip
import os

def cut_video(input_file, output_file, start_time, end_time):
  """
  Cuts a video file from start_time to end_time using moviepy.

  Args:
    input_file: Path to the input video file.
    output_file: Path to save the output (cut) video file.
    start_time: Start time in "mm:ss" format (e.g., "01:30").
    end_time: End time in "mm:ss" format (e.g., "02:15").
  """

  def time_to_seconds(time_str):
      """Converts a time string in mm:ss format to seconds."""
      minutes, seconds = map(int, time_str.split(':'))
      return minutes * 60 + seconds

  start_seconds = time_to_seconds(start_time)
  print(start_seconds)
  end_seconds = time_to_seconds(end_time)
  print(end_seconds)
  
  # Method 1 (More Precise, but can be slower for large files or many cuts)
  # Uses VideoFileClip for loading, which allows for frame-accurate cutting
  # More suitable for editing workflows where you might be doing other
  # manipulations with the clip beyond just cutting.
  try:
      with VideoFileClip(input_file) as video:
          subclip = video.subclip(start_seconds, end_seconds)
          subclip.write_videofile(output_file)
  except Exception as e:
      print(f"Error using VideoFileClip: {e}")
      print("Falling back to ffmpeg_extract_subclip")
  
      # Method 2 (Faster, but may not be frame-accurate)
      # Uses ffmpeg_extract_subclip which is a direct call to ffmpeg
      # Much faster for simple cutting, especially with large files.
      # Precision depends on keyframe placement in the original video.
      try:
          ffmpeg_extract_subclip(input_file, start_seconds, end_seconds, targetname=output_file)
      except Exception as e:
          print(f"Error using ffmpeg_extract_subclip: {e}")
          

# Example Usage:
# input_video = "input.mp4"
# output_video = "output.mp4"
# start_time = "00:10"
# end_time = "00:30"

# cut_video(input_video, output_video, start_time, end_time)



# video clips
# from moviepy import *
 
# # loading video dsa gfg intro video
# clip = VideoFileClip("video/hajjan/output/Hajjan_KSA-001.mp4")
 
# # getting subclip as video is large
# chunk = clip.subclipped(55, 100)
# chunk.write_videofile("video/hajjan/output/chunk/Hajjan_KSA-001_1.mp4")
# clip.close()
# showing clip
# clip.ipython_display(width = 480)

chunk_directory = os.path.join(video_directory, 'chunk')
try:
    os.makedirs(chunk_directory)
    print(f"Directory '{chunk_directory}' created successfully.")
except FileExistsError:
    print(f"Directory '{chunk_directory}' already exists.")

for split in split_for_video:
    video_source = video_directory+"/"+split[0]
    video_destination = chunk_directory+"/"+split[0].split(".")[0]+"-"+split[1]+"_"+split[2]+".mp4"
    clip = VideoFileClip(video_source)
    chunk = clip.subclipped(split[1], split[2])
    chunk.write_videofile(video_destination)
    chunk.close()
    clip.close()

    
    
    # cut_video(video_source ,video_destination,split[1],split[2])
    


Directory 'video/rsl/output/chunk' already exists.
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-24T09:47:09.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1920, 1080], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '8487780', 'DURATION-eng': '00:08:11.000000000', 'NUMBER_OF_FRAMES-eng': '12275', 'NUMBER_OF_BYTES-eng': '520937523', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-24 09:47:09', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': None, 'metadata': {'Metadata': '', 'BPS-eng': '128827', 'DURATION-eng': '

                                                                   

MoviePy - Done.
MoviePy - Writing video video/rsl/output/chunk/RSL1_GCC-002-00:00_00:17.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready video/rsl/output/chunk/RSL1_GCC-002-00:00_00:17.mp4
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-24T09:47:09.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1920, 1080], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '8776412', 'DURATION-eng': '00:07:51.600000000', 'NUMBER_OF_FRAMES-eng': '11790', 'NUMBER_OF_BYTES-eng': '517369534', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-24 09:47:09', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': None, 'metadata': {'Metadata': ''

                                                                     

MoviePy - Done.
MoviePy - Writing video video/rsl/output/chunk/RSL1_GCC-011-05:22_06:16.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready video/rsl/output/chunk/RSL1_GCC-011-05:22_06:16.mp4
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-24T09:47:09.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1920, 1080], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '7672195', 'DURATION-eng': '00:09:06.880000000', 'NUMBER_OF_FRAMES-eng': '13672', 'NUMBER_OF_BYTES-eng': '524471290', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-24 09:47:09', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': None, 'metadata': {'Metadata': ''

                                                                   

MoviePy - Done.
MoviePy - Writing video video/rsl/output/chunk/RSL1_GCC-006-00:27_00:48.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready video/rsl/output/chunk/RSL1_GCC-006-00:27_00:48.mp4
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-24T09:47:09.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1920, 1080], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '8341140', 'DURATION-eng': '00:08:20.280000000', 'NUMBER_OF_FRAMES-eng': '12507', 'NUMBER_OF_BYTES-eng': '521613209', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-24 09:47:09', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': None, 'metadata': {'Metadata': ''

                                                                   

MoviePy - Done.
MoviePy - Writing video video/rsl/output/chunk/RSL1_GCC-003-06:39_06:50.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready video/rsl/output/chunk/RSL1_GCC-003-06:39_06:50.mp4
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-24T09:47:09.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1920, 1080], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '8650441', 'DURATION-eng': '00:08:00.280000000', 'NUMBER_OF_FRAMES-eng': '12007', 'NUMBER_OF_BYTES-eng': '519329226', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-24 09:47:09', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': None, 'metadata': {'Metadata': ''

                                                                     

MoviePy - Done.
MoviePy - Writing video video/rsl/output/chunk/RSL1_GCC-005-01:16_02:57.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready video/rsl/output/chunk/RSL1_GCC-005-01:16_02:57.mp4
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-24T09:47:09.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1920, 1080], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '7639136', 'DURATION-eng': '00:09:02.720000000', 'NUMBER_OF_FRAMES-eng': '13568', 'NUMBER_OF_BYTES-eng': '518239035', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-24 09:47:09', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': None, 'metadata': {'Metadata': ''

                                                                   

MoviePy - Done.
MoviePy - Writing video video/rsl/output/chunk/RSL1_GCC-007-00:07_00:18.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready video/rsl/output/chunk/RSL1_GCC-007-00:07_00:18.mp4
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-24T09:47:09.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1920, 1080], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '9128219', 'DURATION-eng': '00:07:37.840000000', 'NUMBER_OF_FRAMES-eng': '11446', 'NUMBER_OF_BYTES-eng': '522407985', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-24 09:47:09', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': None, 'metadata': {'Metadata': ''

                                                                   

MoviePy - Done.
MoviePy - Writing video video/rsl/output/chunk/RSL1_GCC-009-00:39_01:09.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready video/rsl/output/chunk/RSL1_GCC-009-00:39_01:09.mp4
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-24T09:47:09.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1920, 1080], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '8400533', 'DURATION-eng': '00:08:12.000000000', 'NUMBER_OF_FRAMES-eng': '12300', 'NUMBER_OF_BYTES-eng': '516632832', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-24 09:47:09', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': None, 'metadata': {'Metadata': ''

                                                                   

MoviePy - Done.
MoviePy - Writing video video/rsl/output/chunk/RSL1_GCC-008-00:00_00:12.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready video/rsl/output/chunk/RSL1_GCC-008-00:00_00:12.mp4
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-24T09:47:09.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1920, 1080], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '8773492', 'DURATION-eng': '00:07:56.200000000', 'NUMBER_OF_FRAMES-eng': '11905', 'NUMBER_OF_BYTES-eng': '522242122', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-24 09:47:09', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': None, 'metadata': {'Metadata': ''

                                                                   

MoviePy - Done.
MoviePy - Writing video video/rsl/output/chunk/RSL1_GCC-004-01:04_01:23.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready video/rsl/output/chunk/RSL1_GCC-004-01:04_01:23.mp4
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-24T09:47:09.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1920, 1080], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '7643247', 'DURATION-eng': '00:09:00.280000000', 'NUMBER_OF_FRAMES-eng': '13507', 'NUMBER_OF_BYTES-eng': '516186731', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-24 09:47:09', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': None, 'metadata': {'Metadata': ''

                                                                   

MoviePy - Done.
MoviePy - Writing video video/rsl/output/chunk/RSL1_GCC-001-00:28_01:10.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready video/rsl/output/chunk/RSL1_GCC-001-00:28_01:10.mp4
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-24T09:47:09.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1920, 1080], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '8066997', 'DURATION-eng': '00:01:48.680000000', 'NUMBER_OF_FRAMES-eng': '2717', 'NUMBER_OF_BYTES-eng': '109590159', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-24 09:47:09', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': None, 'metadata': {'Metadata': '',

                                                                   

MoviePy - Done.
MoviePy - Writing video video/rsl/output/chunk/RSL1_GCC-013-00:29_00:40.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready video/rsl/output/chunk/RSL1_GCC-013-00:29_00:40.mp4
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-24T09:47:09.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1920, 1080], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '8854186', 'DURATION-eng': '00:07:51.320000000', 'NUMBER_OF_FRAMES-eng': '11783', 'NUMBER_OF_BYTES-eng': '521644369', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-24 09:47:09', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': None, 'metadata': {'Metadata': ''

                                                                   

MoviePy - Done.
MoviePy - Writing video video/rsl/output/chunk/RSL1_GCC-010-00:57_01:19.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready video/rsl/output/chunk/RSL1_GCC-010-00:57_01:19.mp4
{'video_found': True, 'audio_found': True, 'metadata': {'encoder': 'libebml v1.4.2 + libmatroska v1.6.2', 'creation_time': '2025-01-24T09:47:09.000000Z'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1920, 1080], 'bitrate': None, 'fps': 25.0, 'codec_name': 'h264', 'profile': '(High 4:2:2)', 'metadata': {'Metadata': '', 'BPS-eng': '8233026', 'DURATION-eng': '00:08:24.760000000', 'NUMBER_OF_FRAMES-eng': '12619', 'NUMBER_OF_BYTES-eng': '519462796', '_STATISTICS_WRITING_APP-eng': "mkvmerge v52.0.0 ('Secret For The Mad') 64-bit", '_STATISTICS_WRITING_DATE_UTC-eng': '2025-01-24 09:47:09', '_STATISTICS_TAGS-eng': 'BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 48000, 'bitrate': None, 'metadata': {'Metadata': ''

                                                                   

MoviePy - Done.
MoviePy - Writing video video/rsl/output/chunk/RSL1_GCC-012-06:00_06:12.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready video/rsl/output/chunk/RSL1_GCC-012-06:00_06:12.mp4


In [23]:
import re
import os
import datetime

def get_sorted_files(folder_path):
    """
    Returns a list of .mp4 files in the given folder, sorted by their index and start time.
    The index is assumed to be in the middle of the filename, e.g., 001 for Hajjan_KSA-001-07:43_07:55.mp4.
    The start time is extracted from the filename in the format MM:SS.
    """
    files = [f for f in os.listdir(folder_path) if f.endswith('.mp4')]

    # Sort files by index and then start time (using MM:SS format)
    files.sort(key=lambda f: (int(re.search(r'-(\d{3})-', f).group(1)),
                              datetime.datetime.strptime(re.search(r'-(\d{2}:\d{2})_', f).group(1), '%M:%S')))

    # Join the folder path and file name to get the full path
    full_paths = [os.path.join(folder_path, file) for file in files]
    return full_paths
order_file_list = get_sorted_files(chunk_directory)
order_file_list

['video/rsl/output/chunk/RSL1_GCC-001-00:28_01:10.mp4',
 'video/rsl/output/chunk/RSL1_GCC-002-00:00_00:17.mp4',
 'video/rsl/output/chunk/RSL1_GCC-003-06:39_06:50.mp4',
 'video/rsl/output/chunk/RSL1_GCC-004-01:04_01:23.mp4',
 'video/rsl/output/chunk/RSL1_GCC-005-01:16_02:57.mp4',
 'video/rsl/output/chunk/RSL1_GCC-006-00:27_00:48.mp4',
 'video/rsl/output/chunk/RSL1_GCC-007-00:07_00:18.mp4',
 'video/rsl/output/chunk/RSL1_GCC-008-00:00_00:12.mp4',
 'video/rsl/output/chunk/RSL1_GCC-009-00:39_01:09.mp4',
 'video/rsl/output/chunk/RSL1_GCC-010-00:57_01:19.mp4',
 'video/rsl/output/chunk/RSL1_GCC-011-05:22_06:16.mp4',
 'video/rsl/output/chunk/RSL1_GCC-012-06:00_06:12.mp4',
 'video/rsl/output/chunk/RSL1_GCC-013-00:29_00:40.mp4']

In [24]:
import subprocess
import os

def merge_mp4_files(input_files, output_file):
  """
  Merges multiple MP4 files into a single MKV file using mkvmerge.

  Args:
    input_files: A list of paths to the MP4 files to merge, in the desired order.
    output_file: The path to save the output MKV file.
  """

  if not input_files:
      raise ValueError("Input file list cannot be empty.")

  if not all(os.path.exists(file) for file in input_files):
      raise FileNotFoundError("One or more input files do not exist.")

  if not output_file.endswith(".mkv"):
      output_file += ".mkv"

  # Build the mkvmerge command
  command = ["mkvmerge", "-o", output_file]

  # Add the first input file
  command.append(input_files[0])

  # Add the rest of the input files with +
  for file in input_files[1:]:
      command.append("+")
      command.append(file)
  
  try:
      # Run mkvmerge
      subprocess.run(command, check=True, capture_output=True, text=True)
      print(f"Successfully merged files into: {output_file}")
  
  except subprocess.CalledProcessError as e:
      print(f"Error during merging:")
      print(f"  Return code: {e.returncode}")
      print(f"  Stdout: {e.stdout}")
      print(f"  Stderr: {e.stderr}")
      raise

# Example Usage:

# input_file = "video/hajjan/Hajjan_KSA.mp4"

base_name, ext = os.path.splitext(input_file)
temp_fullpath_outputfile = base_name + "_trailer" + ext
print(f"temp_fullpath_outputfile: {temp_fullpath_outputfile}")
# merge_mp4_files(order_file_list, 
fullpath_outputdir = chunk_directory
print(f"fullpath_outputdir:{fullpath_outputdir}")
# output_file = "merged_video.mkv"
fullpath_outputfile = fullpath_outputdir+"/"+os.path.basename(temp_fullpath_outputfile)
print(fullpath_outputfile)
merge_mp4_files(order_file_list, fullpath_outputfile)

temp_fullpath_outputfile: video/rsl/RSL1_GCC_trailer.mp4
fullpath_outputdir:video/rsl/output/chunk
video/rsl/output/chunk/RSL1_GCC_trailer.mp4
Successfully merged files into: video/rsl/output/chunk/RSL1_GCC_trailer.mp4.mkv
