<a href="https://colab.research.google.com/github/Sibikrish3000/video-mining/blob/master/compress_videos.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@markdown <br><center><img src='https://upload.wikimedia.org/wikipedia/commons/thumb/d/da/Google_Drive_logo.png/600px-Google_Drive_logo.png' height="50" alt="Gdrive-logo"/></center>
#@markdown <center><h3>Mount Gdrive to /content/drive</h3></center><br>
# Colab form field: selects whether this cell mounts or unmounts Google Drive.
MODE = "MOUNT" #@param ["MOUNT", "UNMOUNT"]
# Mount (or unmount) Google Drive at /content/drive, depending on MODE.
from google.colab import drive
# NOTE(review): sets a `_DEBUG` attribute on the mount function; presumably this
# silences debug output from google.colab — confirm against the library source.
drive.mount._DEBUG = False
if MODE == "MOUNT":
  # force_remount=True is passed so the mount is requested even when one may
  # already exist (see google.colab docs for the exact semantics — TODO confirm).
  drive.mount('/content/drive', force_remount=True)
elif MODE == "UNMOUNT":
  try:
    drive.flush_and_unmount()
  except ValueError:
    # Deliberately ignored. NOTE(review): presumably raised when nothing is
    # mounted — confirm.
    pass
  # Remove the DriveFS directory under /root/.config with a shell command
  # (runs only in UNMOUNT mode).
  get_ipython().system_raw("rm -rf /root/.config/Google/DriveFS")

Mounted at /content/drive


In [2]:
#@title 2. Configure Your Video Folder and Chunking Settings
import os
from pathlib import Path

#@markdown ---
#@markdown ### 📁 **Target Folder Path**
#@markdown Enter the path to the folder inside your Google Drive that contains the videos you want to chunk.
DRIVE_FOLDER_PATH = "My Drive/final_review_FMCG" #@param {type:"string"}

#@markdown ---
#@markdown ### ⚙️ **Chunking Rules**
#@markdown Define the rules for splitting your videos.
MAX_DURATION_BEFORE_CHUNKING_SECONDS = 31 #@param {type:"number"}
CHUNK_DURATION_SECONDS = 15 #@param {type:"number"}


#@markdown ---
#@markdown ### 🚀 **Performance Settings**
#@markdown `MAX_WORKERS` controls how many videos are processed at the same time.
MAX_WORKERS = 8 #@param {type:"slider", min:1, max:16, step:1}


# --- Don't edit below this line ---
def resolve_drive_path(folder_path: str) -> Path:
    """Turn the folder string typed into the form into a path under /content/drive.

    * Everything after the first ``"My Drive/"`` is placed under
      ``/content/drive/My Drive`` (so ``"My Drive/x"`` and a pasted full path
      containing ``"My Drive/"`` both work).
    * A bare ``"My Drive"`` (no trailing slash) resolves to the My Drive root.
      The previous ``split("My Drive/", 1)[1]`` raised IndexError for it.
    * Anything else is joined onto ``/content/drive`` unchanged.
    """
    my_drive_root = Path("/content/drive/My Drive/")
    _, separator, remainder = folder_path.partition("My Drive/")
    if separator:
        return my_drive_root / remainder
    if Path(folder_path).name == "My Drive":
        return my_drive_root
    return Path("/content/drive") / folder_path


TARGET_DIR = resolve_drive_path(DRIVE_FOLDER_PATH)

print("✅ Configuration Loaded:")
print(f"   - Target Directory: {TARGET_DIR}")
if not TARGET_DIR.is_dir():
    print("   - ❌ ERROR: The specified directory does not exist. Please check the path.")
else:
    print("   - ✅ Directory found.")
print(f"   - Splitting videos longer than: {MAX_DURATION_BEFORE_CHUNKING_SECONDS} seconds")
print(f"   - Creating chunks of: {CHUNK_DURATION_SECONDS} seconds each")
if CHUNK_DURATION_SECONDS <= 0:
    # The chunker divides by this value, so a non-positive setting cannot work.
    print("   - ❌ ERROR: CHUNK_DURATION_SECONDS must be greater than 0.")
print(f"   - Parallel Workers: {MAX_WORKERS}")

✅ Configuration Loaded:
   - Target Directory: /content/drive/My Drive/final_review_FMCG
   - ✅ Directory found.
   - Splitting videos longer than: 31 seconds
   - Creating chunks of: 15 seconds each
   - Parallel Workers: 8


In [3]:
#@title 3. Run the Batch Video Chunker
import subprocess
import json
import math
from pathlib import Path
import os
import concurrent.futures

def get_video_duration(video_path: Path) -> float:
    """
    Ask ffprobe for the duration (in seconds) of the file at ``video_path``.

    Only container metadata is queried, so no frames are decoded. If ffprobe
    is missing, exits with an error, or prints something that is not a number,
    the problem is reported on stdout and 0.0 is returned instead of raising.
    """
    ffprobe_args = ['ffprobe', '-v', 'error']
    ffprobe_args += ['-select_streams', 'v:0']
    ffprobe_args += ['-show_entries', 'format=duration']
    ffprobe_args += ['-of', 'default=noprint_wrappers=1:nokey=1']
    ffprobe_args.append(str(video_path))

    try:
        probe = subprocess.run(ffprobe_args, capture_output=True, text=True, check=True)
        seconds = float(probe.stdout.strip())
    except (subprocess.CalledProcessError, FileNotFoundError, ValueError) as e:
        print(f"   - [Error] Could not get duration for {video_path.name}: {e}")
        return 0.0
    return seconds

def chunk_and_replace_video(video_path: Path):
    """
    Split one video into fixed-length chunks and delete the original on success.

    The video is left untouched when its duration (from ``get_video_duration``)
    is not above ``MAX_DURATION_BEFORE_CHUNKING_SECONDS``. Otherwise ffmpeg is
    run once per chunk with stream copy, writing
    ``<stem>_chunk_NNN<suffix>`` next to the source file.

    The original is deleted only if every chunk command exits with code 0.
    If any chunk fails, processing stops, all chunk files written for this
    video (including the partial output of the failing command) are removed,
    and the original is kept.

    Args:
        video_path: Path of the video to process.

    Returns:
        None. Progress and errors are printed; unexpected exceptions are
        caught and reported so one bad file does not stop the batch.
    """
    thread_name = f"Worker for {video_path.name}"
    print(f"[{thread_name}] Starting job.")

    try:
        duration = get_video_duration(video_path)
        if duration <= MAX_DURATION_BEFORE_CHUNKING_SECONDS:
            print(f"[{thread_name}] Skipping: Duration ({duration:.2f}s) is not over the threshold.")
            return

        print(f"   - [{thread_name}] Found long video (Duration: {duration:.2f}s). Preparing to chunk...")

        num_chunks = math.ceil(duration / CHUNK_DURATION_SECONDS)
        created_chunks = []
        all_chunks_successful = True

        for i in range(num_chunks):
            start_time = i * CHUNK_DURATION_SECONDS
            output_filename = video_path.with_name(f"{video_path.stem}_chunk_{i+1:03d}{video_path.suffix}")

            # Using "-c copy" is crucial for speed. It avoids re-encoding.
            command = [
                'ffmpeg',
                '-y',  # Overwrite output files without asking
                '-i', str(video_path),
                '-ss', str(start_time),
                '-t', str(CHUNK_DURATION_SECONDS),
                '-c', 'copy', # Perform a stream copy (no re-encoding)
                str(output_filename)
            ]

            print(f"     - [{thread_name}] Creating chunk {i+1}/{num_chunks}...")
            result = subprocess.run(command, capture_output=True, text=True, check=False)

            if result.returncode != 0:
                print(f"     - [{thread_name}] ❌ FAIL: FFmpeg failed for chunk {i+1}. Error: {result.stderr}")
                all_chunks_successful = False
                # ffmpeg may have written a truncated file before aborting.
                # It is not in created_chunks, so remove it here; otherwise it
                # would be left behind next to the original.
                if output_filename.exists():
                    output_filename.unlink()
                break # Stop processing this video if a chunk fails

            created_chunks.append(output_filename)

        # After the loop, decide whether to delete the original file
        if all_chunks_successful and created_chunks:
            print(f"   - [{thread_name}] ✅ SUCCESS: All {len(created_chunks)} chunks created. Deleting original file.")
            video_path.unlink() # Delete the original video file
        else:
            print(f"   - [{thread_name}] ❌ CANCELED DELETE: Not all chunks were created successfully. Original file will be kept.")
            # Remove the chunks that did succeed so a failed video leaves no partial set behind.
            for chunk_path in created_chunks:
                if chunk_path.exists():
                    chunk_path.unlink()

    except Exception as e:
        print(f"[{thread_name}] ❌ An unexpected error occurred: {e}")


def run_batch_chunking():
    """
    Find every video under ``TARGET_DIR`` and hand each to ``chunk_and_replace_video``.

    The directory tree is searched recursively for mp4/mov/mkv/avi/webm files;
    files whose stem contains ``_chunk_`` are ignored so existing chunks are
    not split again. Jobs run on a thread pool of ``MAX_WORKERS`` workers.
    Returns None; all reporting is done through print.
    """
    if not TARGET_DIR.is_dir():
        print(f"ERROR: The target directory '{TARGET_DIR}' was not found. Please check your path in Cell 2.")
        return

    divider = "-" * 50
    print(f"\n🚀 Starting batch chunking in '{TARGET_DIR}' using {MAX_WORKERS} parallel workers...")
    print(divider)
    print("Scanning for all video files...")

    # Patterns are visited in this fixed order, each one searched recursively.
    videos_to_check = [
        candidate
        for pattern in ("*.mp4", "*.mov", "*.mkv", "*.avi", "*.webm")
        for candidate in TARGET_DIR.rglob(pattern)
        if "_chunk_" not in candidate.stem  # skip files that are already chunks
    ]

    if len(videos_to_check) == 0:
        print("No video files found to check.")
        return

    print(f"Found {len(videos_to_check)} videos. Checking durations and processing in parallel...")

    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
        # Drain the result iterator so every job has finished before leaving the pool.
        for _ in pool.map(chunk_and_replace_video, videos_to_check):
            pass

    print(divider)
    print("🎉 Batch chunking complete! All jobs have been processed.")


In [4]:
# Start the batch chunker. It reads TARGET_DIR and MAX_WORKERS from the
# configuration cell and deletes each original video whose chunks were all
# created successfully.
run_batch_chunking()


🚀 Starting batch chunking in '/content/drive/My Drive/final_review_FMCG' using 8 parallel workers...
--------------------------------------------------
Scanning for all video files...
Found 197 videos. Checking durations and processing in parallel...
[Worker for CpHC7oXWQP4_part_1.mp4] Starting job.
[Worker for CpHC7oXWQP4_part_2.mp4] Starting job.
[Worker for nLyj-Y1sn1s.mp4] Starting job.
[Worker for 3lId7Lc0DU8_part_1.mp4] Starting job.
[Worker for Op6uUdIn9p0.mp4] Starting job.
[Worker for ZtEI4aTNsTQ.mp4] Starting job.
[Worker for CYnPnVX8-ec.mp4] Starting job.
[Worker for u9cbMmtzQgA.mp4] Starting job.
[Worker for u9cbMmtzQgA.mp4] Skipping: Duration (27.57s) is not over the threshold.
[Worker for nG6SaRHnyyg_part_1.mp4] Starting job.
[Worker for ZtEI4aTNsTQ.mp4] Skipping: Duration (11.44s) is not over the threshold.
[Worker for WPUfBaB0OTQ.mp4] Starting job.
[Worker for CYnPnVX8-ec.mp4] Skipping: Duration (15.60s) is not over the threshold.
[Worker for eRcGMl8ePHY.mp4] Starting 

In [12]:
#@title 2. Configure Your Renaming Settings
import os
from pathlib import Path

#@markdown ---
#@markdown ### 📁 **Target Folder Path**
#@markdown Enter the path to the folder inside your Google Drive containing the files to rename.
DRIVE_FOLDER_PATH = "My Drive/final_review_FMCG" #@param {type:"string"}

#@markdown ---
#@markdown ### ⚙️ **Renaming Rules**
#@markdown Configure how the files will be renamed.
STARTING_NUMBER = 1 #@param {type:"number"}
#@markdown Define the sort order to determine the numbering. `Alphabetical` is usually best for consistency.
SORT_ORDER = "Alphabetical (A-Z)" #@param ["Alphabetical (A-Z)", "Modification Date (Newest First)", "Modification Date (Oldest First)"]
#@markdown ---
#@markdown ### 📄 **File Filtering**
#@markdown Enter comma-separated extensions to rename (e.g., `.mp4, .mov, .jpg`). Leave blank to rename ALL files.
FILE_TYPES_TO_RENAME = ".mp4, .mov" #@param {type:"string"}
#@markdown Process files in sub-folders as well?
RENAME_RECURSIVELY = False #@param {type:"boolean"}

# --- Don't edit below this line ---
def resolve_drive_path(folder_path: str) -> Path:
    """Turn the folder string typed into the form into a path under /content/drive.

    * Everything after the first ``"My Drive/"`` is placed under
      ``/content/drive/My Drive``.
    * A bare ``"My Drive"`` (no trailing slash) resolves to the My Drive root.
      The previous ``split("My Drive/", 1)[1]`` raised IndexError for it.
    * Anything else is joined onto ``/content/drive`` unchanged.
    """
    my_drive_root = Path("/content/drive/My Drive/")
    _, separator, remainder = folder_path.partition("My Drive/")
    if separator:
        return my_drive_root / remainder
    if Path(folder_path).name == "My Drive":
        return my_drive_root
    return Path("/content/drive") / folder_path


def parse_extension_filter(raw_extensions: str) -> list:
    """Parse the comma-separated extension field into a list of suffixes.

    Each entry is trimmed, lower-cased and given a leading dot if the user
    omitted it, so it can be compared against ``Path.suffix.lower()``.
    Empty entries (blank input, stray or trailing commas) are dropped; they
    used to become ``''`` and restrict the run to files without any suffix.
    An empty result means "all files".
    """
    extensions = []
    for token in raw_extensions.split(','):
        extension = token.strip().lower()
        if not extension:
            continue
        if not extension.startswith('.'):
            extension = '.' + extension
        if extension not in extensions:
            extensions.append(extension)
    return extensions


TARGET_DIR = resolve_drive_path(DRIVE_FOLDER_PATH)

# Process file types (an empty list means all extensions are allowed)
ALLOWED_EXTENSIONS = parse_extension_filter(FILE_TYPES_TO_RENAME)

print("✅ Configuration Loaded:")
print(f"   - Target Directory: {TARGET_DIR}")
if not TARGET_DIR.is_dir():
    print("   - ❌ ERROR: The specified directory does not exist. Please check the path.")
else:
    print("   - ✅ Directory found.")
print(f"   - Starting Number: {STARTING_NUMBER}")
print(f"   - Sort Order: {SORT_ORDER}")
print(f"   - Renaming Recursively: {RENAME_RECURSIVELY}")
if ALLOWED_EXTENSIONS:
    print(f"   - File Types to Rename: {ALLOWED_EXTENSIONS}")
else:
    print("   - File Types to Rename: ALL FILES")

✅ Configuration Loaded:
   - Target Directory: /content/drive/My Drive/final_review_FMCG
   - ✅ Directory found.
   - Starting Number: 1
   - Sort Order: Alphabetical (A-Z)
   - Renaming Recursively: False
   - File Types to Rename: ['.mp4', '.mov']


In [13]:
#@title 3. Preview and Run the Batch Renamer
import time
from pathlib import Path

def run_batch_rename():
    """
    Scans, sorts, previews, and renames files based on the configuration in Cell 2.

    Files directly in ``TARGET_DIR`` (or the whole tree when
    ``RENAME_RECURSIVELY`` is set) that pass the ``ALLOWED_EXTENSIONS`` filter
    are ordered according to ``SORT_ORDER`` and renamed to
    ``<number><original suffix>``, counting up from ``STARTING_NUMBER``. Each
    file stays in its own directory.

    The plan is printed first and nothing is touched until the user types
    YES. Renaming happens in two phases (original -> temporary name ->
    final name) so that a final name that is currently another file's
    original name cannot be overwritten. If a Phase 1 rename fails, the files
    already moved to temporary names are moved back before aborting.

    Returns:
        None. All reporting is done through print.
    """
    if not TARGET_DIR.is_dir():
        print(f"❌ ERROR: The target directory '{TARGET_DIR}' was not found. Please re-run Cell 2 with a valid path.")
        return

    print(f"\n🚀 Scanning for files in '{TARGET_DIR}'...")
    print("-" * 50)

    # 1. Gather all files based on configuration
    glob_pattern = "**/*" if RENAME_RECURSIVELY else "*"
    files_to_rename = [
        path
        for path in TARGET_DIR.glob(glob_pattern)
        if path.is_file()
        and (not ALLOWED_EXTENSIONS or path.suffix.lower() in ALLOWED_EXTENSIONS)
    ]

    if not files_to_rename:
        print("✅ No files matching the criteria were found. Nothing to do.")
        return

    # 2. Sort the gathered files
    if SORT_ORDER == "Alphabetical (A-Z)":
        # Plain string order: '10.mp4' sorts before '2.mp4'.
        files_to_rename.sort(key=lambda p: p.name)
    elif SORT_ORDER == "Modification Date (Newest First)":
        files_to_rename.sort(key=lambda p: p.stat().st_mtime, reverse=True)
    elif SORT_ORDER == "Modification Date (Oldest First)":
        files_to_rename.sort(key=lambda p: p.stat().st_mtime)

    # 3. Generate the renaming plan and show a preview
    print(f"🔍 Found {len(files_to_rename)} files to rename. Please review the plan below:")
    rename_plan = []
    for i, old_path in enumerate(files_to_rename, start=STARTING_NUMBER):
        new_name = f"{i}{old_path.suffix}"
        new_path = old_path.with_name(new_name)
        rename_plan.append({'old': old_path, 'new': new_path})
        print(f"   '{old_path.name}'  ->  '{new_name}'")

    print("-" * 50)
    # 4. Get confirmation from the user
    try:
        confirmation = input("⚠️ Type 'YES' and press Enter to approve this change: ")
    except (KeyboardInterrupt, EOFError):
        print("\n\n❌ Operation cancelled by user.")
        return

    # Surrounding whitespace is ignored so "YES " is not rejected.
    if confirmation.strip().upper() != 'YES':
        print("❌ Confirmation not received. Aborting operation.")
        return

    # 5. Execute the renaming process safely
    print("\n✅ Confirmation received. Starting renaming process...")

    # Phase 1: Rename all files to a temporary, unique name to prevent collisions.
    temp_rename_plan = []
    print("   - Phase 1/2: Renaming to temporary names...")
    for item in rename_plan:
        try:
            temp_name = f"{item['old'].name}.{int(time.time())}.tmp_rename"
            temp_path = item['old'].with_name(temp_name)
            item['old'].rename(temp_path)
            temp_rename_plan.append({'old': item['old'], 'tmp': temp_path, 'new': item['new']})
        except Exception as e:
            print(f"     - ❌ ERROR renaming '{item['old'].name}' to temporary name: {e}")
            # This is a critical error, so we stop. No final name has been
            # assigned yet, so every original name is still free and the
            # files already moved can safely be moved back.
            print("     - Rolling back files already moved to temporary names...")
            for moved in reversed(temp_rename_plan):
                try:
                    moved['tmp'].rename(moved['old'])
                except Exception as rollback_error:
                    print(f"     - ❌ ERROR restoring '{moved['tmp'].name}' to '{moved['old'].name}': {rollback_error}")
            return

    # Phase 2: Rename all temporary files to their final, sequential name.
    print("   - Phase 2/2: Renaming to final sequential names...")
    success_count = 0
    for item in temp_rename_plan:
        try:
            item['tmp'].rename(item['new'])
            print(f"     - Renamed '{item['tmp'].name}' -> '{item['new'].name}'")
            success_count += 1
        except Exception as e:
            # The file stays under its temporary name; the message names it so it can be found.
            print(f"     - ❌ ERROR renaming '{item['tmp'].name}' to '{item['new'].name}': {e}")

    print("-" * 50)
    print(f"🎉 Batch renaming complete! {success_count}/{len(rename_plan)} files were successfully renamed.")



In [14]:
# Start the batch renamer. It prints the full rename plan and waits for the
# user to type YES before any file is renamed.
run_batch_rename()


🚀 Scanning for files in '/content/drive/My Drive/final_review_FMCG'...
--------------------------------------------------
🔍 Found 266 files to rename. Please review the plan below:
   '1.mp4'  ->  '1.mp4'
   '10.mp4'  ->  '2.mp4'
   '100.mp4'  ->  '3.mp4'
   '101.mp4'  ->  '4.mp4'
   '102.mp4'  ->  '5.mp4'
   '103.mp4'  ->  '6.mp4'
   '104.mp4'  ->  '7.mp4'
   '105.mp4'  ->  '8.mp4'
   '106.mp4'  ->  '9.mp4'
   '107.mp4'  ->  '10.mp4'
   '108.mp4'  ->  '11.mp4'
   '109.mp4'  ->  '12.mp4'
   '11.mp4'  ->  '13.mp4'
   '110.mp4'  ->  '14.mp4'
   '111.mp4'  ->  '15.mp4'
   '112.mp4'  ->  '16.mp4'
   '113.mp4'  ->  '17.mp4'
   '114.mp4'  ->  '18.mp4'
   '115.mp4'  ->  '19.mp4'
   '116.mp4'  ->  '20.mp4'
   '117.mp4'  ->  '21.mp4'
   '118.mp4'  ->  '22.mp4'
   '119.mp4'  ->  '23.mp4'
   '12.mp4'  ->  '24.mp4'
   '120.mp4'  ->  '25.mp4'
   '121.mp4'  ->  '26.mp4'
   '122.mp4'  ->  '27.mp4'
   '123.mp4'  ->  '28.mp4'
   '124.mp4'  ->  '29.mp4'
   '125.mp4'  ->  '30.mp4'
   '126.mp4'  ->  '31.

In [5]:
#@title 2. Configure Your Video Folder and Compression Settings
import os
from pathlib import Path

#@markdown ---
#@markdown ### 📁 **Target Folder Path**
#@markdown Enter the path to the folder inside your Google Drive that contains the videos.
DRIVE_FOLDER_PATH = "My Drive/final_review_FMCG" #@param {type:"string"}

#@markdown ---
#@markdown ### ⚙️ **Compression Settings**
#@markdown Set the target file size for your compressed videos.
TARGET_SIZE_MB = 5 #@param {type:"number"}

#@markdown ---
#@markdown ### 🚀 **Performance Settings**
#@markdown `MAX_WORKERS` controls how many videos are processed at the same time. A good starting point is 2 times the number of CPU cores.
MAX_WORKERS = 8 #@param {type:"slider", min:1, max:16, step:1}


# --- Don't edit below this line ---
def resolve_drive_path(folder_path: str) -> Path:
    """Turn the folder string typed into the form into a path under /content/drive.

    * Everything after the first ``"My Drive/"`` is placed under
      ``/content/drive/My Drive``.
    * A bare ``"My Drive"`` (no trailing slash) resolves to the My Drive root.
      The previous ``split("My Drive/", 1)[1]`` raised IndexError for it.
    * Anything else is joined onto ``/content/drive`` unchanged.
    """
    my_drive_root = Path("/content/drive/My Drive/")
    _, separator, remainder = folder_path.partition("My Drive/")
    if separator:
        return my_drive_root / remainder
    if Path(folder_path).name == "My Drive":
        return my_drive_root
    return Path("/content/drive") / folder_path


TARGET_DIR = resolve_drive_path(DRIVE_FOLDER_PATH)

# Files strictly larger than this many bytes are selected for compression.
FILE_SIZE_THRESHOLD = TARGET_SIZE_MB * 1024 * 1024
# The bitrate is computed for 98% of the target size, leaving a 2% margin.
TARGET_SIZE_BUFFER_MB = TARGET_SIZE_MB * 0.98

print("✅ Configuration Loaded:")
print(f"   - Target Directory: {TARGET_DIR}")
if not TARGET_DIR.is_dir():
    print("   - ❌ ERROR: The specified directory does not exist. Please check the path.")
else:
    print("   - ✅ Directory found.")
print(f"   - Compressing files larger than: {TARGET_SIZE_MB} MB")
if TARGET_SIZE_MB <= 0:
    # A non-positive target gives a non-positive bitrate for the encoder.
    print("   - ❌ ERROR: TARGET_SIZE_MB must be greater than 0.")
print(f"   - Parallel Workers: {MAX_WORKERS}")

✅ Configuration Loaded:
   - Target Directory: /content/drive/My Drive/final_review_FMCG
   - ✅ Directory found.
   - Compressing files larger than: 5 MB
   - Parallel Workers: 8


In [6]:
#@title 3. Run the Batch Compressor (GPU ACCELERATED - MAX PERFORMANCE)
#@markdown This version offloads encoding to the GPU for a massive speed increase.
#@markdown **IMPORTANT:** Ensure you have enabled the T4 GPU via "Runtime" -> "Change runtime type".
import subprocess
import cv2
import glob
from pathlib import Path
import shutil
import os
import concurrent.futures

def get_video_duration(video_path: Path) -> float:
    """
    Estimate a video's duration in seconds with OpenCV (frame count / FPS).

    NOTE: once this cell has run, this definition replaces the ffprobe-based
    ``get_video_duration`` defined in the chunker cell earlier in the notebook.

    Args:
        video_path: Path of the video file to inspect.

    Returns:
        The duration in seconds, or 0.0 when the file cannot be opened, the
        reported FPS is not positive, or any error occurs.
    """
    cap = None
    try:
        cap = cv2.VideoCapture(str(video_path))
        if not cap.isOpened():
            return 0.0
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        return frame_count / fps if fps > 0 else 0.0
    except Exception:
        return 0.0
    finally:
        # Release the capture on every path, including the early return and errors.
        if cap is not None:
            cap.release()

def compress_video_gpu(video_path_on_drive: Path):
    """
    Re-encode a SINGLE video with the GPU encoder (h264_nvenc) and replace the original.

    Steps:
      1. Copy the video into a private temporary directory on local disk.
      2. Derive a video bitrate from ``TARGET_SIZE_BUFFER_MB`` and the duration,
         then run two ffmpeg passes with h264_nvenc.
      3. Move the result over the original path on Drive.
      4. Delete the temporary directory (input copy, output, pass logs).

    Each job gets its own temporary directory so that videos with the same
    file name in different sub-folders cannot overwrite each other's scratch
    files when processed in parallel.

    NOTE(review): both passes use ``-an``, so the output has no audio track —
    confirm this is intended.
    NOTE(review): the output is always an MP4 container but is moved onto the
    original path, whatever its extension (.mov/.mkv/.avi/.webm) — confirm.

    Args:
        video_path_on_drive: Path of the video to compress in place.

    Returns:
        None. Progress and errors are printed; unexpected exceptions are
        caught and reported so one bad file does not stop the batch.
    """
    import tempfile  # local import: only this function needs it

    thread_name = f"Worker for {video_path_on_drive.name}"
    print(f"[{thread_name}] Starting GPU job.")

    work_dir = None
    try:
        # STEP 1: Copy to local storage (still important for I/O speed)
        work_dir = Path(tempfile.mkdtemp(prefix="compress_"))
        local_video_path = work_dir / video_path_on_drive.name
        shutil.copyfile(video_path_on_drive, local_video_path)

        duration = get_video_duration(local_video_path)
        if duration <= 0:
            print(f"[{thread_name}] ⚠️  Skipping: Could not determine duration.")
            return

        # bits available for the whole file / seconds = bits per second
        target_bits = TARGET_SIZE_BUFFER_MB * 1024 * 1024 * 8
        target_bitrate = int(target_bits / duration)

        local_output_path = work_dir / f"{local_video_path.stem}_compressed.mp4"
        log_file_prefix = str(work_dir / f"{local_video_path.stem}_ffmpeg_log")

        # STEP 2: Process locally using the GPU hardware encoder (h264_nvenc)
        encoder = 'h264_nvenc'
        # NVENC presets are different. 'p1' is fastest, 'p7' is slowest/best quality. 'p2' or 'p3' is great.
        preset = 'p2'

        print(f"   - [{thread_name}] Starting GPU Pass 1 (encoder: {encoder}, preset: {preset})...")
        pass1_command = [
            'ffmpeg', '-y', '-i', str(local_video_path),
            '-c:v', encoder, '-b:v', str(target_bitrate),
            '-pass', '1', '-preset', preset, '-an', '-f', 'mp4',
            '-passlogfile', log_file_prefix,
            os.devnull
        ]
        result1 = subprocess.run(pass1_command, capture_output=True, text=True, check=False)
        if result1.returncode != 0:
            print(f"   - [{thread_name}] ❌ FAIL: FFmpeg GPU Pass 1 failed. Error: {result1.stderr}")
            return

        print(f"   - [{thread_name}] Starting GPU Pass 2...")
        pass2_command = [
            # '-y' so ffmpeg never stops to ask about overwriting the output.
            'ffmpeg', '-y', '-i', str(local_video_path),
            '-c:v', encoder, '-b:v', str(target_bitrate),
            '-pass', '2', '-preset', preset, '-an',
            '-passlogfile', log_file_prefix,
            str(local_output_path)
        ]
        result2 = subprocess.run(pass2_command, capture_output=True, text=True, check=False)
        if result2.returncode != 0:
            print(f"   - [{thread_name}] ❌ FAIL: FFmpeg GPU Pass 2 failed. Error: {result2.stderr}")
            return

        # STEP 3: Replace the file on Google Drive
        if local_output_path.exists():
            compressed_size = local_output_path.stat().st_size
            shutil.move(local_output_path, video_path_on_drive)
            print(f"[{thread_name}] ✅ SUCCESS: New size is {compressed_size / 1024**2:.2f} MB. Original replaced.")
        else:
            print(f"[{thread_name}] ❌ FAIL: Compressed file was not created.")

    except Exception as e:
        print(f"[{thread_name}] ❌ An unexpected error occurred: {e}")
    finally:
        # STEP 4: Cleanup. Removing the directory also removes the pass logs,
        # whatever characters the file name contains.
        if work_dir is not None:
            shutil.rmtree(work_dir, ignore_errors=True)


In [7]:
# --- Main execution logic with GPU Check ---
def run_batch_compression():
    """
    Compress every oversized video under ``TARGET_DIR`` on the GPU.

    The run aborts early when ``nvidia-smi`` is missing or fails (no NVIDIA
    GPU runtime) or when ``TARGET_DIR`` is not a directory. Otherwise the tree
    is searched recursively for mp4/mov/mkv/avi/webm files larger than
    ``FILE_SIZE_THRESHOLD`` bytes, and each is passed to ``compress_video_gpu``
    on a thread pool of ``MAX_WORKERS`` workers.

    Returns:
        None. All reporting is done through print.
    """
    # === CHECK FOR GPU BEFORE STARTING ===
    try:
        result = subprocess.run(['nvidia-smi'], capture_output=True, text=True, check=True)
    except (FileNotFoundError, subprocess.CalledProcessError):
        print("❌ NVIDIA GPU NOT FOUND! ❌")
        print("Please enable a GPU runtime via 'Runtime' -> 'Change runtime type' -> 'T4 GPU' and run this cell again.")
        return

    print("✅ NVIDIA GPU Detected. Hardware acceleration will be used.")
    # Line 9 of the nvidia-smi table carries the GPU details. Guard the index
    # so a shorter or differently formatted report cannot raise IndexError.
    smi_lines = result.stdout.splitlines()
    if len(smi_lines) > 8:
        print(smi_lines[8])

    if not TARGET_DIR.is_dir():
        print(f"ERROR: The target directory '{TARGET_DIR}' was not found. Please check your path in Cell 2.")
        return

    print(f"\n🚀 Starting batch compression in '{TARGET_DIR}' using {MAX_WORKERS} parallel workers...")
    print("-" * 50)
    video_extensions = ["*.mp4", "*.mov", "*.mkv", "*.avi", "*.webm"]
    video_files_to_process = []
    print("Scanning for large files...")
    for ext in video_extensions:
        for video_path in TARGET_DIR.rglob(ext):
            try:
                if video_path.stat().st_size > FILE_SIZE_THRESHOLD:
                    video_files_to_process.append(video_path)
            except Exception as e:
                # A file that cannot be stat'ed is reported and skipped.
                print(f"Could not stat file {video_path.name}. Error: {e}")
    if not video_files_to_process:
        print("No video files larger than the threshold were found.")
        return
    print(f"Found {len(video_files_to_process)} videos to compress. Starting parallel GPU processing...")

    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # list() drains the iterator so every job has finished before continuing.
        list(executor.map(compress_video_gpu, video_files_to_process))

    print("-" * 50)
    print("🎉 Batch compression complete! All GPU jobs have been processed.")

In [8]:
# Start the GPU batch compressor. It checks for an NVIDIA GPU first and then
# replaces each video larger than FILE_SIZE_THRESHOLD with its compressed version.
run_batch_compression()

✅ NVIDIA GPU Detected. Hardware acceleration will be used.
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |

🚀 Starting batch compression in '/content/drive/My Drive/final_review_FMCG' using 8 parallel workers...
--------------------------------------------------
Scanning for large files...
Found 5 videos to compress. Starting parallel GPU processing...
[Worker for how_to_apply_curbicia_purifying_shampoo-mask (720p).mp4] Starting GPU job.
[Worker for Dunking wash microfiber towel in bucket with soapy water. static 4k shot. - Premium Stock Video Footage.mp4] Starting GPU job.
[Worker for Soap Lathering Videos, Download The BEST Free 4k Stock Video Footage & Soap Lathering HD Video Clips_11.mp4] Starting GPU job.
[Worker for Beverage Mixing Videos, Download The BEST Free 4k Stock Video Footage & Beverage Mixing HD Video Clips_4.mp4] Starting GPU job.
[Worker for Blowout Dry Shampoo Foam_chunk_003.mp4] Starting GPU job.
   - [Worker for Soap Lath