In [None]:
!nvidia-smi

In [None]:
!ffmpeg -version

In [None]:
import os
import glob
import time
import subprocess
import shutil  # <-- IMPORT ADDED HERE

# ==============================================================================
# Cell 1: Setup Paths and Configuration
# IMPORTANT: Update these paths to match your Google Drive structure
# ==============================================================================
# Folder that is scanned for the source "*.mp4" videos.
INPUT_FOLDER = "/content/drive/MyDrive/Dataset1_Raw_Videos"  # ⚠️ UPDATE THIS
# Folder that receives the finished "compressed_<name>.mp4" files.
OUTPUT_FOLDER = "/content/drive/MyDrive/New_Test/output"  # ⚠️ UPDATE THIS
# Scratch folder where each encode is written before being moved to OUTPUT_FOLDER.
TEMP_DIR = "/content/temp_videos"  # Local Colab storage

# Create the output and scratch directories if they don't exist; exist_ok=True
# makes re-running this cell harmless. INPUT_FOLDER is NOT created here.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
os.makedirs(TEMP_DIR, exist_ok=True)


# ==============================================================================
# Cell 2: Define compression function with GPU acceleration
# ==============================================================================
def compress_video_gpu(input_path, output_path, crf=23, timeout=3600):
    """
    Compress a video to H.265, preferring the GPU (NVENC) over the CPU.

    The GPU encoder ``hevc_nvenc`` is tried first. If that attempt raises
    or ffmpeg exits with a non-zero status, the same input is re-encoded
    with the CPU encoder ``libx265``. Audio is copied unchanged and the
    output is written with ``+faststart``.

    Args:
        input_path: Path of the source video.
        output_path: Path of the file to write (overwritten if it exists).
        crf: Quality target. Passed as ``-crf`` to libx265 and as ``-cq``
            to hevc_nvenc. The two scales are similar but not identical.
        timeout: Maximum number of seconds allowed for EACH ffmpeg run.

    Returns:
        A ``(success, message)`` tuple. ``success`` is True when either
        encoder finished with exit code 0; ``message`` names the encoder
        that succeeded or describes the failure.
    """

    def build_command(codec, quality_args):
        # Both attempts share everything except the codec and its
        # quality-control flags.
        return [
            "ffmpeg",
            "-y",
            "-i",
            input_path,
            "-c:v",
            codec,
            "-preset",
            "medium",
            *quality_args,
            "-c:a",
            "copy",
            "-movflags",
            "+faststart",
            output_path,
        ]

    # NVENC has no "-crf" option (that is a libx264/libx265 private option),
    # so the quality target must be expressed as constant-quality VBR:
    # "-rc vbr -cq N" with "-b:v 0" so no bitrate target overrides it.
    gpu_command = build_command(
        "hevc_nvenc", ["-rc", "vbr", "-cq", str(crf), "-b:v", "0"]
    )

    try:
        print(f"Trying GPU encoding for: {os.path.basename(input_path)}")
        result = subprocess.run(
            gpu_command,
            capture_output=True,
            text=True,
            # ffmpeg may echo metadata that is not valid in the locale
            # encoding; a decode error must not be mistaken for a failed run.
            errors="replace",
            timeout=timeout,
        )
        if result.returncode == 0:
            return True, "GPU encoding successful"
        print(
            f"GPU encoding failed (exit code {result.returncode}); "
            "falling back to CPU"
        )
    except Exception as e:
        # Deliberately broad: any GPU-side problem (missing binary, timeout,
        # ...) should lead to the CPU attempt rather than abort the caller.
        print(f"GPU encoding failed: {e}")

    # Fallback to CPU encoding; "-y" overwrites whatever the GPU attempt left.
    cpu_command = build_command("libx265", ["-crf", str(crf)])

    try:
        print(f"Using CPU encoding for: {os.path.basename(input_path)}")
        result = subprocess.run(
            cpu_command,
            capture_output=True,
            text=True,
            errors="replace",
            timeout=timeout,
        )
        if result.returncode == 0:
            return True, "CPU encoding successful"
        return False, f"Encoding failed: {result.stderr}"
    except Exception as e:
        return False, f"Error: {e}"


# ==============================================================================
# Cell 3: Smart batch processing for large videos
# ==============================================================================
def get_video_info(video_path):
    """
    Return the size of ``video_path`` in megabytes (1 MB = 1024 * 1024 bytes).

    Despite the generic name, the file size is the only information
    returned. If the size cannot be read (missing file, permission problem,
    invalid path value), ``0`` is returned instead of raising.
    """
    try:
        return os.path.getsize(video_path) / (1024 * 1024)
    except (OSError, TypeError, ValueError):
        # Narrower than a bare "except:", which would also swallow
        # KeyboardInterrupt and SystemExit.
        return 0


def process_videos_smart_batches(
    input_folder, output_folder, temp_dir, max_videos_per_session=10
):
    """
    Compress the MP4 files of ``input_folder`` into ``output_folder`` in batches.

    Every ``*.mp4`` in ``input_folder`` that has no matching
    ``compressed_<name>`` file in ``output_folder`` is a candidate.
    Candidates are handled smallest first, and at most
    ``max_videos_per_session`` of them are attempted per call. Each video
    is encoded into ``temp_dir`` and then moved to ``output_folder``.

    Args:
        input_folder: Folder scanned for ``*.mp4`` source files.
        output_folder: Folder receiving the ``compressed_*.mp4`` results.
        temp_dir: Scratch folder used for the encode itself.
        max_videos_per_session: Upper bound on videos attempted per call.

    Returns:
        None. Progress and the final summary are printed.
    """
    video_files = glob.glob(os.path.join(input_folder, "*.mp4"))

    if not video_files:
        print("No MP4 files found in the input folder!")
        return

    print(f"Found {len(video_files)} MP4 files to process")

    # Check for already processed files. A set keeps the membership test
    # below O(1) per input file.
    already_processed = {
        os.path.basename(f)
        for f in glob.glob(os.path.join(output_folder, "compressed_*.mp4"))
    }
    if already_processed:
        print(f"Already processed: {len(already_processed)} files")

    pending = [
        f
        for f in video_files
        if f"compressed_{os.path.basename(f)}" not in already_processed
    ]

    if not pending:
        print("All videos have already been processed!")
        return

    # Sizes are only looked up for files that still need work; smallest first.
    files_to_process = sorted(
        ((f, get_video_info(f)) for f in pending), key=lambda item: item[1]
    )

    # A negative limit would make the slice drop files from the END instead
    # of selecting none, so clamp it.
    batch = files_to_process[: max(0, max_videos_per_session)]
    print(f"Processing {len(batch)} videos in this session")

    # Do not rely on the caller having created the folders beforehand.
    os.makedirs(output_folder, exist_ok=True)
    os.makedirs(temp_dir, exist_ok=True)

    successful_count = 0
    failed_files = []

    for position, (video_file, file_size_mb) in enumerate(batch, start=1):
        filename = os.path.basename(video_file)
        final_output = os.path.join(output_folder, f"compressed_{filename}")
        # Staging name inside the output folder. It does not match
        # "compressed_*.mp4", so an interrupted transfer is never mistaken
        # for a finished video by a later session.
        partial_output = final_output + ".part"
        temp_output = os.path.join(temp_dir, f"compressed_{filename}")

        print(f"\n--- Processing video {position}/{len(batch)} ---")
        print(f"File: {filename} ({file_size_mb:.1f} MB)")

        # Report the space of the filesystem the encode is written to.
        free_space_gb = shutil.disk_usage(temp_dir).free / (1024 * 1024 * 1024)
        print(f"Available space: {free_space_gb:.1f} GB")

        start_time = time.time()
        success, message = compress_video_gpu(video_file, temp_output)

        if not success:
            print(f"✗ Failed to encode: {filename} - {message}")
            failed_files.append(filename)
            if os.path.exists(temp_output):
                os.remove(temp_output)
            continue

        try:
            # shutil.move (not os.rename) because temp_dir and output_folder
            # may live on different filesystems. The rename to the final
            # name happens only after the whole file has arrived.
            shutil.move(temp_output, partial_output)
            os.replace(partial_output, final_output)
            new_size = os.path.getsize(final_output) / (1024 * 1024)
        except OSError as e:
            print(f"✗ Failed to move file: {filename} - {e}")
            failed_files.append(filename)
            for leftover in (temp_output, partial_output):
                try:
                    os.remove(leftover)
                except FileNotFoundError:
                    pass
                except OSError as cleanup_error:
                    print(f"  Could not remove {leftover}: {cleanup_error}")
            continue

        compression_ratio = (
            (file_size_mb - new_size) / file_size_mb * 100 if file_size_mb > 0 else 0
        )
        processing_time = time.time() - start_time

        print(f"✓ Success: {filename}")
        print(f"  Original: {file_size_mb:.1f}MB → Compressed: {new_size:.1f}MB")
        print(
            f"  Compression: {compression_ratio:.1f}% | Time: {processing_time/60:.1f} min"
        )
        successful_count += 1

    if len(files_to_process) > len(batch):
        print(f"\nReached maximum videos per session ({max_videos_per_session}).")
        print("Restart the session to continue with remaining videos.")

    print("\n=== Session Complete ===")
    print(f"Processed this session: {successful_count}/{len(batch)}")
    if failed_files:
        print(f"Failed files: {failed_files}")


# ==============================================================================
# Cell 4: Utility functions for session management
# ==============================================================================
def show_progress_summary(input_folder=None, output_folder=None):
    """
    Show overall progress across all sessions.

    A video counts as completed when ``output_folder`` contains a
    ``compressed_<name>`` file for an ``*.mp4`` found in ``input_folder``.
    Output files with no matching input are ignored, so the reported
    progress can never exceed 100%.

    Args:
        input_folder: Folder with the source videos; defaults to the
            module-level ``INPUT_FOLDER``.
        output_folder: Folder with the compressed videos; defaults to the
            module-level ``OUTPUT_FOLDER``.

    Returns:
        None. The summary is printed.
    """
    if input_folder is None:
        input_folder = INPUT_FOLDER
    if output_folder is None:
        output_folder = OUTPUT_FOLDER

    input_names = {
        os.path.basename(p) for p in glob.glob(os.path.join(input_folder, "*.mp4"))
    }
    if not input_names:
        print("No input files found to generate a summary.")
        return

    output_names = {
        os.path.basename(p)
        for p in glob.glob(os.path.join(output_folder, "compressed_*.mp4"))
    }
    completed = sum(1 for name in input_names if f"compressed_{name}" in output_names)

    print("\n=== OVERALL PROGRESS ===")
    print(f"Total input files: {len(input_names)}")
    print(f"Completed files: {completed}")
    # input_names is non-empty here, so the division is safe.
    progress = completed / len(input_names) * 100
    print(f"Progress: {progress:.1f}%")


def check_storage_space(path="/content"):
    """
    Print and return the free space of the filesystem containing ``path``.

    Args:
        path: Any existing path on the filesystem to inspect. Defaults to
            ``/content``, the location this file uses for local storage.

    Returns:
        Free space in gigabytes (1 GB = 1024**3 bytes) as a float.

    Raises:
        OSError: If ``path`` does not exist or cannot be inspected.
    """
    free_space_gb = shutil.disk_usage(path).free / (1024 * 1024 * 1024)
    print(f"Available local space: {free_space_gb:.1f} GB")
    return free_space_gb


def cleanup_temp_storage(temp_dir=None):
    """
    Delete the files directly inside the temporary folder.

    Only top-level entries matched by ``*`` are considered. Entries that
    cannot be removed with ``os.remove`` (for example sub-directories) are
    reported and left in place. The final line reports how many entries
    were actually removed.

    Args:
        temp_dir: Folder to clean; defaults to the module-level ``TEMP_DIR``.

    Returns:
        None. Results are printed.
    """
    if temp_dir is None:
        temp_dir = TEMP_DIR

    temp_files = glob.glob(os.path.join(temp_dir, "*"))
    if not temp_files:
        print("Temporary storage is already clean.")
        return

    removed_count = 0
    for temp_file in temp_files:
        try:
            os.remove(temp_file)
        except OSError as e:
            print(f"Could not remove {os.path.basename(temp_file)}: {e}")
        else:
            removed_count += 1
            print(f"Removed: {os.path.basename(temp_file)}")
    # Count real removals, not candidates, so failures are not reported
    # as successes.
    print(f"Cleaned temporary storage: {removed_count} files removed.")


# ==============================================================================
# Cell 5: Execute the processing and show summary
# ==============================================================================

# Entry point of the cell: report the free local space, run one processing
# session, print the overall progress, then list the helper commands.
print("Starting video processing session...")
check_storage_space()

# Process up to 30 videos in this session (adjust max_videos_per_session if
# needed); videos that already have an output file are skipped.
process_videos_smart_batches(
    INPUT_FOLDER, OUTPUT_FOLDER, TEMP_DIR, max_videos_per_session=30
)

# Show progress summary after the session
show_progress_summary()

# Reminder of the helper functions defined in this cell.
print("\n" + "=" * 50)
print("📋 QUICK COMMANDS (run in a new cell):")
print("• To process the next batch, just re-run this entire cell.")
print("• `cleanup_temp_storage()` - Clean temporary files.")
print("• `check_storage_space()` - Check available local space.")
print("• `show_progress_summary()` - Show overall progress.")
print("=" * 50)

In [None]:
# Re-print the overall progress without starting another processing session.
show_progress_summary()

In [None]:
import os
import glob
import time
import subprocess
import shutil
import json

# Same folder values as in the compression cell above, repeated here
# (presumably so this cell can be run on its own -- confirm before removing).
INPUT_FOLDER = "/content/drive/MyDrive/Dataset1_Raw_Videos"
OUTPUT_FOLDER = "/content/drive/MyDrive/New_Test/output"
TEMP_DIR = "/content/temp_videos"
# Destination of the JSON status report written by create_progress_summary_json().
LOG_FILE = "/content/drive/MyDrive/New_Test/session_summary.json"


# ==============================================================================
# Cell 2: Function to scan folders and create a JSON summary
# ==============================================================================
def create_progress_summary_json(input_folder, output_folder, log_file):
    """
    Write a JSON status report for the videos in ``input_folder``.

    Each ``*.mp4`` in ``input_folder`` is classified as completed when
    ``output_folder`` holds a ``compressed_<name>`` counterpart, and as
    unprocessed otherwise. Nothing is compressed; the folders are only
    listed. The report (timestamp, counts and both sorted name lists) is
    written to ``log_file``. Write errors are printed, not raised.
    """
    print("Scanning folders to generate summary...")

    # Source videos.
    source_paths = glob.glob(os.path.join(input_folder, "*.mp4"))
    if len(source_paths) == 0:
        print("No MP4 files found in the input folder.")
        return

    # Map every compressed file back to its original name, e.g.
    # "compressed_video1.mp4" -> "video1.mp4".
    done_names = set()
    for compressed_path in glob.glob(os.path.join(output_folder, "compressed_*.mp4")):
        compressed_name = os.path.basename(compressed_path)
        done_names.add(compressed_name.replace("compressed_", "", 1))

    # Split the source names into the two categories.
    source_names = [os.path.basename(path) for path in source_paths]
    finished = sorted(name for name in source_names if name in done_names)
    pending = sorted(name for name in source_names if name not in done_names)

    report = {
        "summary_timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "total_videos": len(source_paths),
        "completed_count": len(finished),
        "unprocessed_count": len(pending),
        "completed_videos": finished,
        "unprocessed_videos": pending,
    }

    # Persist the report.
    try:
        with open(log_file, "w") as handle:
            json.dump(report, handle, indent=4)
        print(f"\n✅ Success! Summary file created at: {log_file}")
        print(f"📊 Status: {len(finished)} completed, {len(pending)} pending.")
    except Exception as err:
        print(f"\n❌ Error: Could not write summary file. {err}")

In [None]:
# Write the JSON status report to LOG_FILE; this only scans the folders and
# does not compress anything.
create_progress_summary_json(INPUT_FOLDER, OUTPUT_FOLDER, LOG_FILE)