In [2]:
import os
import zipfile
import shutil
import json
import time
import gc
from moviepy.editor import VideoFileClip
from tqdm import tqdm
import platform 

In [6]:
def process_videos_and_labels_in_zip(zip_file_path, output_folder, target_duration_minutes=10, labels_filename="labels.json"):
    """
    Unpacks videos and a JSON labels file from a ZIP, trims the videos,
    and saves them to a new folder. Handles videos in nested folders.
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        print(f"Created output folder: {output_folder}")

    temp_extract_folder = os.path.join(os.path.dirname(output_folder), "temp_extracted_videos")
    
    if os.path.exists(temp_extract_folder):
        print(f"Temporary folder exists: {temp_extract_folder}. Cleaning up...")
        try:
            shutil.rmtree(temp_extract_folder, ignore_errors=False, onerror=handle_remove_readonly)
        except Exception as e:
            print(f"Error cleaning up old temp folder: {e}. Please close any programs that might be using it or restart the script.")
            return
    os.makedirs(temp_extract_folder)
    print(f"Created temporary extraction folder: {temp_extract_folder}")

    print(f"Unpacking {zip_file_path}...")
    try:
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(temp_extract_folder)
        print("Unpacking complete.")
    except Exception as e:
        print(f"Critical error during ZIP extraction: {e}")
        return

    print("Starting video trimming and label processing...")

    processed_video_count = 0
    skipped_video_count = 0
    error_video_count = 0
    
    video_extensions = ('.mp4', '.avi', '.mkv', '.mov', '.webm', '.flv')
    files_to_process = []
    labels_file_path_in_temp = None

    for root, _, files in os.walk(temp_extract_folder):
        for filename in files:
            if filename.lower().endswith(video_extensions):
                files_to_process.append(os.path.join(root, filename))
            elif filename == labels_filename:
                labels_file_path_in_temp = os.path.join(root, filename)

    if labels_file_path_in_temp:
        try:
            shutil.copy2(labels_file_path_in_temp, os.path.join(output_folder, labels_filename))
            print(f"Labels file '{labels_filename}' copied to {output_folder}")
        except Exception as e:
            print(f"Error copying labels file '{labels_filename}': {e}")
    else:
        print(f"Warning: Labels file '{labels_filename}' not found in the ZIP archive.")

    for original_filepath in tqdm(files_to_process[5:], desc="Processing videos"):
        filename = os.path.basename(original_filepath)
        
        if filename.startswith("-"):
            safe_filename = "processed_" + filename
        else:
            safe_filename = filename
        
        output_filepath = os.path.join(output_folder, safe_filename)
        
        clip = None
        
        try:
            clip = VideoFileClip(original_filepath)
            target_duration_seconds = target_duration_minutes * 60

            write_params = {
                'codec': 'libx264',
                'audio_codec': 'aac',
                'threads': 4, 
                'preset': 'ultrafast',
                'logger': None
            }

            if clip.duration > target_duration_seconds:
                with clip.subclip(0, target_duration_seconds) as cut_clip:
                    cut_clip.write_videofile(output_filepath, **write_params)
                print(f"  Обрезано и сохранено: {safe_filename}")
                processed_video_count += 1
            else:
                clip.write_videofile(output_filepath, **write_params)
                print(f"  Видео {safe_filename} короче цели, скопировано.")
                skipped_video_count += 1
            
        except Exception as e:
            print(f"\n!!! Ошибка при обработке видео {filename}: {e}\n")
            error_video_count += 1
            if os.path.exists(output_filepath):
                try:
                    os.remove(output_filepath)
                    print(f"  Удален неполный выходной файл: {output_filepath}")
                except Exception as remove_e:
                    print(f"  Ошибка при удалении неполного файла {output_filepath}: {remove_e}")
        finally:
            if clip:
                try:
                    clip.close() 
                    if hasattr(clip, 'tempfiles'):
                        for temp_file in clip.tempfiles:
                            if os.path.exists(temp_file):
                                try:
                                    os.remove(temp_file)
                                except Exception:
                                    pass
                except Exception as close_e:
                    print(f"  Ошибка при закрытии clip ресурсов для {filename}: {close_e}")
            gc.collect()

    print("Attempting to remove temporary directory...")
    time.sleep(2)  
    try:
        shutil.rmtree(temp_extract_folder, ignore_errors=False, onerror=handle_remove_readonly)
        print(f"Temporary folder {temp_extract_folder} and its contents have been removed.")
    except Exception as e:
        print(f"ERROR: Failed to remove temporary folder {temp_extract_folder}: {e}")
        print("Please consider deleting it manually.")

    print("\n--- Processing Report ---")
    print(f"Total videos trimmed: {processed_video_count}")
    print(f"Total videos copied (shorter than target): {skipped_video_count}")
    print(f"Total errors during video processing: {error_video_count}")

def handle_remove_readonly(func, path, exc_info):
    
    if func == os.remove and platform.system() == "Windows":
        import stat
        os.chmod(path, stat.S_IWRITE)
        func(path)
    else:
        raise 

if __name__ == "__main__":
    zip_file = r"C:\Users\Admin\Downloads\data_train_short.zip"
    output_directory = r"D:\trimmed_videos_with_labels"
    labels_file_name_global = "labels.json"

    process_videos_and_labels_in_zip(
        zip_file, 
        output_directory, 
        target_duration_minutes=10, 
        labels_filename=labels_file_name_global
    )
    
    print(f"\nAll processed videos and the '{labels_file_name_global}' file are saved in: {output_directory}")


Temporary folder exists: D:\temp_extracted_videos. Cleaning up...
Created temporary extraction folder: D:\temp_extracted_videos
Unpacking C:\Users\Admin\Downloads\data_train_short.zip...
Unpacking complete.
Starting video trimming and label processing...
Labels file 'labels.json' copied to D:\trimmed_videos_with_labels


Processing videos:   0%|                                                                        | 0/75 [00:00<?, ?it/s]


!!! Ошибка при обработке видео -220020068_456241755.mp4: [Errno 32] Broken pipe

MoviePy error: FFMPEG encountered the following error while writing file D:\trimmed_videos_with_labels\processed_-220020068_456241755.mp4:

 



Processing videos:   1%|▊                                                          | 1/75 [34:37<42:41:40, 2077.04s/it]

  Удален неполный выходной файл: D:\trimmed_videos_with_labels\processed_-220020068_456241755.mp4


Processing videos:   3%|█▌                                                          | 2/75 [36:58<19:01:29, 938.22s/it]

  Обрезано и сохранено: processed_-220020068_456241756.mp4

!!! Ошибка при обработке видео -220020068_456241758.mp4: [Errno 32] Broken pipe

MoviePy error: FFMPEG encountered the following error while writing file D:\trimmed_videos_with_labels\processed_-220020068_456241758.mp4:

 

  Удален неполный выходной файл: D:\trimmed_videos_with_labels\processed_-220020068_456241758.mp4


Processing videos:   5%|███▏                                                        | 4/75 [52:12<11:43:02, 594.12s/it]


!!! Ошибка при обработке видео -220020068_456241844.mp4: [Errno 28] No space left on device

MoviePy error: FFMPEG encountered the following error while writing file D:\trimmed_videos_with_labels\processed_-220020068_456241844.mp4:

 

  Удален неполный выходной файл: D:\trimmed_videos_with_labels\processed_-220020068_456241844.mp4


Processing videos:   7%|████                                                         | 5/75 [56:16<9:06:04, 468.06s/it]


!!! Ошибка при обработке видео -220020068_456241845.mp4: [Errno 32] Broken pipe

MoviePy error: FFMPEG encountered the following error while writing file D:\trimmed_videos_with_labels\processed_-220020068_456241845.mp4:

 

  Удален неполный выходной файл: D:\trimmed_videos_with_labels\processed_-220020068_456241845.mp4


Processing videos:   8%|████▋                                                      | 6/75 [1:03:06<8:35:24, 448.18s/it]


!!! Ошибка при обработке видео -220020068_456241846.mp4: [Errno 32] Broken pipe

MoviePy error: FFMPEG encountered the following error while writing file D:\trimmed_videos_with_labels\processed_-220020068_456241846.mp4:

 

  Удален неполный выходной файл: D:\trimmed_videos_with_labels\processed_-220020068_456241846.mp4


Processing videos:   9%|█████▌                                                     | 7/75 [1:09:02<7:53:39, 417.93s/it]


!!! Ошибка при обработке видео -220020068_456241847.mp4: [Errno 32] Broken pipe

MoviePy error: FFMPEG encountered the following error while writing file D:\trimmed_videos_with_labels\processed_-220020068_456241847.mp4:

 

  Удален неполный выходной файл: D:\trimmed_videos_with_labels\processed_-220020068_456241847.mp4


Processing videos:  11%|██████▎                                                    | 8/75 [1:11:19<6:07:02, 328.70s/it]

  Обрезано и сохранено: processed_-220020068_456241849.mp4


Processing videos:  12%|███████                                                    | 9/75 [1:15:01<5:24:45, 295.24s/it]


!!! Ошибка при обработке видео -220020068_456241850.mp4: [Errno 32] Broken pipe

MoviePy error: FFMPEG encountered the following error while writing file D:\trimmed_videos_with_labels\processed_-220020068_456241850.mp4:

 

  Удален неполный выходной файл: D:\trimmed_videos_with_labels\processed_-220020068_456241850.mp4


Processing videos:  13%|███████▋                                                  | 10/75 [1:17:18<4:26:59, 246.45s/it]

  Обрезано и сохранено: processed_-220020068_456241851.mp4


Processing videos:  15%|████████▌                                                 | 11/75 [1:17:54<3:14:08, 182.00s/it]

  Обрезано и сохранено: processed_-220020068_456248657.mp4


Processing videos:  16%|█████████▎                                                | 12/75 [1:19:15<2:38:50, 151.28s/it]

  Обрезано и сохранено: processed_-220020068_456249667.mp4


Processing videos:  17%|██████████                                                | 13/75 [1:20:40<2:15:40, 131.30s/it]

  Обрезано и сохранено: processed_-220020068_456249692.mp4


Processing videos:  19%|██████████▊                                               | 14/75 [1:22:04<1:58:57, 117.01s/it]

  Обрезано и сохранено: processed_-220020068_456249693.mp4


Processing videos:  20%|███████████▌                                              | 15/75 [1:23:32<1:48:09, 108.16s/it]

  Обрезано и сохранено: processed_-220020068_456249716.mp4


Processing videos:  21%|████████████▎                                             | 16/75 [1:25:00<1:40:33, 102.27s/it]

  Обрезано и сохранено: processed_-220020068_456249719.mp4


Processing videos:  23%|█████████████▎                                             | 17/75 [1:26:29<1:34:53, 98.17s/it]

  Обрезано и сохранено: processed_-220020068_456249720.mp4


Processing videos:  24%|██████████████▏                                            | 18/75 [1:27:54<1:29:29, 94.20s/it]

  Обрезано и сохранено: processed_-220020068_456249732.mp4


Processing videos:  25%|██████████████▉                                            | 19/75 [1:29:23<1:26:18, 92.47s/it]

  Обрезано и сохранено: processed_-220020068_456249733.mp4


Processing videos:  27%|███████████████▍                                          | 20/75 [1:31:54<1:41:03, 110.24s/it]

  Обрезано и сохранено: processed_-220020068_456249739.mp4


Processing videos:  28%|████████████████▌                                          | 21/75 [1:32:36<1:20:51, 89.84s/it]

  Обрезано и сохранено: processed_-220020068_456252055.mp4


Processing videos:  29%|█████████████████▎                                         | 22/75 [1:34:02<1:18:19, 88.66s/it]

  Обрезано и сохранено: processed_-220020068_456253855.mp4


Processing videos:  31%|██████████████████                                         | 23/75 [1:35:31<1:16:45, 88.57s/it]

  Обрезано и сохранено: processed_-220020068_456253876.mp4


Processing videos:  32%|██████████████████▉                                        | 24/75 [1:37:00<1:15:27, 88.77s/it]

  Обрезано и сохранено: processed_-220020068_456254282.mp4


Processing videos:  33%|███████████████████▋                                       | 25/75 [1:38:27<1:13:31, 88.23s/it]

  Обрезано и сохранено: processed_-220020068_456254537.mp4


Processing videos:  35%|████████████████████                                      | 26/75 [1:40:52<1:26:02, 105.35s/it]

  Обрезано и сохранено: processed_-220020068_456254614.mp4


Processing videos:  36%|████████████████████▉                                     | 27/75 [1:43:16<1:33:29, 116.86s/it]

  Обрезано и сохранено: processed_-220020068_456254621.mp4


Processing videos:  37%|█████████████████████▋                                    | 28/75 [1:44:39<1:23:37, 106.75s/it]

  Обрезано и сохранено: processed_-220020068_456255332.mp4


Processing videos:  39%|██████████████████████▍                                   | 29/75 [1:46:04<1:16:44, 100.10s/it]

  Обрезано и сохранено: processed_-220020068_456255338.mp4


Processing videos:  40%|███████████████████████▌                                   | 30/75 [1:47:28<1:11:26, 95.26s/it]

  Обрезано и сохранено: processed_-220020068_456255339.mp4


Processing videos:  41%|████████████████████████▍                                  | 31/75 [1:48:51<1:07:13, 91.67s/it]

  Обрезано и сохранено: processed_-220020068_456255340.mp4


Processing videos:  43%|█████████████████████████▏                                 | 32/75 [1:50:16<1:04:22, 89.83s/it]

  Обрезано и сохранено: processed_-220020068_456255341.mp4


Processing videos:  44%|█████████████████████████▉                                 | 33/75 [1:51:38<1:01:14, 87.49s/it]

  Обрезано и сохранено: processed_-220020068_456255344.mp4


Processing videos:  45%|███████████████████████████▋                                 | 34/75 [1:53:02<58:59, 86.32s/it]

  Обрезано и сохранено: processed_-220020068_456255346.mp4


Processing videos:  47%|████████████████████████████▍                                | 35/75 [1:54:23<56:24, 84.61s/it]

  Обрезано и сохранено: processed_-220020068_456255349.mp4


Processing videos:  48%|█████████████████████████████▎                               | 36/75 [1:55:46<54:39, 84.10s/it]

  Обрезано и сохранено: processed_-220020068_456255389.mp4


Processing videos:  49%|██████████████████████████████                               | 37/75 [1:57:11<53:29, 84.45s/it]

  Обрезано и сохранено: processed_-220020068_456255391.mp4


Processing videos:  51%|██████████████████████████████▉                              | 38/75 [1:58:35<52:01, 84.38s/it]

  Обрезано и сохранено: processed_-220020068_456255392.mp4


Processing videos:  52%|███████████████████████████████▋                             | 39/75 [2:00:02<51:00, 85.03s/it]

  Обрезано и сохранено: processed_-220020068_456255393.mp4


Processing videos:  53%|████████████████████████████████▌                            | 40/75 [2:01:28<49:50, 85.45s/it]

  Обрезано и сохранено: processed_-220020068_456255394.mp4


Processing videos:  55%|█████████████████████████████████▎                           | 41/75 [2:02:55<48:35, 85.74s/it]

  Обрезано и сохранено: processed_-220020068_456255395.mp4


Processing videos:  56%|██████████████████████████████████▏                          | 42/75 [2:04:22<47:22, 86.12s/it]

  Обрезано и сохранено: processed_-220020068_456255396.mp4


Processing videos:  57%|██████████████████████████████████▉                          | 43/75 [2:05:49<46:07, 86.48s/it]

  Обрезано и сохранено: processed_-220020068_456255399.mp4


Processing videos:  59%|███████████████████████████████████▊                         | 44/75 [2:07:15<44:39, 86.44s/it]

  Обрезано и сохранено: processed_-220020068_456255400.mp4


Processing videos:  60%|████████████████████████████████████▌                        | 45/75 [2:08:45<43:43, 87.45s/it]

  Обрезано и сохранено: processed_-220020068_456255401.mp4


Processing videos:  61%|█████████████████████████████████████▍                       | 46/75 [2:10:15<42:37, 88.19s/it]

  Обрезано и сохранено: processed_-220020068_456255402.mp4


Processing videos:  63%|██████████████████████████████████████▏                      | 47/75 [2:11:43<41:12, 88.30s/it]

  Обрезано и сохранено: processed_-220020068_456255403.mp4


Processing videos:  64%|███████████████████████████████████████                      | 48/75 [2:13:11<39:36, 88.00s/it]

  Обрезано и сохранено: processed_-220020068_456255405.mp4


Processing videos:  65%|███████████████████████████████████████▊                     | 49/75 [2:14:39<38:11, 88.12s/it]

  Обрезано и сохранено: processed_-220020068_456255407.mp4


Processing videos:  67%|████████████████████████████████████████▋                    | 50/75 [2:16:04<36:16, 87.08s/it]

  Обрезано и сохранено: processed_-220020068_456255409.mp4


Processing videos:  68%|█████████████████████████████████████████▍                   | 51/75 [2:17:27<34:24, 86.03s/it]

  Обрезано и сохранено: processed_-220020068_456255410.mp4


Processing videos:  69%|██████████████████████████████████████████▎                  | 52/75 [2:18:52<32:47, 85.56s/it]

  Обрезано и сохранено: processed_-220020068_456255411.mp4


Processing videos:  71%|███████████████████████████████████████████                  | 53/75 [2:20:18<31:26, 85.73s/it]

  Обрезано и сохранено: processed_-220020068_456255412.mp4


Processing videos:  72%|███████████████████████████████████████████▉                 | 54/75 [2:21:42<29:47, 85.10s/it]

  Обрезано и сохранено: processed_-220020068_456255414.mp4


Processing videos:  73%|████████████████████████████████████████████▋                | 55/75 [2:23:07<28:22, 85.12s/it]

  Обрезано и сохранено: processed_-220020068_456255766.mp4


Processing videos:  75%|█████████████████████████████████████████████▌               | 56/75 [2:24:32<26:58, 85.21s/it]

  Обрезано и сохранено: processed_-220020068_456255767.mp4


Processing videos:  76%|██████████████████████████████████████████████▎              | 57/75 [2:25:54<25:16, 84.27s/it]

  Обрезано и сохранено: processed_-220020068_456255773.mp4


Processing videos:  77%|███████████████████████████████████████████████▏             | 58/75 [2:27:19<23:54, 84.41s/it]

  Обрезано и сохранено: processed_-220020068_456255779.mp4


Processing videos:  79%|███████████████████████████████████████████████▉             | 59/75 [2:28:42<22:22, 83.88s/it]

  Обрезано и сохранено: processed_-220020068_456255780.mp4


Processing videos:  80%|████████████████████████████████████████████████▊            | 60/75 [2:30:09<21:12, 84.85s/it]

  Обрезано и сохранено: processed_-220020068_456256003.mp4


Processing videos:  81%|█████████████████████████████████████████████████▌           | 61/75 [2:31:33<19:44, 84.61s/it]

  Обрезано и сохранено: processed_-220020068_456256005.mp4


Processing videos:  83%|██████████████████████████████████████████████████▍          | 62/75 [2:33:00<18:30, 85.44s/it]

  Обрезано и сохранено: processed_-220020068_456256012.mp4


Processing videos:  84%|███████████████████████████████████████████████████▏         | 63/75 [2:34:28<17:15, 86.28s/it]

  Обрезано и сохранено: processed_-220020068_456256013.mp4


Processing videos:  85%|████████████████████████████████████████████████████         | 64/75 [2:35:54<15:48, 86.21s/it]

  Обрезано и сохранено: processed_-220020068_456256016.mp4


Processing videos:  87%|████████████████████████████████████████████████████▊        | 65/75 [2:37:20<14:20, 86.01s/it]

  Обрезано и сохранено: processed_-220020068_456256019.mp4


Processing videos:  88%|█████████████████████████████████████████████████████▋       | 66/75 [2:38:48<13:00, 86.68s/it]

  Обрезано и сохранено: processed_-220020068_456256430.mp4


Processing videos:  89%|██████████████████████████████████████████████████████▍      | 67/75 [2:40:18<11:40, 87.54s/it]

  Обрезано и сохранено: processed_-220020068_456256446.mp4


Processing videos:  91%|███████████████████████████████████████████████████████▎     | 68/75 [2:41:45<10:12, 87.56s/it]

  Обрезано и сохранено: processed_-220020068_456256475.mp4


Processing videos:  92%|████████████████████████████████████████████████████████     | 69/75 [2:43:16<08:50, 88.41s/it]

  Обрезано и сохранено: processed_-220020068_456256571.mp4


Processing videos:  93%|████████████████████████████████████████████████████████▉    | 70/75 [2:44:42<07:18, 87.71s/it]

  Обрезано и сохранено: processed_-220020068_456256868.mp4


Processing videos:  95%|█████████████████████████████████████████████████████████▋   | 71/75 [2:46:16<05:58, 89.50s/it]

  Обрезано и сохранено: processed_-220020068_456256893.mp4


Processing videos:  96%|██████████████████████████████████████████████████████████▌  | 72/75 [2:47:37<04:21, 87.17s/it]

  Обрезано и сохранено: processed_-220020068_456257136.mp4


Processing videos:  97%|███████████████████████████████████████████████████████████▎ | 73/75 [2:48:56<02:49, 84.73s/it]

  Обрезано и сохранено: processed_-220020068_456257137.mp4


Processing videos:  99%|████████████████████████████████████████████████████████████▏| 74/75 [2:50:19<01:23, 83.96s/it]

  Обрезано и сохранено: processed_-220020068_456257139.mp4


Processing videos: 100%|████████████████████████████████████████████████████████████| 75/75 [2:51:43<00:00, 137.38s/it]

  Обрезано и сохранено: processed_-220020068_456257141.mp4
Attempting to remove temporary directory...





Temporary folder D:\temp_extracted_videos and its contents have been removed.

--- Processing Report ---
Total videos trimmed: 68
Total videos copied (shorter than target): 0
Total errors during video processing: 7

All processed videos and the 'labels.json' file are saved in: D:\trimmed_videos_with_labels
