In [1]:
import json
import os
import shutil

def organize_data(json_file, videos_folder, output_folder):
    """Copy videos into training/testing folders according to a JSON manifest.

    The manifest is iterated as a sequence of entries; each entry must have an
    ``'instances'`` list whose items carry a ``'video_id'`` and a ``'split'``
    key. For every instance the file ``<videos_folder>/<video_id>.mp4`` is
    copied to ``<output_folder>/training_data`` when the split is ``"train"``
    or to ``<output_folder>/testing_data`` when it is ``"test"``.

    Problems are reported on stdout rather than raised: a missing or malformed
    manifest ends the function early, while a missing video, an unknown split,
    or a failed copy only skips that single video.

    Args:
        json_file: Path to the JSON manifest.
        videos_folder: Directory containing the source ``.mp4`` files.
        output_folder: Directory under which the two split folders are created.

    Returns:
        None.
    """
    try:
        # JSON text is UTF-8 by specification; do not depend on the locale.
        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: JSON file '{json_file}' not found.")
        return
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in '{json_file}'.")
        return

    training_dir = os.path.join(output_folder, "training_data")
    testing_dir = os.path.join(output_folder, "testing_data")
    os.makedirs(training_dir, exist_ok=True)
    os.makedirs(testing_dir, exist_ok=True)

    for entry in data:
        for instance in entry['instances']:
            video_id = instance['video_id']
            split = instance['split']
            filename = f"{video_id}.mp4"
            video_path = os.path.join(videos_folder, filename)

            if not os.path.exists(video_path):
                print(f"Warning: Video file '{video_path}' not found. Skipping.")
                continue

            if split == "train":
                destination = os.path.join(training_dir, filename)
            elif split == "test":
                destination = os.path.join(testing_dir, filename)
            else:
                print(f"Warning: Unknown split '{split}' for video '{video_id}'. Skipping.")
                continue

            try:
                shutil.copy2(video_path, destination)  # copy2 preserves metadata
            except OSError as e:
                # shutil.Error is a subclass of OSError, so this also covers
                # it; catching OSError additionally keeps one unreadable or
                # unwritable file (permissions, full disk, source is a
                # directory) from aborting the remaining copies.
                print(f"Error copying '{filename}': {e}")
            else:
                print(f"Copied '{filename}' to '{split}' folder.")

def main():
    """Organize the videos using the fixed manifest, source and output paths."""
    organize_data(
        json_file='WLASL_100.json',  # Path to your JSON file
        videos_folder='videos',  # Path to your videos folder
        output_folder='organized_data',  # Path to the output directory
    )

# Run the organizer only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

Copied '69241.mp4' to 'train' folder.
Copied '07069.mp4' to 'train' folder.
Copied '07068.mp4' to 'train' folder.
Copied '07070.mp4' to 'train' folder.
Copied '07074.mp4' to 'train' folder.
Copied '17710.mp4' to 'train' folder.
Copied '65540.mp4' to 'train' folder.
Copied '17711.mp4' to 'train' folder.
Copied '17712.mp4' to 'train' folder.
Copied '17713.mp4' to 'test' folder.
Copied '17709.mp4' to 'train' folder.
Copied '17720.mp4' to 'train' folder.
Copied '17721.mp4' to 'train' folder.
Copied '17722.mp4' to 'train' folder.
Copied '17723.mp4' to 'train' folder.
Copied '12328.mp4' to 'train' folder.
Copied '12311.mp4' to 'train' folder.
Copied '12313.mp4' to 'train' folder.
Copied '12314.mp4' to 'train' folder.
Copied '12316.mp4' to 'train' folder.
Copied '12317.mp4' to 'train' folder.
Copied '12318.mp4' to 'train' folder.
Copied '12319.mp4' to 'train' folder.
Copied '12320.mp4' to 'test' folder.
Copied '12327.mp4' to 'train' folder.
Copied '05728.mp4' to 'train' folder.
Copied '05729.