In [1]:
from google.colab import drive
# Step 1: Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Step 2: Define the base dataset path in Google Drive
data_path = "/content/drive/MyDrive/TharuWorks/negative"  # Base path to the dataset

## Split the Dataset

In [3]:
from sklearn.model_selection import train_test_split
import shutil
import glob
import os

# Step 3: List all video files across all subfolders of the dataset
# This will include files from `carcrash'`, `Crash dataset russian`, `Dachcam_dataset` directories.
video_files = glob.glob(os.path.join(data_path, '**', '*.mp4'), recursive=True)

# Step 4: Split the dataset into train, validation, and test sets
train_files, temp_files = train_test_split(video_files, test_size=0.3, random_state=42)
val_files, test_files = train_test_split(temp_files, test_size=0.5, random_state=42)

# Step 5: Create directories in Google Drive for train, validation, and test sets
train_dir = os.path.join(data_path, 'train')
val_dir = os.path.join(data_path, 'val')
test_dir = os.path.join(data_path, 'test')

os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Step 6: Copy the files to the respective Google Drive folders if they do not already exist

def copy_files_if_not_exists(file_list, destination_dir):
    for file in file_list:
        # Define the destination path for each file
        destination = os.path.join(destination_dir, os.path.basename(file))
        # Copy the file only if it does not already exist
        if not os.path.exists(destination):
            shutil.copy(file, destination)

# Train files
copy_files_if_not_exists(train_files, train_dir)

# Validation files
copy_files_if_not_exists(val_files, val_dir)

# Test files
copy_files_if_not_exists(test_files, test_dir)

print("Files have been successfully split and saved in Google Drive.")

Files have been successfully split and saved in Google Drive.


## Frame Extraction

In [4]:
import cv2
import os
import math
import json

def ensure_dir(directory):
    """Ensure the output directory exists."""
    if not os.path.exists(directory):
        os.makedirs(directory)

def extract_frames(video_path, output_folder, interval_seconds=3):
    """
    Extract frames from a video based on the 3-second rule and log metadata.

    Parameters:
    - video_path: Path to the input video file.
    - output_folder: Path to save extracted frames.
    - interval_seconds: Time interval between frames in seconds (default = 3 seconds).
    """
    # Open the video file
    capture = cv2.VideoCapture(video_path)
    if not capture.isOpened():
        print(f"Error opening video file: {video_path}")
        return

    # Get video properties
    fps = int(capture.get(cv2.CAP_PROP_FPS))  # Frames per second
    total_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))  # Total number of frames
    duration = total_frames / fps  # Calculate video duration in seconds
    frame_interval = math.ceil(fps * interval_seconds)  # Frames to skip for 3-second rule

    print(f"Processing video: {video_path}")
    print(f"FPS: {fps}, Frame Interval: {frame_interval} frames, Duration: {duration:.2f} seconds")

    # Prepare the output folder
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    output_dir = os.path.join(output_folder, video_name)
    ensure_dir(output_dir)

    frame_count = 0
    saved_frames = 0

    while capture.isOpened():
        ret, frame = capture.read()
        if not ret:
            break  # End of video

        # Save frames at the calculated interval
        if frame_count % frame_interval == 0:
            frame_filename = os.path.join(output_dir, f"frame_{frame_count}.jpg")
            if not os.path.exists(frame_filename):  # Avoid duplicates
                cv2.imwrite(frame_filename, frame)
                saved_frames += 1

        frame_count += 1

    capture.release()

    # Log metadata
    video_metadata = {
        "video_name": video_name,
        "video_path": video_path,
        "fps": fps,
        "duration": duration,
        "total_frames": total_frames,
        "saved_frames": saved_frames,
        "output_directory": output_dir
    }
    metadata_file_path = os.path.join(output_dir, "metadata_log.json")
    ensure_dir(os.path.dirname(metadata_file_path))

    # Write metadata to a JSON file
    with open(metadata_file_path, "w") as log:
        json.dump(video_metadata, log, indent=4)

    print(f"Extracted {saved_frames} frames from {video_path} to {output_dir}.")
    print(f"Metadata saved at {metadata_file_path}")

# Example Usage
video_dataset_folder = "/content/drive/MyDrive/TharuWorks/negative/train"  # Replace with your folder containing dashcam videos
output_frames_folder = "/content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/train"  # Replace with your output folder

def process_video_dataset(video_folder, output_folder):
    """Process all videos in a dataset folder."""
    ensure_dir(output_folder)

    for root, _, files in os.walk(video_folder):
        for file in files:
            if file.endswith(('.mp4', '.avi', '.mov', '.mkv')):  # Supported video formats
                video_path = os.path.join(root, file)
                extract_frames(video_path, output_folder)

# Run the script
process_video_dataset(video_dataset_folder, output_frames_folder)

Processing video: /content/drive/MyDrive/TharuWorks/negative/train/w2_19.mp4
FPS: 30, Frame Interval: 90 frames, Duration: 6.43 seconds
Extracted 3 frames from /content/drive/MyDrive/TharuWorks/negative/train/w2_19.mp4 to /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/train/w2_19.
Metadata saved at /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/train/w2_19/metadata_log.json
Processing video: /content/drive/MyDrive/TharuWorks/negative/train/w1-33.mp4
FPS: 30, Frame Interval: 90 frames, Duration: 10.07 seconds
Extracted 4 frames from /content/drive/MyDrive/TharuWorks/negative/train/w1-33.mp4 to /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/train/w1-33.
Metadata saved at /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/train/w1-33/metadata_log.json
Processing video: /content/drive/MyDrive/TharuWorks/negative/train/w1-27.mp4
FPS: 30, Frame Interval: 90 frames, Duration: 8.40 seconds
Extracted 3 frames from /content/drive/MyDrive/TharuWorks/negativ

In [5]:
import cv2
import os
import math
import json

def ensure_dir(directory):
    """Ensure the output directory exists."""
    if not os.path.exists(directory):
        os.makedirs(directory)

def extract_frames(video_path, output_folder, interval_seconds=3):
    """
    Extract frames from a video based on the 3-second rule and log metadata.

    Parameters:
    - video_path: Path to the input video file.
    - output_folder: Path to save extracted frames.
    - interval_seconds: Time interval between frames in seconds (default = 3 seconds).
    """
    # Open the video file
    capture = cv2.VideoCapture(video_path)
    if not capture.isOpened():
        print(f"Error opening video file: {video_path}")
        return

    # Get video properties
    fps = int(capture.get(cv2.CAP_PROP_FPS))  # Frames per second
    total_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))  # Total number of frames
    duration = total_frames / fps  # Calculate video duration in seconds
    frame_interval = math.ceil(fps * interval_seconds)  # Frames to skip for 3-second rule

    print(f"Processing video: {video_path}")
    print(f"FPS: {fps}, Frame Interval: {frame_interval} frames, Duration: {duration:.2f} seconds")

    # Prepare the output folder
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    output_dir = os.path.join(output_folder, video_name)
    ensure_dir(output_dir)

    frame_count = 0
    saved_frames = 0

    while capture.isOpened():
        ret, frame = capture.read()
        if not ret:
            break  # End of video

        # Save frames at the calculated interval
        if frame_count % frame_interval == 0:
            frame_filename = os.path.join(output_dir, f"frame_{frame_count}.jpg")
            if not os.path.exists(frame_filename):  # Avoid duplicates
                cv2.imwrite(frame_filename, frame)
                saved_frames += 1

        frame_count += 1

    capture.release()

    # Log metadata
    video_metadata = {
        "video_name": video_name,
        "video_path": video_path,
        "fps": fps,
        "duration": duration,
        "total_frames": total_frames,
        "saved_frames": saved_frames,
        "output_directory": output_dir
    }
    metadata_file_path = os.path.join(output_dir, "metadata_log.json")
    ensure_dir(os.path.dirname(metadata_file_path))

    # Write metadata to a JSON file
    with open(metadata_file_path, "w") as log:
        json.dump(video_metadata, log, indent=4)

    print(f"Extracted {saved_frames} frames from {video_path} to {output_dir}.")
    print(f"Metadata saved at {metadata_file_path}")

# Example Usage
video_dataset_folder = "/content/drive/MyDrive/TharuWorks/negative/val"  # Replace with your folder containing dashcam videos
output_frames_folder = "/content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/val"  # Replace with your output folder

def process_video_dataset(video_folder, output_folder):
    """Process all videos in a dataset folder."""
    ensure_dir(output_folder)

    for root, _, files in os.walk(video_folder):
        for file in files:
            if file.endswith(('.mp4', '.avi', '.mov', '.mkv')):  # Supported video formats
                video_path = os.path.join(root, file)
                extract_frames(video_path, output_folder)

# Run the script
process_video_dataset(video_dataset_folder, output_frames_folder)

Processing video: /content/drive/MyDrive/TharuWorks/negative/val/w1-14.mp4
FPS: 30, Frame Interval: 90 frames, Duration: 10.80 seconds
Extracted 4 frames from /content/drive/MyDrive/TharuWorks/negative/val/w1-14.mp4 to /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/val/w1-14.
Metadata saved at /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/val/w1-14/metadata_log.json
Processing video: /content/drive/MyDrive/TharuWorks/negative/val/w1-44.mp4
FPS: 30, Frame Interval: 90 frames, Duration: 8.20 seconds
Extracted 3 frames from /content/drive/MyDrive/TharuWorks/negative/val/w1-44.mp4 to /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/val/w1-44.
Metadata saved at /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/val/w1-44/metadata_log.json
Processing video: /content/drive/MyDrive/TharuWorks/negative/val/w1_9.mp4
FPS: 30, Frame Interval: 90 frames, Duration: 5.30 seconds
Extracted 2 frames from /content/drive/MyDrive/TharuWorks/negative/val/w1_9.mp4 to /

In [6]:
import cv2
import os
import math
import json

def ensure_dir(directory):
    """Ensure the output directory exists."""
    if not os.path.exists(directory):
        os.makedirs(directory)

def extract_frames(video_path, output_folder, interval_seconds=3):
    """
    Extract frames from a video based on the 3-second rule and log metadata.

    Parameters:
    - video_path: Path to the input video file.
    - output_folder: Path to save extracted frames.
    - interval_seconds: Time interval between frames in seconds (default = 3 seconds).
    """
    # Open the video file
    capture = cv2.VideoCapture(video_path)
    if not capture.isOpened():
        print(f"Error opening video file: {video_path}")
        return

    # Get video properties
    fps = int(capture.get(cv2.CAP_PROP_FPS))  # Frames per second
    total_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))  # Total number of frames
    duration = total_frames / fps  # Calculate video duration in seconds
    frame_interval = math.ceil(fps * interval_seconds)  # Frames to skip for 3-second rule

    print(f"Processing video: {video_path}")
    print(f"FPS: {fps}, Frame Interval: {frame_interval} frames, Duration: {duration:.2f} seconds")

    # Prepare the output folder
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    output_dir = os.path.join(output_folder, video_name)
    ensure_dir(output_dir)

    frame_count = 0
    saved_frames = 0

    while capture.isOpened():
        ret, frame = capture.read()
        if not ret:
            break  # End of video

        # Save frames at the calculated interval
        if frame_count % frame_interval == 0:
            frame_filename = os.path.join(output_dir, f"frame_{frame_count}.jpg")
            if not os.path.exists(frame_filename):  # Avoid duplicates
                cv2.imwrite(frame_filename, frame)
                saved_frames += 1

        frame_count += 1

    capture.release()

    # Log metadata
    video_metadata = {
        "video_name": video_name,
        "video_path": video_path,
        "fps": fps,
        "duration": duration,
        "total_frames": total_frames,
        "saved_frames": saved_frames,
        "output_directory": output_dir
    }
    metadata_file_path = os.path.join(output_dir, "metadata_log.json")
    ensure_dir(os.path.dirname(metadata_file_path))

    # Write metadata to a JSON file
    with open(metadata_file_path, "w") as log:
        json.dump(video_metadata, log, indent=4)

    print(f"Extracted {saved_frames} frames from {video_path} to {output_dir}.")
    print(f"Metadata saved at {metadata_file_path}")

# Example Usage
video_dataset_folder = "/content/drive/MyDrive/TharuWorks/negative/test"  # Replace with your folder containing dashcam videos
output_frames_folder = "/content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/test"  # Replace with your output folder

def process_video_dataset(video_folder, output_folder):
    """Process all videos in a dataset folder."""
    ensure_dir(output_folder)

    for root, _, files in os.walk(video_folder):
        for file in files:
            if file.endswith(('.mp4', '.avi', '.mov', '.mkv')):  # Supported video formats
                video_path = os.path.join(root, file)
                extract_frames(video_path, output_folder)

# Run the script
process_video_dataset(video_dataset_folder, output_frames_folder)

Processing video: /content/drive/MyDrive/TharuWorks/negative/test/w1_7.mp4
FPS: 30, Frame Interval: 90 frames, Duration: 4.67 seconds
Extracted 2 frames from /content/drive/MyDrive/TharuWorks/negative/test/w1_7.mp4 to /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/test/w1_7.
Metadata saved at /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/test/w1_7/metadata_log.json
Processing video: /content/drive/MyDrive/TharuWorks/negative/test/w1-40.mp4
FPS: 30, Frame Interval: 90 frames, Duration: 9.60 seconds
Extracted 4 frames from /content/drive/MyDrive/TharuWorks/negative/test/w1-40.mp4 to /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/test/w1-40.
Metadata saved at /content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/test/w1-40/metadata_log.json
Processing video: /content/drive/MyDrive/TharuWorks/negative/test/w1-43.mp4
FPS: 30, Frame Interval: 90 frames, Duration: 12.30 seconds
Extracted 5 frames from /content/drive/MyDrive/TharuWorks/negative/test/w1-43.