## Install Necessary Libraries

In [1]:
!pip install ultralytics opencv-python-headless moviepy --quiet
import os
import torch
import cv2
from ultralytics import YOLO
import matplotlib.pyplot as plt

   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 887.0/887.0 kB 39.6 MB/s eta 0:00:00
Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


## Mount Google Drive

In [2]:
from google.colab import drive
# Step 1: Make Google Drive available to this runtime
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Step 2: Define the base dataset path in Google Drive
# The split cell below searches this folder recursively for .mp4 files and also
# creates its train/val/test subfolders inside it.
data_path = "/content/drive/MyDrive/TharuWorks/negative"  # Base path to the dataset

## Split the Dataset

In [4]:
from sklearn.model_selection import train_test_split
import shutil
import glob

# Step 3: List all video files across all subfolders of the dataset
# This will include files from the `carcrash`, `Crash dataset russian`, `Dachcam_dataset` directories.
#
# The train/val/test folders are created INSIDE data_path, so on any re-run the
# recursive glob would also return the videos copied there earlier. Those copies
# would then be split again and could land in a different subset than their
# source (train/val/test leakage, and the splits would grow on every run).
# They are therefore excluded from the listing.
train_dir = os.path.join(data_path, 'train')
val_dir = os.path.join(data_path, 'val')
test_dir = os.path.join(data_path, 'test')

# os.path.join(d, '') appends the trailing separator, so only paths that are
# really inside a split folder match (not e.g. a sibling named 'train_raw').
split_roots = tuple(os.path.join(split_dir, '') for split_dir in (train_dir, val_dir, test_dir))

video_files = [
    path
    for path in glob.glob(os.path.join(data_path, '**', '*.mp4'), recursive=True)
    if not path.startswith(split_roots)
]

# Step 4: Split the dataset into train, validation, and test sets
# 70% train, then the remaining 30% is halved into 15% validation / 15% test.
# NOTE(review): random_state fixes the shuffle, but the result is only
# reproducible if glob returns the files in the same order on every run.
train_files, temp_files = train_test_split(video_files, test_size=0.3, random_state=42)
val_files, test_files = train_test_split(temp_files, test_size=0.5, random_state=42)

# Step 5: Create directories in Google Drive for train, validation, and test sets
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Step 6: Copy the files to the respective Google Drive folders if they do not already exist

def copy_files_if_not_exists(file_list, destination_dir):
    """
    Copy each file in `file_list` into `destination_dir` unless it is already there.

    Files are stored under their base name. A file whose destination already
    exists is skipped, so the function can be re-run safely.

    Each copy is written to a temporary ``<name>.part`` file first and only
    renamed to its final name once the copy has finished. An interrupted run
    therefore never leaves a truncated file under the final name, which the
    existence check would otherwise skip forever.

    NOTE(review): because only the base name is used, two source files with the
    same file name in different folders map to the same destination and only
    the first one is copied.

    Args:
        file_list (iterable of str): Paths of the files to copy.
        destination_dir (str): Folder to copy into; created if it is missing.
    """
    os.makedirs(destination_dir, exist_ok=True)

    for file in file_list:
        # Define the destination path for each file
        destination = os.path.join(destination_dir, os.path.basename(file))

        # Copy the file only if it does not already exist
        if os.path.exists(destination):
            continue

        partial = destination + ".part"
        try:
            shutil.copy(file, partial)
            # Promote the finished copy to its final name in one step.
            os.replace(partial, destination)
        except BaseException:
            # Also covers KeyboardInterrupt (notebook "stop" button): remove the
            # incomplete copy, then let the original error propagate.
            if os.path.exists(partial):
                os.remove(partial)
            raise

# Copy every split (train, validation, test) into its own folder
split_targets = (
    (train_files, train_dir),
    (val_files, val_dir),
    (test_files, test_dir),
)
for split_files, split_dir in split_targets:
    copy_files_if_not_exists(split_files, split_dir)

print("Files have been successfully split and saved in Google Drive.")

Files have been successfully split and saved in Google Drive.


## Frame Extraction

In [5]:
import cv2
import os

def extract_frames_based_on_distance(folder_path, output_folder, target_distance_per_frame=0.41, target_gap=49.18):
    """
    Extract every N-th frame from each video in a folder and save it as a JPEG.

    For each video the sampling interval N is derived from the video's FPS:

        meters_per_frame = target_distance_per_frame * (fps / 60)
        N = int((target_gap / meters_per_frame) / fps), but never less than 1

    Frames whose index is a multiple of N are written to `output_folder` as
    ``<video file name>_frame_<index>.jpg``. Frames that already exist on disk
    are not rewritten, so an interrupted extraction can be re-run.

    Videos that cannot be opened, or that report no usable FPS, are skipped with
    a printed message instead of aborting the whole folder.

    NOTE(review): the constant 60 looks like the reference FPS that
    `target_distance_per_frame` was measured at, and the defaults are described
    as matching a 3-second gap — confirm the formula against that intent.

    Args:
        folder_path (str): Path to the folder containing videos.
        output_folder (str): Path to the folder where frames should be saved.
        target_distance_per_frame (float): Distance covered per frame in meters.
        target_gap (float): Target distance for a 3-second gap in meters.

    Raises:
        ValueError: If `target_distance_per_frame` or `target_gap` is not positive.
    """
    if target_distance_per_frame <= 0 or target_gap <= 0:
        raise ValueError("target_distance_per_frame and target_gap must be positive")

    os.makedirs(output_folder, exist_ok=True)
    video_extensions = (".mp4", ".avi", ".mkv")

    # Sorted so videos are processed in a stable, predictable order.
    for video_file in sorted(os.listdir(folder_path)):
        # Skip files that are not videos (extension check is case-insensitive)
        if not video_file.lower().endswith(video_extensions):
            continue

        video_path = os.path.join(folder_path, video_file)
        cap = cv2.VideoCapture(video_path)
        try:
            fps = cap.get(cv2.CAP_PROP_FPS)  # Get the video's FPS

            # An unreadable video reports an FPS of 0, which would otherwise
            # cause a division by zero below. `not fps > 0` also rejects NaN.
            if not cap.isOpened() or not fps > 0:
                print(f"Skipping {video_file}: could not open the video or read a valid FPS.")
                continue

            # Calculate the frame interval for this video's FPS
            meters_per_frame = target_distance_per_frame * (fps / 60)
            # For high FPS values the integer part is 0; clamp to 1 so that
            # `count % frame_interval` is always defined (every frame is kept).
            frame_interval = max(1, int((target_gap / meters_per_frame) / fps))

            count = 0
            success, frame = cap.read()
            while success:
                if count % frame_interval == 0:
                    frame_filename = os.path.join(output_folder, f"{video_file}_frame_{count}.jpg")
                    # Resume support: keep frames written by an earlier run.
                    if not os.path.exists(frame_filename):
                        cv2.imwrite(frame_filename, frame)

                count += 1
                success, frame = cap.read()
        finally:
            # Always free the decoder, including on skip or error.
            cap.release()

In [6]:
# Extract frames from the training videos
folder_path = "/content/drive/MyDrive/TharuWorks/negative/train"
output_folder = "/content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/train"
extract_frames_based_on_distance(folder_path=folder_path, output_folder=output_folder)

In [7]:
# Extract frames from the test videos
folder_path = "/content/drive/MyDrive/TharuWorks/negative/test"
output_folder = "/content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/test"
extract_frames_based_on_distance(folder_path=folder_path, output_folder=output_folder)

In [8]:
# Extract frames from the validation videos
folder_path = "/content/drive/MyDrive/TharuWorks/negative/val"
output_folder = "/content/drive/MyDrive/TharuWorks/Frames/Negative_Extract/val"
extract_frames_based_on_distance(folder_path=folder_path, output_folder=output_folder)