In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm

In [2]:
# ================= CONFIG =================
DATASET_PATH = r"D:\Car Crash Detection\videos"    # dataset root; one sub-folder per class name
SAVE_DIR = r"D:\Car Crash Detection\processed"     # default output folder for the saved .npy files

IMG_SIZE = 160          # sampled frames are resized to IMG_SIZE x IMG_SIZE
SEQUENCE_LENGTH = 12    # default number of frames kept per video

# Class sub-folder name -> integer label; iteration follows insertion order.
CLASSES = dict(Crash=1, Normal=0)

In [3]:
def extract_frames(video_path, sequence_length=SEQUENCE_LENGTH):
    """Sample ``sequence_length`` evenly spaced frames from a video file.

    Every sampled frame is resized to ``IMG_SIZE`` x ``IMG_SIZE``, converted
    from BGR to RGB and scaled to float32 values in [0, 1].

    Parameters
    ----------
    video_path : str
        Path of the video file to decode.
    sequence_length : int
        Number of frames in the returned sequence.

    Returns
    -------
    numpy.ndarray or None
        float32 array of shape ``(sequence_length, IMG_SIZE, IMG_SIZE, 3)``,
        or ``None`` when the video cannot be opened, reports no frames,
        yields no decodable frame, or decoding raises an exception.

    Notes
    -----
    If the video has fewer frames than ``sequence_length`` the sampling
    indices contain repeats; each repeated index re-uses the same decoded
    frame so the whole clip is still covered. If decoding stops early
    (e.g. the reported frame count was too high), the sequence is padded
    with copies of the last frame obtained.
    """
    cap = cv2.VideoCapture(video_path)

    # try/finally guarantees the capture handle is released on every exit path.
    try:
        if not cap.isOpened():
            return None

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            return None

        # Non-decreasing frame numbers to sample; contains duplicates when
        # total_frames < sequence_length.
        frame_indices = np.linspace(0, total_frames - 1, sequence_length).astype(int)

        frames = []
        current_frame = 0   # index of the frame just read from the stream
        frame_pos = 0       # next slot of frame_indices still to be filled

        try:
            while frame_pos < sequence_length and cap.isOpened():
                ret, frame = cap.read()
                if not ret or frame is None:
                    break

                if current_frame == frame_indices[frame_pos]:
                    frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    frame = frame.astype("float32") / 255.0

                    # Fill every slot that asks for this frame number. Advancing
                    # by only one slot would stall on a duplicate index and
                    # discard the remainder of a short video.
                    while (frame_pos < sequence_length
                           and frame_indices[frame_pos] == current_frame):
                        frames.append(frame)
                        frame_pos += 1

                current_frame += 1
        except Exception:
            # Best-effort decoding: any failure marks the video as unusable.
            return None
    finally:
        cap.release()

    if len(frames) == 0:
        return None

    # Pad when the stream ended before every slot was filled.
    while len(frames) < sequence_length:
        frames.append(frames[-1])

    return np.array(frames, dtype="float32")


In [4]:
# Show the absolute form of SAVE_DIR, the default output folder of preprocess_dataset.
print("Saving tensors to:", os.path.abspath(SAVE_DIR))

Saving tensors to: D:\Car Crash Detection\processed


In [5]:
def preprocess_dataset(dataset_path, save_dir=SAVE_DIR):
    """Convert every video under ``dataset_path`` into saved NumPy files.

    For each class name in ``CLASSES`` the folder ``dataset_path/<class name>``
    is listed and each entry is decoded with ``extract_frames``. For every
    video that decodes, two files are written to ``save_dir``:

    * ``<class name>_<file stem>_X.npy`` -- the frame tensor
    * ``<class name>_<file stem>_y.npy`` -- the integer class label

    Videos whose two output files already exist are skipped *before* any
    decoding happens, so re-running after an interruption only pays for the
    videos that are still missing. Such already-processed videos are counted
    in neither of the printed totals.

    Parameters
    ----------
    dataset_path : str
        Root folder containing one sub-folder per key of ``CLASSES``.
    save_dir : str
        Output folder; created if it does not exist.

    Returns
    -------
    None
        A summary with the saved and skipped counts is printed.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when a class sub-folder is missing or
        unreadable.
    """
    os.makedirs(save_dir, exist_ok=True)

    saved_count = 0
    skipped_count = 0

    for class_name, label in CLASSES.items():
        class_path = os.path.join(dataset_path, class_name)

        for video_file in tqdm(os.listdir(class_path), desc=f"Processing {class_name}"):
            # Prefix the class name so same-named videos in different class
            # folders do not overwrite each other.
            base_name = os.path.splitext(video_file)[0]
            file_id = f"{class_name}_{base_name}"

            X_path = os.path.join(save_dir, f"{file_id}_X.npy")
            y_path = os.path.join(save_dir, f"{file_id}_y.npy")

            # Resume-safe: test for existing outputs first so an already
            # processed video is never decoded again.
            if os.path.exists(X_path) and os.path.exists(y_path):
                continue

            video_path = os.path.join(class_path, video_file)
            frames = extract_frames(video_path)
            if frames is None:
                skipped_count += 1
                continue

            np.save(X_path, frames)
            np.save(y_path, label)
            saved_count += 1

    print("✅ Preprocessing complete")
    print("Saved videos :", saved_count)
    print("Skipped videos :", skipped_count)


In [6]:
# Process every class folder under DATASET_PATH; outputs go to the default save_dir (SAVE_DIR).
preprocess_dataset(DATASET_PATH)

Processing Crash: 100%|████████████████████████████████████████████████████████████| 1500/1500 [05:46<00:00,  4.32it/s]
Processing Normal: 100%|███████████████████████████████████████████████████████████| 3000/3000 [11:38<00:00,  4.30it/s]

✅ Preprocessing complete
Saved videos : 4500
Skipped videos : 0





In [8]:
import glob

# Sanity check: count the tensor/label files found in the output folder.
# The patterns are built from SAVE_DIR (not a second hard-coded copy of the
# path) so this check always inspects the configured output folder.
# glob.escape keeps any glob metacharacters in the folder name literal.
X_files = glob.glob(os.path.join(glob.escape(SAVE_DIR), "*_X.npy"))
y_files = glob.glob(os.path.join(glob.escape(SAVE_DIR), "*_y.npy"))

print("X files:", len(X_files))
print("y files:", len(y_files))
print("Total files:", len(X_files) + len(y_files))

X files: 4500
y files: 4500
Total files: 9000
