## Frame creation from video

In [4]:
import cv2
import os
import shutil
import random
import zipfile

In [2]:
# === CONFIGURATION ===
video_path = 'sample.mp4'     # Replace with your video file
output_dir = 'frames'             # Folder to save frames
frame_interval = 1              # Save every Nth frame (1 = keep every frame)

# A zero interval would raise ZeroDivisionError in the modulo test below;
# reject non-positive values up front with a clearer message.
if frame_interval < 1:
    raise ValueError(f"frame_interval must be >= 1, got {frame_interval}")

# === CREATE OUTPUT FOLDER ===
os.makedirs(output_dir, exist_ok=True)

# === OPEN VIDEO ===
cap = cv2.VideoCapture(video_path)

# VideoCapture does not raise when the file is missing or unreadable. Without
# this check the loop would exit on the first read and the cell would still
# print the success message after writing zero frames.
if not cap.isOpened():
    cap.release()
    raise FileNotFoundError(f"Could not open video: {video_path}")

frame_id = 0      # Current frame in video
saved_id = 0      # Frame image counter for output

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No more frames could be read.
            break

        if frame_id % frame_interval == 0:
            output_path = os.path.join(output_dir, f"frame_{saved_id}.jpg")
            # imwrite reports failure through its return value, not an exception.
            if not cv2.imwrite(output_path, frame):
                raise OSError(f"Failed to write frame to {output_path}")
            saved_id += 1

        frame_id += 1
finally:
    # Release the capture even if reading or writing a frame failed.
    cap.release()

print("✅ Done extracting frames.")

✅ Done extracting frames.


## Unzip the labels file created by labelImg and create empty labels for images without a name tag

In [6]:
# Unpack the labels archive into a folder relative to the working directory.
zip_path = "labels.zip"  # Replace with the actual filename
extract_dir = "extracted_folder"  # or any desired folder name
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_dir)


In [7]:
images_dir = 'frames'
labels_dir = 'extracted_folder/labels'

# Create the label folder if it is not already there.
os.makedirs(labels_dir, exist_ok=True)

# Every .jpg/.png image gets a same-named .txt label file; images that have
# no label file yet receive an empty one, existing files are left untouched.
image_extensions = ('.jpg', '.png')
for image_name in os.listdir(images_dir):
    if not image_name.endswith(image_extensions):
        continue

    label_path = os.path.join(labels_dir, os.path.splitext(image_name)[0] + '.txt')
    if os.path.exists(label_path):
        continue

    # Opening in write mode and closing immediately leaves a zero-byte file.
    with open(label_path, 'w'):
        pass


## Splitting the dataset into train, validation & test sets

In [9]:
images_data = 'frames'
labels_data = 'extracted_folder/labels'

base_dir = 'dataset'
splits = ['train','val','test']
split_ratio = [0.7, 0.2, 0.1]
split_seed = 42   # Fixed seed so the same files land in the same split on every run

# Create dataset/images/<split> and dataset/labels/<split> for each split.
for split in splits:
    os.makedirs(os.path.join(base_dir, 'images', split), exist_ok=True)
    os.makedirs(os.path.join(base_dir, 'labels', split), exist_ok=True)

# os.listdir returns entries in arbitrary order, so sort before shuffling;
# otherwise even a seeded shuffle could produce a different partition.
image_files = sorted(f for f in os.listdir(images_data) if f.endswith(('.jpg', '.png')))

# A dedicated seeded generator keeps the split reproducible without touching
# the global random state. Reproducibility matters here: a different shuffle
# on a re-run would assign files to different splits than before.
random.Random(split_seed).shuffle(image_files)

total = len(image_files)
train_end = int(total * split_ratio[0])
val_end = train_end + int(total * split_ratio[1])

# 'test' takes whatever remains after train and val, so every image ends up
# in exactly one split regardless of rounding.
split_files = {
    'train': image_files[:train_end],
    'val': image_files[train_end:val_end],
    'test': image_files[val_end:]
}

In [10]:
def copy_split(split_name, files):
    """Copy the given images and their label files into one dataset split.

    Reads the module-level ``images_data``, ``labels_data`` and ``base_dir``.
    Each image goes to ``<base_dir>/images/<split_name>/`` and its ``.txt``
    label to ``<base_dir>/labels/<split_name>/``. When an image has no label
    file in ``labels_data``, an empty label file is created at the destination.

    Args:
        split_name: Name of the split sub-folder (e.g. 'train').
        files: Iterable of image file names located in ``images_data``.
    """
    image_dest_dir = os.path.join(base_dir, 'images', split_name)
    label_dest_dir = os.path.join(base_dir, 'labels', split_name)

    for image_name in files:
        # Image: straight copy into the split's image folder.
        shutil.copy(
            os.path.join(images_data, image_name),
            os.path.join(image_dest_dir, image_name),
        )

        # Label: same stem as the image, with a .txt extension.
        stem, _ext = os.path.splitext(image_name)
        label_name = stem + '.txt'
        label_source = os.path.join(labels_data, label_name)
        label_target = os.path.join(label_dest_dir, label_name)

        if not os.path.exists(label_source):
            # No label available: leave an empty label file in its place.
            with open(label_target, 'w'):
                pass
            continue

        shutil.copy(label_source, label_target)

In [11]:
# Fill each split folder in turn and report how many images it received.
for subset in splits:
    subset_files = split_files[subset]
    copy_split(subset, subset_files)
    print(f"{subset.capitalize()}: {len(subset_files)} files")

Train: 938 files
Val: 268 files
Test: 135 files
