In [4]:
import os
import cv2
import random
import numpy as np
import albumentations as A
from albumentations.core.composition import OneOf
import imghdr

  import imghdr


In [None]:
def _sniff_image_format(file_path):
    """Return the image format implied by a file's leading bytes, or None.

    Only the formats counted by ``count_images_in_folder`` are recognised.
    Unreadable files (permission errors, broken symlinks, ...) yield None
    instead of raising.
    """
    try:
        with open(file_path, "rb") as handle:
            header = handle.read(16)
    except OSError:
        return None

    # Every JPEG starts with the SOI marker followed by another marker byte;
    # requiring a JFIF/Exif tag would miss valid JPEGs written without one.
    if header.startswith(b"\xff\xd8\xff"):
        return "jpeg"
    if header.startswith(b"\x89PNG\r\n\x1a\n"):
        return "png"
    if header[:6] in (b"GIF87a", b"GIF89a"):
        return "gif"
    # Classic TIFF and BigTIFF, little- and big-endian byte order.
    if header[:4] in (b"II*\x00", b"MM\x00*", b"II+\x00", b"MM\x00+"):
        return "tiff"
    if header.startswith(b"BM"):
        return "bmp"
    if header.startswith(b"RIFF") and header[8:12] == b"WEBP":
        return "webp"
    return None


def count_images_in_folder(folder_path):
    """
    Count the total number of image files in a folder.

    The folder is walked recursively and each file is classified by its
    leading bytes (not its extension). Detection is done locally rather than
    with the deprecated ``imghdr`` module.

    Parameters:
    - folder_path (str): Path to the folder to search for images.

    Returns:
    - int: The total number of image files in the folder (0 if the folder
      does not exist or contains no recognised images).
    """
    supported_formats = {'jpeg', 'png', 'gif', 'bmp', 'tiff', 'webp'}

    return sum(
        1
        for root, _, files in os.walk(folder_path)
        for file in files
        if _sniff_image_format(os.path.join(root, file)) in supported_formats
    )

In [None]:
# Sanity check: count every recognised image file under the raw "Arjun"
# folder (subfolders included) before deciding how many to augment.
folder_path = "./data/raw/Arjun" 
total_images = count_images_in_folder(folder_path)
print(f"Total number of images in the folder: {total_images}")

Total number of images in the folder: 327


In [None]:
def seed_everything(seed=42):
    """Seed Python's and NumPy's global random generators with ``seed`` so runs are repeatable."""
    for seeder in (random.seed, np.random.seed):
        seeder(seed)

def load_images_from_folder(folder_path):
    """Return the paths of the image files directly inside ``folder_path``.

    Despite the name, no pixel data is read: the result is a list of file
    paths whose extension is .png, .jpg or .jpeg (case-insensitive).
    Subfolders are not searched, and directories are skipped even if their
    name looks like an image file.

    The list is sorted because ``os.listdir`` returns entries in arbitrary
    order; a stable order keeps seeded random sampling from this list
    reproducible across machines.

    Parameters:
    - folder_path (str): Folder to scan.

    Returns:
    - list[str]: Sorted image file paths (empty if none match).
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    candidates = (
        os.path.join(folder_path, filename)
        for filename in sorted(os.listdir(folder_path))
        if filename.lower().endswith(image_extensions)
    )
    return [path for path in candidates if os.path.isfile(path)]

def save_image(output_folder, image, start_number):
    """Save ``image`` as ``<start_number>.jpg`` and return the next free number.

    Parameters:
    - output_folder (str): Destination folder; created if it does not exist.
    - image: Image array accepted by ``cv2.imwrite``.
    - start_number (int): Number used as the file name (without extension).

    Returns:
    - int: ``start_number + 1``, ready to be used for the next image.

    Raises:
    - OSError: If OpenCV reports that the file could not be written.
    """
    os.makedirs(output_folder, exist_ok=True)
    filename = os.path.join(output_folder, f"{start_number}.jpg")
    # cv2.imwrite signals failure through its return value; ignoring it would
    # advance the numbering as if the file had been written.
    if not cv2.imwrite(filename, image):
        raise OSError(f"Failed to write image: {filename}")
    return start_number + 1

def augment_images(input_folder, output_folder, target_count, starting_number=1, seed=42):
    """Generate augmented copies of the source images until a target size is reached.

    The running total starts at the number of source images found in
    ``input_folder``; one augmented image is written per iteration until the
    total reaches ``target_count``. If the folder already holds at least
    ``target_count`` images, nothing is generated.

    Parameters:
    - input_folder (str): Folder with the source images (.png/.jpg/.jpeg).
    - output_folder (str): Folder for the augmented images; created if missing.
    - target_count (int): Desired total of source plus augmented images.
    - starting_number (int): File number of the first augmented image;
      subsequent images are numbered consecutively.
    - seed (int): Seed passed to ``seed_everything`` before sampling.

    Returns:
    - None. Progress and the final total are printed.
    """
    # NOTE(review): depending on the installed Albumentations version, the
    # transforms may draw from their own RNG rather than the global
    # `random` / `np.random` state seeded here -- confirm reproducibility.
    seed_everything(seed)

    images = load_images_from_folder(input_folder)
    if not images:
        print("No images found in the folder!")
        return

    # Make sure the destination exists before anything is written to it.
    os.makedirs(output_folder, exist_ok=True)

    augmentations = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=10, p=0.5),

        # At most one geometric distortion per image.
        OneOf([
            A.Perspective(scale=(0.05, 0.1), p=0.3),
            A.GridDistortion(p=0.3)
        ], p=0.5),

        # At most one colour/contrast adjustment per image.
        OneOf([
            A.RandomBrightnessContrast(p=0.2),
            A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=10, val_shift_limit=10, p=0.2),
            A.CLAHE(clip_limit=4.0, tile_grid_size=(8, 8), p=0.3),
        ], p=0.4),
    ])

    # Paths still eligible for sampling; unreadable files are dropped so the
    # loop cannot spin forever on images that never load.
    candidates = list(images)
    current_count = len(images)
    start_number = starting_number

    while current_count < target_count:
        if not candidates:
            print("No readable images left to augment; stopping early.")
            break

        image_path = random.choice(candidates)
        image = cv2.imread(image_path)

        if image is None:
            print(f"Failed to load image: {image_path}")
            candidates.remove(image_path)
            continue

        augmented_image = augmentations(image=image)["image"]

        start_number = save_image(output_folder, augmented_image, start_number)
        current_count += 1

    print(f"Augmented dataset expanded to {current_count} images.")

In [None]:
# Top up the "Wildan" set: augment_images keeps generating augmented images
# until (source image count + generated images) reaches target_count, naming
# the generated files from starting_number upward.
input_folder = "./data/raw/Wildan" 
output_folder = "./data/augmented/Wildan"  
target_count = 421  
starting_number = 163 

augment_images(input_folder, output_folder, target_count, starting_number)

Augmented dataset expanded to 421 images.


In [None]:
# Top up the "Arjun" set: augment_images keeps generating augmented images
# until (source image count + generated images) reaches target_count, naming
# the generated files from starting_number upward.
input_folder = "./data/raw/Arjun"  
output_folder = "./data/augmented/Arjun"  
target_count = 620  
starting_number = 661 

augment_images(input_folder, output_folder, target_count, starting_number)

Augmented dataset expanded to 620 images.


In [10]:
def extractImages(pathIn, pathOut, baseFilename):
    """Save one frame per second of a video as JPEG files.

    Frames are sampled at 0 ms, 1000 ms, 2000 ms, ... and written to
    ``<pathOut>/<baseFilename>_frame<N>.jpg`` where ``N`` is the sample index.
    Sampling stops at the first position that cannot be read, which is also
    how reaching the end of the video shows up.

    Parameters:
    - pathIn (str): Path of the video file to read.
    - pathOut (str): Folder for the extracted frames; created if missing.
    - baseFilename (str): Prefix used for the frame file names.

    Returns:
    - None. Progress and errors are printed.
    """
    vidcap = cv2.VideoCapture(pathIn)
    if not vidcap.isOpened():
        print(f"Error: Could not open video file {pathIn}")
        vidcap.release()
        return

    os.makedirs(pathOut, exist_ok=True)

    count = 0
    try:
        while True:
            # Seek to the next whole second, then decode the frame there.
            vidcap.set(cv2.CAP_PROP_POS_MSEC, (count * 1000))
            success, image = vidcap.read()
            if not success:
                print(f"Error: Could not read frame at {count * 1000} ms")
                break
            if image is None:
                print(f"Error: Frame at {count * 1000} ms is empty")
                break
            print('Read a new frame: ', success)

            frame_path = os.path.join(pathOut, f"{baseFilename}_frame{count}.jpg")
            # cv2.imwrite reports failure via its return value, not an exception.
            if not cv2.imwrite(frame_path, image):
                print(f"Error: Could not write frame to {frame_path}")
                break
            count += 1
    finally:
        # Always free the decoder/file handle, even if a read or write raises.
        vidcap.release()

def processAllVideos(folderIn, folderOut):
    """Extract frames from every MP4 video directly inside ``folderIn``.

    Files are matched by a case-insensitive ``.mp4`` extension (so ``.MP4``
    is picked up as well) and processed in sorted name order. Each video is
    handed to ``extractImages`` with its file name minus the extension as
    the frame prefix.

    Parameters:
    - folderIn (str): Folder containing the video files.
    - folderOut (str): Folder that receives the extracted frames.

    Returns:
    - None.
    """
    for filename in sorted(os.listdir(folderIn)):
        if not filename.lower().endswith(".mp4"):
            continue
        pathIn = os.path.join(folderIn, filename)
        baseFilename = os.path.splitext(filename)[0]
        extractImages(pathIn, folderOut, baseFilename)


In [None]:
# Extract frames (one per second of footage) from every .mp4 in the Andakara
# video folder and write them into the raw Andakara image folder.
folderIn = "./data/raw/Andakara/video"
folderOut = "./data/raw/Andakara"
processAllVideos(folderIn, folderOut)

Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Error: Could not read frame at 20000 ms
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Error: Could not read frame at 6000 ms
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Error: Could not read frame at 8000 ms
Read a new frame:  True
Read a new frame:  True
Read a new frame: 

In [None]:
# Count the recognised image files under the raw "Andakara" folder
# (subfolders included) after frame extraction.
folder_path = "./data/raw/Andakara"  
total_images = count_images_in_folder(folder_path)
print(f"Total number of images in the folder: {total_images}")

Total number of images in the folder: 169


In [None]:
# Top up the "Andakara" set: augment_images keeps generating augmented images
# until (source image count + generated images) reaches target_count, naming
# the generated files from starting_number upward.
input_folder = "./data/raw/Andakara"  
output_folder = "./data/augmented/Andakara"  
target_count =  200
starting_number = 954

augment_images(input_folder, output_folder, target_count, starting_number)

Augmented dataset expanded to 200 images.


In [11]:
# Second batch of Andakara videos; frames go into the same raw image folder
# as the first batch.
# NOTE(review): frame files are named "<video name>_frame<N>.jpg", so a video
# here that shares its name with one from the first batch would overwrite
# that video's frames -- confirm the video names are unique.
folderIn = "./data/raw/Andakara/video/2"
folderOut = "./data/raw/Andakara"
processAllVideos(folderIn, folderOut)

Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Error: Could not read frame at 14000 ms
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Read a new frame:  True
Error: Could not read frame at 13000 ms


In [15]:
def renumber_files(directory, start, extension=".png"):
    """Rename every regular file in ``directory`` to ``<number><extension>``.

    Numbers are assigned consecutively from ``start``. Files are ordered by
    their current numeric name when they have one (so 5 comes before 10),
    then alphabetically. Subdirectories are left untouched.

    The rename is done in two passes through temporary names. Renaming
    directly would require deleting any file that already owns a target
    name, and that file may be one that has not been renamed yet.

    Parameters:
    - directory (str): Folder whose files are renumbered.
    - start (int): Number given to the first file.
    - extension (str): Extension (including the dot) for the new names.
      Only the name changes; file contents are not converted.

    Returns:
    - list[str]: The new file paths, in numbering order.
    """
    import uuid  # only needed here; keeps the notebook's import cell unchanged

    def _sort_key(name):
        stem = os.path.splitext(name)[0]
        if stem.isdecimal():
            return (0, int(stem), name)
        return (1, 0, name)

    names = sorted(
        (
            name
            for name in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, name))
        ),
        key=_sort_key,
    )

    # Pass 1: move everything out of the way so no target name is occupied.
    token = uuid.uuid4().hex
    staged_paths = []
    for offset, name in enumerate(names):
        staged_path = os.path.join(directory, f".renumber-{token}-{offset}")
        os.rename(os.path.join(directory, name), staged_path)
        staged_paths.append(staged_path)

    # Pass 2: assign the final sequential names.
    final_paths = []
    for number, staged_path in enumerate(staged_paths, start=start):
        final_path = os.path.join(directory, f"{number}{extension}")
        os.rename(staged_path, final_path)
        final_paths.append(final_path)
    return final_paths


# Renumber the augmented "Arjun" images so they start at 1211.
# NOTE(review): save_image writes JPEG-encoded ".jpg" files into this folder;
# giving them a ".png" name does not convert the data -- confirm that
# downstream readers detect the format from content rather than extension.
model_dir = "./data/augmented/Arjun"
if os.path.isdir(model_dir):
    renumber_files(model_dir, start=1211, extension=".png")