In [1]:
import os
import json
import cv2
import albumentations as A
from tqdm import tqdm
import random

  data = fetch_version_info()


In [2]:
# Path of the JSON metadata file, relative to the current working directory.
META_JSON_FILE = 'meta.json'

# Parse the metadata once; the file is closed when the `with` block exits.
# NOTE(review): meta_data is not referenced by the other visible cells.
with open(META_JSON_FILE, "r") as meta_file:
    meta_data = json.loads(meta_file.read())

# Class titles; a title's position in this list is its integer class id.
class_names = [
    "longitudinal crack",
    "transverse crack",
    "alligator crack",
    "block crack",
    "pothole",
    "manhole cover",
    "other corruption",
]

# Forward lookup: class title -> integer class id.
class_map = dict(zip(class_names, range(len(class_names))))
print("Loaded class mapping:", class_map)

Loaded class mapping: {'longitudinal crack': 0, 'transverse crack': 1, 'alligator crack': 2, 'block crack': 3, 'pothole': 4, 'manhole cover': 5, 'other corruption': 6}


In [3]:
# Inverse lookup: integer class id -> class title.
reverse_class_map = dict(zip(class_map.values(), class_map.keys()))
print("Reverse class mapping:", reverse_class_map)


Reverse class mapping: {0: 'longitudinal crack', 1: 'transverse crack', 2: 'alligator crack', 3: 'block crack', 4: 'pothole', 5: 'manhole cover', 6: 'other corruption'}


In [4]:
def convert_bbox_to_yolo(exterior, img_w, img_h):
    """Convert a two-corner pixel rectangle into a normalized YOLO box.

    Args:
        exterior: Sequence whose first two items are ``[x, y]`` corner points
            of the rectangle, in pixels. The two corners may be supplied in
            either order; they are sorted per axis so the resulting width
            and height are never negative.
        img_w: Image width in pixels, used to normalize x values.
        img_h: Image height in pixels, used to normalize y values.

    Returns:
        ``[x_center, y_center, width, height]`` where x values are divided by
        ``img_w`` and y values by ``img_h``.
    """
    (x_a, y_a), (x_b, y_b) = exterior[0], exterior[1]

    # Order the corners per axis: a reversed rectangle would otherwise yield
    # a negative width/height, which is not a valid YOLO box.
    x_min, x_max = min(x_a, x_b), max(x_a, x_b)
    y_min, y_max = min(y_a, y_b), max(y_a, y_b)

    x_center = ((x_min + x_max) / 2) / img_w
    y_center = ((y_min + y_max) / 2) / img_h
    width = (x_max - x_min) / img_w
    height = (y_max - y_min) / img_h
    return [x_center, y_center, width, height]

def convert_yolo_to_bbox(yolo_box, img_w, img_h):
    """Convert a normalized YOLO box back to a two-corner pixel rectangle.

    Args:
        yolo_box: ``(x_center, y_center, width, height)`` with x values
            normalized by image width and y values by image height.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        ``[[x_min, y_min], [x_max, y_max]]`` as Python ints.
    """
    x_c, y_c, w, h = yolo_box
    half_w, half_h = w / 2, h / 2

    def _to_pixel(fraction, extent):
        # Round to the nearest pixel instead of truncating: int() alone drops
        # the fractional part, so e.g. 3.75 would become 3 and the box would
        # shift. The outer int() guarantees a plain Python int even when the
        # input is a non-builtin float type.
        return int(round(fraction * extent))

    x_min = _to_pixel(x_c - half_w, img_w)
    y_min = _to_pixel(y_c - half_h, img_h)
    x_max = _to_pixel(x_c + half_w, img_w)
    y_max = _to_pixel(y_c + half_h, img_h)
    return [[x_min, y_min], [x_max, y_max]]


In [None]:
# Pool of candidate transforms as (short name, transform) pairs, each
# configured with p=1.0. The short names are used to build output file names.
augmentations = list({
    "HFlip": A.HorizontalFlip(p=1.0),
    "Bright": A.RandomBrightnessContrast(p=1.0),
    "Rotate": A.Rotate(limit=15, p=1.0),
    "Blur": A.MotionBlur(p=1.0),
}.items())


In [6]:
def apply_augmentation(image, boxes, labels, transform, w, h):
    """Run ``transform`` on an image and its boxes, tolerating failures.

    Args:
        image: Image passed to the transform as ``image``.
        boxes: Bounding boxes passed to the transform as ``bboxes``.
        labels: Per-box labels passed to the transform as ``class_labels``.
        transform: Callable accepting ``image``, ``bboxes`` and
            ``class_labels`` keyword arguments and returning a mapping with
            those same three keys.
        w: Unused; kept so existing callers keep working.
        h: Unused; kept so existing callers keep working.

    Returns:
        ``(image, bboxes, class_labels)`` taken from the transform's result,
        or ``(None, None, None)`` when the transform (or reading its result)
        raises an ``Exception``.
    """
    try:
        aug = transform(image=image, bboxes=boxes, class_labels=labels)
        return aug['image'], aug['bboxes'], aug['class_labels']
    except Exception as exc:
        # Best-effort: one failing sample must not abort a long batch run.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate, so the run can still be stopped by the user.
        print(f"⚠️ Augmentation failed ({type(exc).__name__}: {exc}), skipping...")
        return None, None, None
def augment_image(image_path, annotation_path, save_dir):
    """Write one composite-augmented copy of an image and its annotation.

    Two distinct entries are drawn at random from the module-level
    ``augmentations`` list, composed, and applied to the image together with
    its boxes. The result is written to ``<save_dir>/images`` and
    ``<save_dir>/annotations``; both directories must already exist.

    Args:
        image_path: Path of the source image.
        annotation_path: Path of the JSON annotation. It is read for
            ``size.height``, ``size.width`` and ``objects``; each object
            supplies ``classTitle`` and ``points.exterior``.
        save_dir: Output root containing ``images`` and ``annotations``.

    Returns:
        None. Nothing is written when the image cannot be read, when the
        transform fails, or when the augmented image cannot be saved.

    Raises:
        KeyError: If an object's ``classTitle`` is not in ``class_map``.
    """
    with open(annotation_path) as f:
        ann = json.load(f)

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing/undecodable file by returning None
        # rather than raising, so check explicitly instead of letting the
        # transform fail on it later.
        print(f"⚠️ Could not read image {image_path}, skipping...")
        return

    # Boxes are normalized with the size recorded in the annotation file.
    h, w = ann['size']['height'], ann['size']['width']

    orig_boxes = []
    class_labels = []
    for obj in ann['objects']:
        orig_boxes.append(convert_bbox_to_yolo(obj['points']['exterior'], w, h))
        class_labels.append(class_map[obj['classTitle']])

    base_name = os.path.splitext(os.path.basename(image_path))[0]

    # --- Composite Augmentation ---
    (first_name, first_tf), (second_name, second_tf) = random.sample(augmentations, 2)
    transform_composite = A.Compose(
        [first_tf, second_tf],
        bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels'])
    )
    img_comp, boxes_comp, labels_comp = apply_augmentation(
        img, orig_boxes, class_labels, transform_composite, w, h
    )
    if img_comp is None:
        # apply_augmentation signals failure with a None image.
        return

    # Build both output names from one stem so the image and annotation
    # always pair up, even if the original file name contains ".jpg" in the
    # middle (str.replace would rewrite every occurrence).
    out_stem = f"{base_name}_aug1_{first_name}_{second_name}"
    image_out = os.path.join(save_dir, "images", out_stem + ".jpg")
    annotation_out = os.path.join(save_dir, "annotations", out_stem + ".json")

    if not cv2.imwrite(image_out, img_comp):
        # Do not leave an annotation behind without its image.
        print(f"⚠️ Could not write image {image_out}, skipping annotation...")
        return

    # Shallow copy keeps the remaining top-level annotation fields; only the
    # object list is replaced with the transformed boxes.
    ann_comp = dict(ann)
    ann_comp['objects'] = [
        {
            "classTitle": reverse_class_map[label],
            "points": {"exterior": convert_yolo_to_bbox(box, w, h), "interior": []},
            "geometryType": "rectangle"
        }
        for box, label in zip(boxes_comp, labels_comp)
    ]
    with open(annotation_out, "w") as f:
        json.dump(ann_comp, f, indent=2)

In [17]:

# Output root for the augmented dataset (relative path); augment_random_images
# creates the "images" and "annotations" sub-directories underneath it.
aug_save_dir = "datasets/yolo_dataset/augmented"

def augment_random_images(save_dir, num_images=15000,
                          img_dir='datasets/yolo_dataset/train/images',
                          ann_dir='datasets/yolo_dataset/train/annotations'):
    """
    Randomly pick N unique images from the image folder and augment each one.

    Args:
        save_dir (str): Directory to save augmented images and annotations.
            Its ``images`` and ``annotations`` sub-directories are created
            when missing.
        num_images (int): Number of images to augment. Capped at the number
            of ``.jpg`` files available in ``img_dir``.
        img_dir (str): Directory scanned for ``.jpg`` source images.
        ann_dir (str): Directory holding the annotations; the annotation of
            ``name.jpg`` is looked up as ``name.jpg.json``.

    Returns:
        None. Progress and a final summary are printed.
    """
    # Sort the listing: os.listdir order is arbitrary, and a stable order
    # makes the sample reproducible whenever the random module is seeded.
    all_images = sorted(f for f in os.listdir(img_dir) if f.endswith(".jpg"))
    if not all_images:
        print(f"No images found in {img_dir}.")
        return

    # If num_images > available, reduce it
    num_images = min(num_images, len(all_images))

    # Pick random unique images
    selected_images = random.sample(all_images, num_images)

    # Create output directories
    for sub_dir in ("images", "annotations"):
        os.makedirs(os.path.join(save_dir, sub_dir), exist_ok=True)

    processed = 0
    skipped = 0
    for idx, selected_img in enumerate(selected_images, 1):
        image_path = os.path.join(img_dir, selected_img)
        annotation_path = os.path.join(ann_dir, selected_img + ".json")

        if not os.path.exists(annotation_path):
            print(f"⚠️ Annotation missing for {selected_img}, skipping...")
            skipped += 1
            continue

        # Apply augmentation
        augment_image(image_path, annotation_path, save_dir)
        processed += 1

        if idx % 500 == 0:
            print(f"✅ Augmented {idx}/{num_images} images...")

    # Report what was actually handed to augment_image, not merely selected,
    # so skipped images are not counted as completed.
    print(f"\n🎉 Completed augmentation for {processed} images. Saved to {save_dir}")
    if skipped:
        print(f"⚠️ Skipped {skipped} images with missing annotations.")


# Run for 15,000 images
# The function caps num_images at the number of available .jpg files.
# NOTE(review): no random seed is set in the visible cells, so each run draws
# a different sample and a different pair of transforms per image.
augment_random_images(save_dir=aug_save_dir, num_images=15000)

  self._set_keys()


✅ Augmented 500/15000 images...
✅ Augmented 1000/15000 images...
✅ Augmented 1500/15000 images...
✅ Augmented 2000/15000 images...
✅ Augmented 2500/15000 images...
✅ Augmented 3000/15000 images...
✅ Augmented 3500/15000 images...
✅ Augmented 4000/15000 images...
✅ Augmented 4500/15000 images...
✅ Augmented 5000/15000 images...
✅ Augmented 5500/15000 images...
✅ Augmented 6000/15000 images...
✅ Augmented 6500/15000 images...
✅ Augmented 7000/15000 images...
✅ Augmented 7500/15000 images...
✅ Augmented 8000/15000 images...
✅ Augmented 8500/15000 images...
✅ Augmented 9000/15000 images...
✅ Augmented 9500/15000 images...
✅ Augmented 10000/15000 images...
✅ Augmented 10500/15000 images...
✅ Augmented 11000/15000 images...
✅ Augmented 11500/15000 images...
✅ Augmented 12000/15000 images...
✅ Augmented 12500/15000 images...
✅ Augmented 13000/15000 images...
✅ Augmented 13500/15000 images...
✅ Augmented 14000/15000 images...
✅ Augmented 14500/15000 images...
✅ Augmented 15000/15000 images...
