# In [14]:
import os
import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
import json

# Define the augmentation pipeline
transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.Rotate(limit=40, p=0.7),
    A.RandomSizedCrop(min_max_height=(150, 224), height=224, width=224, p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.Normalize(mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0)),  # Apply only to images
    ToTensorV2()
])

# Function to transform polygons based on image augmentations
def transform_polygon(polygon, height, width, transform):
    polygon_array = np.array(polygon, dtype=np.float32)
    
    # Create a dummy image of zeros
    dummy_img = np.zeros((height, width), dtype=np.uint8)
    
    # Draw polygon on the dummy image
    cv2.fillPoly(dummy_img, [np.int32(polygon_array)], 1)
    
    # Apply the same transformation to the dummy image
    augmented = transform(image=dummy_img, mask=dummy_img)
    
    # Extract the transformed mask as numpy array
    transformed_mask = augmented['mask'].cpu().numpy()

    # Extract transformed points
    contours, _ = cv2.findContours(transformed_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours and len(contours[0]) > 2:
        return contours[0].reshape(-1, 2).tolist()
    else:
        return []

# Function to apply augmentation and save image, mask, and JSON
def augment_and_save(image_path, mask_path, json_path, save_img_dir, save_mask_dir, save_json_dir, augment_count=5):
    # Load the image and mask
    img = cv2.imread(image_path)
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

    img_name = os.path.basename(image_path).split('.')[0]
    mask_name = os.path.basename(mask_path).split('.')[0]
    
    height, width = img.shape[:2]

    # Ensure the mask is binary
    mask = (mask > 127).astype(np.uint8) * 255

    for i in range(augment_count):
        # Reload the JSON data for each augmentation
        with open(json_path, 'r') as json_file:
            json_data = json.load(json_file)

        augmented = transform(image=img, mask=mask)
        augmented_img = augmented['image'].cpu().numpy().transpose(1, 2, 0)
        augmented_mask = augmented['mask'].cpu().numpy()

        # Convert back to original scale (unnormalize) for the image
        augmented_img = (augmented_img * 255).astype(np.uint8)
        augmented_mask = (augmented_mask > 0.5).astype(np.uint8) * 255

        # Update JSON polygons
        if 'shapes' in json_data:
            augmented_shapes = []
            for shape in json_data['shapes']:
                polygon = shape['points']
                transformed_polygon = transform_polygon(polygon, height, width, transform)
                if len(transformed_polygon) > 0:
                    shape['points'] = transformed_polygon
                    augmented_shapes.append(shape)

            json_data['shapes'] = augmented_shapes
        
        # Update the `imagePath` field in the JSON file to match the new augmented image name
        new_img_name = f"{img_name}_aug_{i}.jpg"
        json_data['imagePath'] = new_img_name

        # Save the augmented image, mask, and JSON with updated filenames
        cv2.imwrite(os.path.join(save_img_dir, new_img_name), augmented_img)
        cv2.imwrite(os.path.join(save_mask_dir, f"{mask_name}_aug_{i}.png"), augmented_mask)

        # Save the updated JSON file
        with open(os.path.join(save_json_dir, f"{img_name}_aug_{i}.json"), 'w') as json_out_file:
            json.dump(json_data, json_out_file)

# Directories for images, masks, and JSON annotations
img_dir = 'data/jpg'
mask_dir = 'data/mask_png'
json_dir = 'data/json'  # Directory containing original JSON files

# Directories for saving augmented data
save_img_dir = 'data_aug/jpg_aug'
save_mask_dir = 'data_aug/mask_aug'
save_json_dir = 'data_aug/json_aug'

# Create output directories if they don't exist
os.makedirs(save_img_dir, exist_ok=True)
os.makedirs(save_mask_dir, exist_ok=True)
os.makedirs(save_json_dir, exist_ok=True)

# Apply augmentation and save for each image/mask/json triplet
for img_name in os.listdir(img_dir):
    img_path = os.path.join(img_dir, img_name)
    mask_path = os.path.join(mask_dir, img_name.replace('.jpg', '.png'))
    json_path = os.path.join(json_dir, img_name.replace('.jpg', '.json'))

    # Ensure both mask and JSON exist for each image
    if os.path.exists(mask_path) and os.path.exists(json_path):
        augment_and_save(img_path, mask_path, json_path, save_img_dir, save_mask_dir, save_json_dir)
    else:
        print(f"Mask or JSON not found for {img_name}, skipping augmentation.")


# Error raised when running the cell above:
# ValueError: Values for crop should be non negative and equal or smaller than image sizes(x_min = -2, y_min = -2, x_max = 222, y_max = 222, height = 217, width = 217)