In [21]:
import fitz
from PIL import Image
import os
import cv2
import albumentations as A
from tqdm import tqdm
import random

# Lift Pillow's pixel-count limit so that very large images can be opened.
# NOTE(review): this also switches off Pillow's decompression-bomb check, so
# only open image files from sources you trust.
Image.MAX_IMAGE_PIXELS = None

Convert PDF to PNG

In [None]:
def convert_pdf_to_image(pdf_path, dpi, output_path="cropped_image.png", page_number=0):
    """Render one page of a PDF to an image file.

    Args:
        pdf_path: Path of the PDF file to open.
        dpi: Target resolution in dots per inch; must be positive. The page is
            scaled by ``dpi / 72`` on both axes.
        output_path: File the rendered page is saved to. Defaults to
            "cropped_image.png", the name this function previously hard-coded.
        page_number: Zero-based index of the page to render. Defaults to the
            first page, which is what this function previously always used.

    Returns:
        The path the image was saved to, so it can be passed to the next step.

    Raises:
        ValueError: If ``dpi`` is not greater than zero.
    """
    # Validate before touching the file so a bad argument fails fast and clearly.
    if dpi <= 0:
        raise ValueError(f"dpi must be positive, got {dpi!r}")

    pdf_document = fitz.open(pdf_path)
    try:
        page = pdf_document.load_page(page_number)
        # 72 is the baseline resolution the zoom factor is expressed against.
        zoom = dpi / 72
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
        pix.save(output_path)
    finally:
        # Always release the document, even if rendering or saving fails.
        pdf_document.close()

    return output_path

In [None]:
# Render the site orthomosaic PDF to a PNG at 1200 DPI.
pdf_path = "68484 - Longley Area Park, Sheffield - Site 3 - Orthomosaic.pdf"
convert_pdf_to_image(pdf_path=pdf_path, dpi=1200)

Crop images

In [None]:
def crop_image(image_path, square_size, output_folder):
    """Tile an image into squares and save each tile as ``crop_<n>.png``.

    Tiles are produced row by row, left to right. Tiles on the right and
    bottom edges are clipped to the image bounds, so they can be smaller than
    ``square_size``. Numbering continues after the highest ``crop_<n>.png``
    already present in ``output_folder``, so repeated calls append to the
    folder instead of overwriting earlier tiles.

    Args:
        image_path: Path of the image to tile.
        square_size: Side length of each tile in pixels; must be positive.
        output_folder: Directory the tiles are written to; created if missing.

    Raises:
        ValueError: If ``square_size`` is not greater than zero.
    """
    # A zero size would divide by zero and a negative one would silently
    # produce no tiles, so reject both up front.
    if square_size <= 0:
        raise ValueError(f"square_size must be positive, got {square_size!r}")

    prefix, suffix = 'crop_', '.png'

    # The context manager closes the underlying file once all tiles are saved.
    with Image.open(image_path) as image:
        width, height = image.size

        os.makedirs(output_folder, exist_ok=True)

        # Find the highest existing crop number. Files that merely look
        # similar (e.g. "crop_a.png") are ignored rather than crashing int().
        used_numbers = []
        for name in os.listdir(output_folder):
            if name.startswith(prefix) and name.endswith(suffix):
                stem = name[len(prefix):-len(suffix)]
                if stem.isdecimal():
                    used_numbers.append(int(stem))
        count = max(used_numbers) + 1 if used_numbers else 0

        for upper in range(0, height, square_size):
            lower = min(upper + square_size, height)
            for left in range(0, width, square_size):
                right = min(left + square_size, width)

                # Crop and save the square
                cropped_image = image.crop((left, upper, right, lower))
                output_path = os.path.join(output_folder, f'crop_{count}.png')
                cropped_image.save(output_path)
                count += 1

In [None]:
# Tile the site image into 1000 px squares under cropped_images/.
# NOTE(review): the PDF conversion step saves to "cropped_image.png", which is
# not the file read here — confirm this PNG exists when running on a fresh kernel.
crop_image(
    image_path='image_68484 - Longley Area Park, Sheffield - Site 3.png',
    square_size=1000,
    output_folder='cropped_images',
)

Data augmentation

In [25]:
# load images
def load_images(folder_path):
    """Return the paths of the image files directly inside a folder.

    Only regular files whose extension (case-insensitive) is a common image
    format are returned; sub-directories and other files such as ".DS_Store"
    are left out so callers do not try to decode them as images. The result is
    sorted by file name so the order is the same on every run and platform.

    Args:
        folder_path: Directory to list (not searched recursively).

    Returns:
        A sorted list of paths, each ``folder_path`` joined with a file name.
        Empty if the folder holds no image files.
    """
    image_extensions = (
        '.png', '.jpg', '.jpeg', '.jpe', '.jp2', '.bmp', '.dib',
        '.tif', '.tiff', '.webp', '.pbm', '.pgm', '.ppm',
    )
    image_paths = []
    for filename in sorted(os.listdir(folder_path)):
        full_path = os.path.join(folder_path, filename)
        # isfile() rules out directories that happen to end in an image suffix.
        if filename.lower().endswith(image_extensions) and os.path.isfile(full_path):
            image_paths.append(full_path)
    return image_paths

# save images
def save_augmented_image(image, output_path, original_path):
    """Save an augmented image into a folder under the source file's name.

    Args:
        image: Image array in RGB channel order; it is converted to BGR
            before being handed to OpenCV for writing.
        output_path: Directory to write into (despite the name, this is a
            folder, not a file path).
        original_path: Path of the source image; only its base name is used,
            so the output keeps the same file name and extension.

    Returns:
        The full path of the file that was written.

    Raises:
        OSError: If OpenCV reports that the image could not be written.
    """
    filename = os.path.basename(original_path)
    output_filepath = os.path.join(output_path, filename)
    bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    # cv2.imwrite reports some failures (e.g. a missing folder) through its
    # return value instead of raising, so check it rather than lose the image.
    if not cv2.imwrite(output_filepath, bgr_image):
        raise OSError(f"Could not write image to {output_filepath!r}")
    return output_filepath

def apply_augmentations(input_folder, output_folder, seed=None):
    """Write one randomly augmented copy of every image in a folder.

    For each image, one augmentation family (flip, rotate, color, noise or
    blur) is picked at random and applied, and the result is saved to
    ``output_folder`` under the original file name. Files that OpenCV cannot
    decode are skipped and listed at the end instead of aborting the batch.

    Note that if ``output_folder`` is the same as ``input_folder`` the
    originals are overwritten, because output files reuse the input names.

    Args:
        input_folder: Directory containing the source images.
        output_folder: Directory for the augmented images; created if missing.
        seed: Optional seed for choosing which augmentation family each image
            gets. With the default ``None`` the global ``random`` state is
            used, as before. This does not seed the randomness inside the
            albumentations transforms themselves.
    """
    os.makedirs(output_folder, exist_ok=True)

    # data augmentation methods
    augmentations = {
        # Both flips always fire (p=1.0), which amounts to a 180-degree turn.
        'flip': A.Compose([
            A.HorizontalFlip(p=1.0),  # horizontal flip
            A.VerticalFlip(p=1.0)     # vertical flip
        ]),
        # NOTE(review): the random multiple of 90 degrees may be zero, which
        # would leave the image unchanged — confirm against the albumentations docs.
        'rotate': A.Compose([
            A.RandomRotate90(p=1.0),  # rotation by a random multiple of 90 degrees
        ]),
        'color': A.Compose([
            A.RandomBrightnessContrast(p=1.0),  # random brightness and contrast
            A.HueSaturationValue(p=1.0)         # random hue, saturation and value shift
        ]),
        'noise': A.Compose([
            A.GaussNoise(p=1.0),              # Gauss noise
            A.ISONoise(p=1.0)                 # ISO noise
        ]),
        'blur': A.Compose([
            A.GaussianBlur(p=1.0),           # Gaussian blur
            A.MotionBlur(p=1.0)              # Motion blur
        ])
    }

    aug_types = list(augmentations.keys())

    # A private generator keeps a seeded run from disturbing global random
    # state; without a seed, fall back to the module-level generator.
    rng = random.Random(seed) if seed is not None else random

    # Sort so the image-to-augmentation pairing is stable for a given seed.
    image_paths = sorted(load_images(input_folder))

    unreadable = []

    # apply a random augmentation method to each image
    for img_path in tqdm(image_paths):
        image = cv2.imread(img_path)
        # imread returns None (rather than raising) for files it cannot decode.
        if image is None:
            unreadable.append(img_path)
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # choose a random augmentation method
        aug_type = rng.choice(aug_types)
        transform = augmentations[aug_type]

        # apply augmentation and save
        augmented = transform(image=image)['image']
        save_augmented_image(augmented, output_folder, img_path)

    if unreadable:
        # Report after the loop so the progress bar is not interleaved with text.
        print(f"Skipped {len(unreadable)} file(s) that could not be read as images:")
        for skipped_path in unreadable[:10]:
            print(f"  {skipped_path}")
        if len(unreadable) > 10:
            print(f"  ... and {len(unreadable) - 10} more")

In [26]:
# Write one augmented copy of each crop into augmented_images/, keeping file names.
apply_augmentations(input_folder='cropped_images', output_folder='augmented_images')

100%|██████████| 3440/3440 [03:33<00:00, 16.08it/s]
