In [15]:
import os
import json
from PIL import Image
from utils import print_progress_bar
import shutil

In [16]:
def cut(ds, n):
    """Return the leading part of ``ds`` that stops at position ``n``.

    Ordinary slice semantics apply: an ``n`` larger than ``len(ds)`` yields the
    whole sequence, and a negative ``n`` drops that many items from the end.
    """
    head = slice(None, n)
    return ds[head]


def join_labels(ds):
    """Merge the fine-grained labels into three classes and re-file each image.

    Every entry of ``ds`` is a dict with a ``"label"`` (a string or a
    collection of strings) and a ``"path"``. The label is collapsed to one of
    ``'city'``, ``'country'`` or ``'nature'`` and the file is moved into a
    sibling folder named after the new label, i.e.
    ``<parent>/<old folder>/<file>`` becomes ``<parent>/<new label>/<file>``.

    The entry dicts are updated in place (``"label"`` and ``"path"``) and are
    also returned in a new list.

    Returns:
        The list of updated entries, or the integer ``1`` when at least one
        entry carries no recognised label. In that case nothing is moved and
        no entry is modified.

    Raises:
        OSError: if a file cannot be moved (for example the source is missing).
    """
    # old_labels = ['plain nature', 'detailed nature', 'Agriculture', 'villages', 'city']
    # new_labels = ['nature', 'country', 'city']
    # Listed in priority order: an entry tagged with several labels takes the
    # first group that matches. The new label names are members of their own
    # group so that already-joined entries resolve to themselves.
    label_groups = (
        ('city', ('city',)),
        ('country', ('Agriculture', 'villages', 'country')),
        ('nature', ('plain nature', 'detailed nature', 'nature')),
    )

    entries = list(ds)

    # Pass 1: resolve every label before touching any dict or file, so that an
    # unknown label cannot leave the dataset half-migrated on disk.
    resolved_labels = []
    for obj in entries:
        label = obj["label"]
        if isinstance(label, str):
            # Wrap plain strings so `in` is a membership test, not a substring test.
            label = [label]

        new_label = next(
            (
                target
                for target, members in label_groups
                if any(member in label for member in members)
            ),
            None,
        )
        if new_label is None:
            return 1
        resolved_labels.append(new_label)

    # Pass 2: move the files and update the entries.
    new_ds = []
    for obj, new_label in zip(entries, resolved_labels):
        old_path = obj["path"]
        folder, filename = os.path.split(old_path)
        new_folder = os.path.join(os.path.dirname(folder), new_label)
        new_path = os.path.join(new_folder, filename)

        # An entry that already sits in its target folder needs no move.
        if new_path != old_path:
            # The merged class folder may not exist yet.
            os.makedirs(new_folder, exist_ok=True)
            shutil.move(old_path, new_path)

        # Only record the new label/path once the file really is in place.
        obj['label'] = new_label
        obj["path"] = new_path

        new_ds.append(obj)

    return new_ds


def augment_data(ds):
    """Write rotated/flipped copies of every image in ``ds`` and describe them.

    For each entry the image at ``entry["path"]`` is loaded as RGB and eight
    variants are saved next to it as ``<stem>_rot<angle>_flip<0|1>.png``
    (angles 0/90/180/270, each with and without a left-right flip). Existing
    files with those names are overwritten.

    Args:
        ds: sequence of dicts providing ``"path"``, ``"label"``,
            ``"entropy_results"`` and ``"location"``.

    Returns:
        A new list with one dict per written file. ``"index"`` is the position
        in the returned list; ``"label"``, ``"location"`` and
        ``"entropy_results"`` are copied from the source entry (the
        ``entropy_results`` object is shared, not deep-copied).

    Entries whose file does not exist are reported on stdout and skipped.
    """
    augmented_dataset = []
    total_images = len(ds)

    for i, entry in enumerate(ds):
        original_path = entry["path"]
        original_label = entry["label"]
        original_entropy_results = entry["entropy_results"]
        original_location = entry["location"]

        # Load the original image
        try:
            img = Image.open(original_path).convert('RGB')
        except FileNotFoundError:
            print(f"\nFile not found: {original_path}")
            continue

        # Path pieces do not depend on the variant, so compute them once.
        folder, filename = os.path.split(original_path)
        stem = os.path.splitext(filename)[0]

        # Generate augmented images
        for angle in (0, 90, 180, 270):
            # expand=True keeps the whole picture when a non-square image is
            # turned by 90/270 degrees; without it the result is cropped to the
            # original canvas and padded with black. Square images are unaffected.
            rotated = img.rotate(angle, expand=True)

            for flip in (False, True):
                new_img = rotated.transpose(Image.FLIP_LEFT_RIGHT) if flip else rotated

                new_path = os.path.join(folder, f"{stem}_rot{angle}_flip{int(flip)}.png")

                # Save the new image, overwrite if exists
                new_img.save(new_path)

                augmented_dataset.append({
                    "index": len(augmented_dataset),
                    "path": new_path,
                    "size": os.path.getsize(new_path),
                    "pixel size": new_img.size,
                    "location": original_location,
                    "label": original_label,
                    "entropy_results": original_entropy_results  # Entropy results are the same as the original image
                })

        print_progress_bar("Images processed", i + 1, total_images)

    return augmented_dataset


def create_greyscale_images(root_folder):
    """Save a greyscale copy of every image found under ``root_folder``.

    The tree is walked recursively. Each file ending in ``.png``, ``.jpg`` or
    ``.jpeg`` is loaded, converted to 8-bit greyscale (mode ``'L'``) and
    written next to the source as ``<stem>_gray.png``, overwriting any existing
    file of that name.

    Files whose stem already ends in ``_gray`` are treated as earlier output of
    this function and skipped, so running it again does not pile up
    ``*_gray_gray.png`` copies.

    Files that disappear between listing and opening are reported on stdout
    and skipped.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    gray_suffix = '_gray'

    for subdir, _, files in os.walk(root_folder):
        for file in files:
            if not file.endswith(image_extensions):
                continue

            stem = os.path.splitext(file)[0]
            if stem.endswith(gray_suffix):
                # Output of a previous run, not a source image.
                continue

            img_path = os.path.join(subdir, file)

            try:
                img = Image.open(img_path).convert('RGB')
            except FileNotFoundError:
                print(f"File not found: {img_path}")
                continue

            gray_img = img.convert('L')
            gray_img.save(os.path.join(subdir, f"{stem}{gray_suffix}.png"))
                
                
def write_folder_structure_to_json(root_folder, json_file_path):
    """Dump a ``{relative folder: [file names]}`` listing of a tree to JSON.

    Every directory reached by walking ``root_folder`` becomes one key, given
    as its path relative to ``root_folder`` (the root itself is ``"."``). The
    value is the list of file names directly inside that directory. The
    mapping is written to ``json_file_path`` with a four-space indent.
    """
    listing = {
        os.path.relpath(current_dir, root_folder): file_names
        for current_dir, _subdirs, file_names in os.walk(root_folder)
    }

    with open(json_file_path, 'w') as out_file:
        json.dump(listing, out_file, indent=4)
        

def augment_images_in_folder(root_folder):
    """Write rotated/flipped copies of every PNG found under ``root_folder``.

    The tree is walked recursively. For each source ``<stem>.png`` up to eight
    variants named ``<stem>_rot<angle>_flip<0|1>.png`` (angles 0/90/180/270,
    each with and without a left-right flip) are saved in the same directory.
    Variants that already exist on disk are left untouched, so an interrupted
    run can be resumed.

    Files whose stem already ends in one of the ``_rot<angle>_flip<0|1>``
    suffixes are treated as earlier output and are not used as sources;
    otherwise each re-run would multiply the number of images by eight.

    Progress is printed after each processed source as
    ``Images processed: <done>/<total>``, where the total counts source PNGs only.
    Sources that disappear before they can be opened are reported and skipped.
    """
    angles = (0, 90, 180, 270)
    flips = (False, True)
    generated_suffixes = tuple(
        f"_rot{angle}_flip{int(flip)}" for angle in angles for flip in flips
    )

    # Snapshot the sources up front: gives an accurate progress total and
    # guarantees files written below are never picked up during this run.
    source_paths = [
        os.path.join(subdir, file)
        for subdir, _, files in os.walk(root_folder)
        for file in files
        if file.endswith('.png')
        and not os.path.splitext(file)[0].endswith(generated_suffixes)
    ]
    total_images = len(source_paths)
    processed_images = 0

    for original_path in source_paths:
        folder, filename = os.path.split(original_path)
        stem = os.path.splitext(filename)[0]

        # Load the original image
        try:
            img = Image.open(original_path).convert('RGB')
        except FileNotFoundError:
            print(f"\nFile not found: {original_path}")
            continue

        # Generate augmented images
        for angle in angles:
            rotated = None  # rotated lazily, only if a variant is actually missing

            for flip in flips:
                new_path = os.path.join(folder, f"{stem}_rot{angle}_flip{int(flip)}.png")

                # Skip if the image already exists
                if os.path.exists(new_path):
                    continue

                if rotated is None:
                    # expand=True keeps the full picture when a non-square
                    # image is turned by 90/270 degrees instead of cropping it
                    # to the original canvas; square images are unaffected.
                    rotated = img.rotate(angle, expand=True)

                new_img = rotated.transpose(Image.FLIP_LEFT_RIGHT) if flip else rotated

                # Save the new image
                new_img.save(new_path)

        processed_images += 1
        print(f"Images processed: {processed_images}/{total_images}")

In [23]:
# Load the JSON file at `path` into `dataset`; the cells below transform and
# re-save this object.
path = "../datasets/classified_pictures/entropy_results_joined_labels.json"
# Alternative input, kept for reference (presumably the file used before
# join_labels was applied - confirm):
# path = "../processed/results/entropy_results.json"
with open(path, 'r') as f:
    dataset = json.load(f)

In [24]:
# Replace `dataset` with the entries returned by augment_data, which also writes
# the rotated/flipped PNG files next to each source image.
# NOTE(review): this cell rebinds `dataset` to its own output, so re-running it
# without first re-running the load cell feeds the already-augmented entries
# back into augment_data.
dataset = augment_data(dataset)
# Alternative transform, kept for reference (moves files into the merged label folders):
# dataset = join_labels(dataset)

Images processed: ██████████████████████████████████████████████████ | 100.0% Complete 4437/4437 instances.

In [25]:
# Write the current `dataset` to `path` as JSON with a four-space indent,
# overwriting any existing file.
# NOTE(review): the output name is picked by hand to match whichever transform
# was run above; the commented path is the one matching join_labels.
path = "../datasets/classified_pictures/entropy_results_augmented.json"
# path = "../datasets/classified_pictures/entropy_results_joined_labels.json"
with open(path, 'w') as f:
    json.dump(dataset, f, indent=4)

In [None]:
# Create greyscale copies ("<name>_gray.png") of the image files found under
# root_folder, written next to their sources.
# NOTE(review): the JSON cells above use ../datasets/classified_pictures while
# this walks ../datasets/classified_images - confirm both names are intended.
root_folder = "../datasets/classified_images"
create_greyscale_images(root_folder)

In [None]:
# Save the folder structure: a {relative folder: [file names]} listing of
# root_folder, written as JSON. json_file_path is relative, so the file lands
# in the kernel's current working directory.
root_folder = "../datasets/classified_images"
json_file_path = "folder_structure.json"
write_folder_structure_to_json(root_folder, json_file_path)