In [1]:
'''
OUTLINE OF IDEA:

- We have a dataset for buff orpingtons now with the same naming style as the other chicken datasets in:
Himel, Galib Muhammad Shahriar; Islam, Md Masudul (2023), “GalliformeSpectra: A Hen Breed Dataset”, Mendeley Data, V1, doi: 10.17632/nk3zbvd5h8.1
found at https://data.mendeley.com/datasets/nk3zbvd5h8/1

- We need to apply data augmentation to the dataset
  - first we need to resize all images to 224x224 and save as .png
  - then apply rotations, flips, brightness, zooms, etc. to augment

Project ideas:
- extend the basic classifier idea from the study https://www.sciencedirect.com/science/article/pii/S2352340923010260
  but such that it simply has another class (orpington)
- get it to identify whether a bird is an orpington, regardless of its colour
- get it to correctly distinguish one breed from another of the same colour, e.g. buff orpington vs buff rock vs buff sussex (would need to create the datasets for this)

'''
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset paths used below all live under '/content/drive/My Drive/'
drive.mount('/content/drive') # gives this notebook file-system access to the Drive contents

Mounted at /content/drive


In [2]:
# Sequential approach to resizing the images --> kept for reference only (disabled by wrapping it in a string); slower than the multiprocessing version in the next cell
'''
from PIL import Image
import os

source_folder = '/content/drive/My Drive/Chickens/train(Original)/'
destination_folder = '/content/drive/My Drive/Chickens/train(224x224)/'

os.makedirs(destination_folder, exist_ok=True)  # create folder if it isn't already present


# Iterate through folders in the source directory
for folder in os.listdir(source_folder):
    breed_folder = os.path.join(destination_folder, folder)
    os.makedirs(breed_folder, exist_ok=True)

    # Construct the full path of the current folder
    current_folder_path = os.path.join(source_folder, folder)

    # Iterate through JPG files in the current breed folder
    for filename in os.listdir(current_folder_path):  # Iterate through files in the current folder
        if filename.endswith('.jpg') or filename.endswith('.jpeg'):
            img = Image.open(os.path.join(current_folder_path, filename))
            img_resized = img.resize((224, 224))
            png_filename = '.'.join(filename.split('.')[:-1]) + '.png'
            # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
            # = Join the first two parts and add '.png' --> each file is formatted 'aa.0.jpg' where the first part corresponds to breed and the number corresponds to that file's number within the breed folder
            # = Due to this, we must split it by '.' to change the file ending but also join the breed identifier and picture number in the folder before adding .png
            #
            img_resized.save(os.path.join(breed_folder, png_filename), format='PNG')

print("Files resized successfully")
'''

'\nfrom PIL import Image\nimport os\n\nsource_folder = \'/content/drive/My Drive/Chickens/train(Original)/\'\ndestination_folder = \'/content/drive/My Drive/Chickens/train(224x224)/\'\n\nos.makedirs(destination_folder, exist_ok=True)  # create folder if it isn\'t already present\n\n\n# Iterate through folders in the source directory\nfor folder in os.listdir(source_folder):\n    breed_folder = os.path.join(destination_folder, folder)\n    os.makedirs(breed_folder, exist_ok=True)\n\n    # Construct the full path of the current folder\n    current_folder_path = os.path.join(source_folder, folder)\n\n    # Iterate through JPG files in the current breed folder\n    for filename in os.listdir(current_folder_path):  # Iterate through files in the current folder\n        if filename.endswith(\'.jpg\') or filename.endswith(\'.jpeg\'):\n            img = Image.open(os.path.join(current_folder_path, filename))\n            img_resized = img.resize((224, 224))\n            png_filename = \'.\'.jo

In [8]:
from typing import Callable, Tuple, Union
from PIL import Image
import os
from multiprocessing import Pool
from tqdm import tqdm

def resize_image(args: Tuple[str, str]) -> None:
    """Resize a single image to 224x224 and save it as a PNG.

    The two paths are packed into one tuple so the function can be mapped
    directly over a task list with ``Pool.imap``.

    Args:
        args: ``(source_path, destination_path)`` - the image to read and the
            location the resized PNG is written to.
    """
    source_path, destination_path = args

    # Open through a context manager so the file handle is released even if
    # converting, resizing or saving raises inside a worker process.
    with Image.open(source_path) as img:
        # Pillow's PNG writer does not accept CMYK data, so convert it to RGB first
        if img.mode == 'CMYK':
            img = img.convert('RGB')

        img_resized = img.resize((224, 224))  # Fixed size for resizing
        img_resized.save(destination_path, format='PNG')

def resize_images(source_folder: str, destination_folder: str) -> None:
    """Resize every JPEG in a breed-per-folder dataset and save the results as PNGs.

    Each sub-folder of ``source_folder`` is mirrored under ``destination_folder``
    and every ``.jpg``/``.jpeg`` file inside it is handed to ``resize_image``;
    the output keeps the original file stem with a ``.png`` extension
    (e.g. ``aa.0.jpg`` -> ``aa.0.png``).

    Args:
        source_folder: Directory containing one sub-folder per breed.
        destination_folder: Directory the resized copies are written to;
            created if it does not exist.
    """
    os.makedirs(destination_folder, exist_ok=True)  # Create destination folder if it doesn't exist

    tasks = []
    for folder in os.listdir(source_folder):
        current_folder_path = os.path.join(source_folder, folder)
        # Stray files next to the breed folders would make os.listdir raise below
        if not os.path.isdir(current_folder_path):
            continue

        breed_folder = os.path.join(destination_folder, folder)
        os.makedirs(breed_folder, exist_ok=True)

        for filename in os.listdir(current_folder_path):
            # splitext only strips the final extension, so 'aa.0.jpg' keeps its 'aa.0' stem
            stem, extension = os.path.splitext(filename)
            # Compare case-insensitively so '.JPG' / '.JPEG' files are not silently skipped
            if extension.lower() in ('.jpg', '.jpeg'):
                source_path = os.path.join(current_folder_path, filename)
                destination_path = os.path.join(breed_folder, stem + '.png')
                tasks.append((source_path, destination_path))

    # Use multiprocessing to resize images with progress feedback
    with Pool() as pool:
        for _ in tqdm(pool.imap(resize_image, tasks), total=len(tasks)):
            pass  # Consuming the iterator drives the work and advances the progress bar

# Resize the original dataset into a parallel folder of 224x224 PNGs
source_folder = '/content/drive/My Drive/Chickens/train(Original)/'
destination_folder = '/content/drive/My Drive/Chickens/train(224x224)/'

# Run the multiprocessing resize over every breed folder (resize_images, which maps the per-image resize_image worker)
resize_images(source_folder, destination_folder)

100%|██████████| 1111/1111 [01:07<00:00, 16.43it/s]


In [15]:
# Augment the data by creating several modified copies of each file. This reads the resized images, so run the previous (resize) code block first if they don't exist yet.

from PIL import ImageEnhance

def rotate_image(args: Tuple[str, str, int]) -> None:
    """Rotate a single image and save the result as a PNG.

    The arguments are packed into one tuple so the function can be mapped
    directly over a task list with ``Pool.imap``.

    Args:
        args: ``(source_path, destination_path, degrees)`` - the image to read,
            where to write the rotated copy, and the rotation angle in degrees
            (Pillow rotates counter-clockwise for positive angles).
    """
    source_path, destination_path, degrees = args

    # The context manager releases the file handle even if rotating or saving raises
    with Image.open(source_path) as img:
        img_augmented = img.rotate(degrees)  # Rotate by x degrees
        img_augmented.save(destination_path, format='PNG')

def horizontal_flip_image(args: Tuple[str, str]) -> None:
    """Mirror a single image left-to-right and save the result as a PNG.

    The arguments are packed into one tuple so the function can be mapped
    directly over a task list with ``Pool.imap``.

    Args:
        args: ``(source_path, destination_path)`` - the image to read and where
            to write the flipped copy.
    """
    source_path, destination_path = args

    # The context manager releases the file handle even if flipping or saving raises
    with Image.open(source_path) as img:
        img_augmented = img.transpose(Image.FLIP_LEFT_RIGHT)  # Flip the image horizontally
        img_augmented.save(destination_path, format='PNG')

def vertical_flip_image(args: Tuple[str, str]) -> None:
    """Mirror a single image top-to-bottom and save the result as a PNG.

    The arguments are packed into one tuple so the function can be mapped
    directly over a task list with ``Pool.imap``.

    Args:
        args: ``(source_path, destination_path)`` - the image to read and where
            to write the flipped copy.
    """
    source_path, destination_path = args

    # The context manager releases the file handle even if flipping or saving raises
    with Image.open(source_path) as img:
        img_augmented = img.transpose(Image.FLIP_TOP_BOTTOM)  # Flip the image vertically
        img_augmented.save(destination_path, format='PNG')

def adjust_brightness(args: Tuple[str, str, float]) -> None:
    """Change the brightness of a single image and save the result as a PNG.

    The arguments are packed into one tuple so the function can be mapped
    directly over a task list with ``Pool.imap``.

    Args:
        args: ``(source_path, destination_path, brightness_factor)`` - the image
            to read, where to write the adjusted copy, and the factor to apply
            (1.0 = original, <1.0 = darker, >1.0 = brighter).
    """
    source_path, destination_path, brightness_factor = args

    # The context manager releases the file handle even if enhancing or saving raises
    with Image.open(source_path) as img:
        enhancer = ImageEnhance.Brightness(img)  # Create a brightness enhancer
        img_brightened = enhancer.enhance(brightness_factor)  # Apply the brightness factor
        img_brightened.save(destination_path, format='PNG')  # Save the adjusted image

def _run_augmentation(source_folder: str,
                      destination_folder: str,
                      worker: Callable[[tuple], None],
                      suffix: str,
                      extra_args: tuple = ()) -> None:
    """Apply one per-image augmentation to every file of a breed-per-folder dataset.

    Each sub-folder of ``source_folder`` is mirrored under ``destination_folder``.
    For every file inside it a task ``(source_path, destination_path, *extra_args)``
    is queued, where the destination keeps the original file stem and appends
    ``suffix`` plus ``.png`` (e.g. ``aa.0.png`` -> ``aa.0vflip.png``). The tasks
    are then mapped over ``worker`` in a process pool with a progress bar.

    Args:
        source_folder: Directory containing one sub-folder per breed.
        destination_folder: Directory the augmented copies are written to;
            created if it does not exist.
        worker: Module-level function taking one task tuple (it must be
            picklable so the pool can send it to worker processes).
        suffix: Text inserted between the file stem and ``.png``.
        extra_args: Additional values appended to every task tuple.
    """
    os.makedirs(destination_folder, exist_ok=True)  # Create destination folder if it doesn't exist

    tasks = []
    for folder in os.listdir(source_folder):
        current_folder_path = os.path.join(source_folder, folder)
        # Stray files next to the breed folders would make os.listdir raise below
        if not os.path.isdir(current_folder_path):
            continue

        breed_folder = os.path.join(destination_folder, folder)
        os.makedirs(breed_folder, exist_ok=True)

        for filename in os.listdir(current_folder_path):
            # ALL FILES SHOULD HAVE .PNG AS YOU SHOULD BE ACCESSING THE RESIZED IMAGES
            source_path = os.path.join(current_folder_path, filename)
            if not os.path.isfile(source_path):
                continue  # nested directories cannot be opened as images

            # splitext only strips the final extension, so 'aa.0.png' keeps its 'aa.0' stem
            stem = os.path.splitext(filename)[0]
            destination_path = os.path.join(breed_folder, f'{stem}{suffix}.png')
            tasks.append((source_path, destination_path, *extra_args))

    # Use multiprocessing to augment the images with progress feedback
    with Pool() as pool:
        for _ in tqdm(pool.imap(worker, tasks), total=len(tasks)):
            pass  # Consuming the iterator drives the work and advances the progress bar


def brighten_images(source_folder: str,
                    destination_folder: str,
                    brightness_factor: float) -> None:
    """Write a brightness-adjusted copy of every image in the dataset.

    Output files are named ``<stem>brightened<brightness_factor>.png``.

    Args:
        source_folder: Directory containing one sub-folder per breed.
        destination_folder: Directory the adjusted copies are written to.
        brightness_factor: Factor passed to ``adjust_brightness``
            (1.0 = original, <1.0 = darker, >1.0 = brighter).
    """
    _run_augmentation(source_folder, destination_folder, adjust_brightness,
                      f'brightened{brightness_factor}', (brightness_factor,))


def vertical_flip_images(source_folder: str,
                         destination_folder: str) -> None:
    """Write a top-to-bottom mirrored copy of every image in the dataset.

    Output files are named ``<stem>vflip.png``.

    Args:
        source_folder: Directory containing one sub-folder per breed.
        destination_folder: Directory the flipped copies are written to.
    """
    _run_augmentation(source_folder, destination_folder, vertical_flip_image, 'vflip')


def horizontal_flip_images(source_folder: str,
                           destination_folder: str) -> None:
    """Write a left-to-right mirrored copy of every image in the dataset.

    Output files are named ``<stem>hflip.png``.

    Args:
        source_folder: Directory containing one sub-folder per breed.
        destination_folder: Directory the flipped copies are written to.
    """
    _run_augmentation(source_folder, destination_folder, horizontal_flip_image, 'hflip')


def rotate_images(source_folder: str,
                  destination_folder: str,
                  degrees: int) -> None:
    """Write a rotated copy of every image in the dataset.

    Output files are named ``<stem>rotate<degrees>.png``.

    Args:
        source_folder: Directory containing one sub-folder per breed.
        destination_folder: Directory the rotated copies are written to.
        degrees: Rotation angle passed to ``rotate_image``.
    """
    _run_augmentation(source_folder, destination_folder, rotate_image,
                      f'rotate{degrees}', (degrees,))

# Input: the resized images written by the resize step
source_folder = '/content/drive/My Drive/Chickens/train(224x224)/'
# Output: every augmentation of a breed (rotations, flips, brightness changes) lands in that breed's
# folder here, e.g. all variants of a bufforpington go to the bufforpington augment folder
destination_folder = '/content/drive/My Drive/Chickens/train(Augment)/'

# Rotated copies, one full pass over the dataset per angle
for degrees in (15, 30, -15, -30):
    rotate_images(source_folder, destination_folder, degrees)

# Mirrored copies
vertical_flip_images(source_folder, destination_folder)
horizontal_flip_images(source_folder, destination_folder)

# Brightness factors 0.7, 0.8, ..., 1.3 (the 1.0 pass applies a factor of 1.0 = original brightness)
for i in range(7, 14):
    brighten_images(source_folder, destination_folder, i / 10)


100%|██████████| 1111/1111 [00:34<00:00, 32.35it/s]
100%|██████████| 1111/1111 [00:36<00:00, 30.71it/s]
100%|██████████| 1111/1111 [00:36<00:00, 30.45it/s]
100%|██████████| 1111/1111 [00:35<00:00, 31.42it/s]
100%|██████████| 1111/1111 [00:35<00:00, 31.49it/s]
100%|██████████| 1111/1111 [00:42<00:00, 26.30it/s]
100%|██████████| 1111/1111 [00:38<00:00, 29.10it/s]
100%|██████████| 1111/1111 [00:37<00:00, 29.37it/s]
100%|██████████| 1111/1111 [00:38<00:00, 29.12it/s]
100%|██████████| 1111/1111 [00:36<00:00, 30.86it/s]
100%|██████████| 1111/1111 [00:36<00:00, 30.36it/s]
100%|██████████| 1111/1111 [00:37<00:00, 29.76it/s]
100%|██████████| 1111/1111 [00:35<00:00, 30.95it/s]
