# 1 - Convert images to JPEG and augment them

In [4]:
import os
from PIL import Image
import imgaug.augmenters as iaa
import numpy as np
from tqdm import tqdm  # Importation de tqdm pour la barre de progression
from multiprocessing import Pool, cpu_count

# Root folder of the original images; walked recursively by convert_images_to_jpeg.
source_directory = '../data/pokemon'
# Destination root for the RGB JPEG copies written by convert_images_to_jpeg.
target_directory = '../data/pokemon-augmented'

def convert_images_to_jpeg(source_dir, target_dir):
    """Mirror ``source_dir`` into ``target_dir`` with every image re-encoded as an RGB JPEG.

    The sub-directory layout found under ``source_dir`` is reproduced under
    ``target_dir``. Each converted file keeps its base name and receives a
    ``.jpg`` extension. Files whose extension is not in the supported list
    are ignored.

    Args:
        source_dir: Root directory that is walked recursively for images.
        target_dir: Root directory receiving the converted copies; created
            when it does not exist yet.
    """
    # Dotted suffixes: a bare 'png' would also match a file called "notapng".
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tif', '.tiff')
    os.makedirs(target_dir, exist_ok=True)

    for root, _dirs, files in os.walk(source_dir):
        current_dir_name = os.path.basename(root)
        # The destination folder only depends on `root`, so resolve it once
        # per directory rather than once per file.
        target_subdir = os.path.join(target_dir, os.path.relpath(root, source_dir))
        for file in tqdm(files, desc=f"Conversion ({current_dir_name})", unit="file"):
            if not file.lower().endswith(image_extensions):
                continue
            # Created lazily so folders without any image are not mirrored.
            os.makedirs(target_subdir, exist_ok=True)
            target_file_path = os.path.join(target_subdir, f"{os.path.splitext(file)[0]}.jpg")
            with Image.open(os.path.join(root, file)) as img:
                # JPEG has no alpha/palette support, hence the RGB conversion.
                img.convert('RGB').save(target_file_path, 'JPEG')

def augment_image_task(args):
    """Produce the augmented variants of one image (multiprocessing worker).

    Args:
        args: Tuple ``(file_path, aug, num_augmentations)`` where ``aug`` is
            a callable accepting ``image=<ndarray>`` and returning an array.

    Returns:
        A list of ``(PIL image, output path)`` pairs, one per augmentation.
        Output paths are ``<file_path without extension>_aug<i>.jpg``; nothing
        is written to disk here.
    """
    file_path, aug, num_augmentations = args
    stem = os.path.splitext(file_path)[0]
    with Image.open(file_path) as img:
        pixels = np.array(img)
        return [
            (Image.fromarray(aug(image=pixels)), f"{stem}_aug{idx}.jpg")
            for idx in range(num_augmentations)
        ]

def _is_augmented_copy(filename):
    """Return True when ``filename`` ends with the ``_aug<N>`` marker written by ``augment_images``."""
    stem = os.path.splitext(filename)[0]
    _head, marker, index = stem.rpartition('_aug')
    return bool(marker) and index.isdigit()


def augment_images(directory, num_augmentations=3):
    """Write ``num_augmentations`` randomly augmented copies next to every JPEG under ``directory``.

    Copies are saved beside their source as ``<name>_aug<i>.jpg``. Files that
    already carry such a suffix are skipped, so running this function again
    on the same folder does not augment earlier augmentations.

    Args:
        directory: Root directory walked recursively for ``.jpg``/``.jpeg`` files.
        num_augmentations: Number of augmented copies produced per source image.
    """
    aug = iaa.Sequential([
        iaa.Fliplr(0.3),  # horizontal flip for 30% of the images
        iaa.Flipud(0.1),  # vertical flip for 10% of the images
        iaa.Affine(
            scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
            translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
            rotate=(-25, 25),
            shear=(-8, 8)
        ),
        iaa.GaussianBlur(sigma=(0, 1.0)),
        iaa.AdditiveGaussianNoise(scale=(0, 0.05*255)),
        iaa.AdditiveLaplaceNoise(scale=(0, 0.02*255)),
        iaa.AdditivePoissonNoise(lam=(0, 40)),
        iaa.Multiply((0.8, 1.2)),  # brightness
        iaa.LinearContrast((0.8, 1.2))
    ])

    # NOTE(review): the same `aug` object is pickled into every task; confirm
    # that worker processes do not end up drawing identical random streams
    # (imgaug documents its own multicore Pool for seeding workers).
    tasks = []
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if not file.lower().endswith(('.jpg', '.jpeg')):
                continue
            if _is_augmented_copy(file):
                # Output of a previous run; augmenting it again would make the
                # dataset grow geometrically on every re-execution.
                continue
            tasks.append((os.path.join(root, file), aug, num_augmentations))

    if not tasks:
        # Nothing to do: avoid starting a worker pool for an empty job list.
        return

    # Process the images in parallel; saving happens in the parent process.
    with Pool(processes=cpu_count()) as pool:
        for augmented_images in tqdm(pool.imap_unordered(augment_image_task, tasks), total=len(tasks), desc="Augmentation images", unit="file"):
            for aug_img_pil, aug_file_path in augmented_images:
                aug_img_pil.save(aug_file_path)

In [5]:
# Build the JPEG mirror of the raw dataset.
convert_images_to_jpeg(source_dir=source_directory, target_dir=target_directory)

Conversion (pokemon): 0file [00:00, ?file/s]
Conversion (Vulpix): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 142/142 [00:01<00:00, 130.34file/s]
Conversion (Sandshrew): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 98/98 [00:00<00:00, 248.69file/s]
Conversion (Charizard): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 105/105 [00:01<00:00, 65.25file/s]
Conversion (Articuno): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 94/94 [00:00<00:00, 169.01file/s]
Conversion (Rhydon): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 122/122 [00:00<00:00, 221.34file/s]
Conversion (Charmander): 100%|█████████████████████████████████████████████████████████████████

In [6]:
# Augment in place inside the converted dataset. Reusing `target_directory`
# (instead of repeating the path literal) keeps this step pointed at the
# folder the conversion step actually wrote to.
augment_images(target_directory, num_augmentations=3)

Augmentation images: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 17182/17182 [10:35<00:00, 27.04file/s]


# 2 - Preview the augmented images

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def display_augmented_images(directory, max_classes=6, images_per_class=3):
    """Plot a row of augmented samples for the first few sub-folders of ``directory``.

    For each visited sub-folder, up to ``images_per_class`` JPEG files whose
    name ends in ``_aug<N>`` are loaded and shown side by side. Folders
    without such files produce no figure and do not count towards
    ``max_classes``.

    Args:
        directory: Root directory to walk.
        max_classes: Maximum number of figures to display (and number of
            sub-folders inspected per directory level).
        images_per_class: Number of images shown per figure.
    """
    shown = 0
    for root, dirs, _files in os.walk(directory):
        for class_dir in dirs[:max_classes]:
            dir_path = os.path.join(root, class_dir)
            samples = []
            # Sorted so the preview is the same on every run.
            for file in sorted(os.listdir(dir_path)):
                if len(samples) >= images_per_class:
                    break
                stem, ext = os.path.splitext(file)
                _head, marker, index = stem.rpartition('_aug')
                # Require the exact "_aug<N>" suffix; a plain substring test
                # would also match unrelated names containing "aug".
                if ext.lower() in ('.jpg', '.jpeg') and marker and index.isdigit():
                    # Copy the pixels while the file is open, then release the handle.
                    with Image.open(os.path.join(dir_path, file)) as img:
                        samples.append(np.array(img))

            if samples:
                fig, axs = plt.subplots(
                    1, images_per_class,
                    figsize=(5 * images_per_class, 5),
                    squeeze=False,  # always 2-D, even when images_per_class == 1
                )
                for ax, img in zip(axs[0], samples):
                    ax.imshow(img)
                for ax in axs[0]:
                    # Also hides the frame of slots that received no image.
                    ax.axis('off')
                plt.show()
                plt.close(fig)
                shown += 1
            if shown >= max_classes:
                return

In [None]:
# Visual sanity check of the augmentation output.
display_augmented_images(directory=target_directory)

# 3 - Resize images

In [7]:
from PIL import Image
import os
from tqdm import tqdm  # Importation de tqdm pour la barre de progression
from multiprocessing import Pool, cpu_count

def ensure_jpg_extension(output_path):
    """Return ``output_path`` with a ``.jpg`` extension.

    A path whose extension is already ``.jpg`` (in any letter case) is
    returned unchanged; any other extension, or none at all, is replaced
    by ``.jpg``.
    """
    stem, suffix = os.path.splitext(output_path)
    return output_path if suffix.lower() == '.jpg' else stem + '.jpg'

def resize_image_task(args):
    """Resize one image and save it as a JPEG (multiprocessing worker).

    Best effort: a file that cannot be opened, converted or written is
    reported on stdout and skipped, so one bad image does not abort the
    whole batch.

    Args:
        args: Tuple ``(input_path, output_path, size, quality)``. ``size`` is
            passed to ``Image.resize`` and ``quality`` to ``Image.save``.
            The output path is forced to a ``.jpg`` extension.
    """
    input_path, output_path, size, quality = args
    try:
        # Opening is inside the try as well: unreadable files are the most
        # likely failure and must not escape the worker.
        with Image.open(input_path) as img:
            resized = img.convert('RGB').resize(size, Image.LANCZOS)
            resized.save(ensure_jpg_extension(output_path), optimize=True, quality=quality)
    except Exception as exc:
        # `Exception` (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate; include the cause so the failure is diagnosable.
        print(f"{input_path}: {type(exc).__name__}: {exc}")

def resize_images_in_directory(input_directory, output_directory, size=(224, 224), quality=85):
    """Resize every image under ``input_directory`` into a mirrored tree under ``output_directory``.

    The sub-directory layout is preserved. Each file is handled by
    ``resize_image_task`` in a worker pool, which converts it to RGB, resizes
    it to ``size`` and stores it with a ``.jpg`` extension.

    Args:
        input_directory: Root directory walked recursively for images.
        output_directory: Root directory receiving the resized copies.
        size: Target ``(width, height)`` handed to ``Image.resize``.
        quality: JPEG quality handed to ``Image.save``.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

    tasks = []
    for root, _dirs, files in os.walk(input_directory):
        # Case-insensitive match so files such as "IMG_001.JPG" are not skipped.
        matching = [name for name in files if name.lower().endswith(image_extensions)]
        if not matching:
            continue
        output_folder = os.path.join(output_directory, os.path.relpath(root, input_directory))
        os.makedirs(output_folder, exist_ok=True)
        for filename in matching:
            tasks.append((
                os.path.join(root, filename),
                os.path.join(output_folder, filename),
                size,
                quality,
            ))

    if not tasks:
        # Nothing to do: avoid starting a worker pool for an empty job list.
        return

    # Process the images in parallel; the loop only drives the progress bar.
    with Pool(processes=cpu_count()) as pool:
        for _ in tqdm(pool.imap_unordered(resize_image_task, tasks), total=len(tasks), desc="Resizing images", unit="file"):
            pass

# Produce the 128x128 version of the augmented dataset.
# NOTE(review): Pillow's documentation advises against JPEG quality above 95;
# confirm that quality=100 is intentional.
input_directory = '../data/pokemon-augmented'
output_directory = '../data/pokemon-128augmented'
resize_images_in_directory(
    input_directory,
    output_directory,
    size=(128, 128),
    quality=100,
)


Resizing images: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 68728/68728 [01:04<00:00, 1067.58file/s]
