In [None]:
import os
import numpy as np
import cv2
from albumentations import (
    HorizontalFlip, VerticalFlip, RandomRotate90, RandomBrightnessContrast,
    ElasticTransform, GridDistortion, ShiftScaleRotate
)
from albumentations.core.composition import Compose
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import shutil

def augment_dataset(image_dir, mask_dir, save_dir, max_augmentations=2646):
    """Generate augmented image/mask pairs, spread evenly over the source pairs.

    Every file in ``image_dir`` is paired with the file at the same position
    in ``mask_dir`` after sorting both listings by path. The augmentation
    budget is divided across those pairs so that each source image
    contributes roughly ``max_augmentations / number_of_pairs`` samples,
    instead of the first image consuming the entire budget.

    Augmented samples are written as ``aug_<k>.png`` into
    ``<save_dir>/images`` and ``<save_dir>/masks``; an image and its mask
    share the same file name.

    Args:
        image_dir: Directory containing the source images.
        mask_dir: Directory containing the source masks.
        save_dir: Output root; ``images`` and ``masks`` sub-directories are
            created inside it if they do not exist.
        max_augmentations: Upper bound on the number of augmented pairs
            written in total.

    Raises:
        ValueError: If ``image_dir`` and ``mask_dir`` do not contain the same
            number of entries (positional pairing would be unreliable).
    """
    # Random transformations applied jointly to the image and its mask.
    transforms = Compose([
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        RandomRotate90(p=0.5),
        RandomBrightnessContrast(p=0.5),
        ElasticTransform(p=0.5, alpha=120, sigma=120 * 0.05, approximate=True),
        GridDistortion(p=0.5),
        ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, p=0.5)
    ])

    # Collect image and mask paths; pairing relies on identical sort order.
    image_paths = sorted(os.path.join(image_dir, name) for name in os.listdir(image_dir))
    mask_paths = sorted(os.path.join(mask_dir, name) for name in os.listdir(mask_dir))
    if len(image_paths) != len(mask_paths):
        # zip() would silently truncate and could pair an image with the
        # wrong mask, so refuse to continue instead.
        raise ValueError(
            f"Found {len(image_paths)} images but {len(mask_paths)} masks; "
            "each image needs exactly one mask."
        )

    # Create the output folders for the augmented samples.
    images_out = os.path.join(save_dir, "images")
    masks_out = os.path.join(save_dir, "masks")
    os.makedirs(images_out, exist_ok=True)
    os.makedirs(masks_out, exist_ok=True)

    pairs = list(zip(image_paths, mask_paths))
    total_augmentations = 0

    for index, (img_path, mask_path) in enumerate(tqdm(pairs, total=len(pairs))):
        remaining_budget = max_augmentations - total_augmentations
        if remaining_budget <= 0:
            break

        image = cv2.imread(img_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if image is None or mask is None:
            # cv2.imread returns None for files it cannot decode; the unused
            # share of the budget is absorbed by the remaining pairs.
            print(f"Skipping unreadable pair: {img_path} / {mask_path}")
            continue

        # Ceiling division of what is left over the pairs still to process,
        # so the budget is spread evenly and any remainder is used up.
        remaining_sources = len(pairs) - index
        quota = -(-remaining_budget // remaining_sources)

        for _ in range(quota):
            augmented = transforms(image=image, mask=mask)

            # Image and mask share one file name so they can be re-paired later.
            file_name = f"aug_{total_augmentations}.png"
            cv2.imwrite(os.path.join(images_out, file_name), augmented["image"])
            cv2.imwrite(os.path.join(masks_out, file_name), augmented["mask"])

            total_augmentations += 1

    print(f"Augmentation terminée. Total des images : {total_augmentations}")


def split_train_valid(augmented_data_path, train_output_path, valid_output_path, test_size=0.2):
    """Split the augmented dataset into training and validation folders.

    Reads every ``*.png`` file from ``<augmented_data_path>/images`` and
    ``<augmented_data_path>/masks``, splits the sorted lists with
    ``train_test_split`` (fixed ``random_state=42``) and copies the files
    into ``images`` / ``masks`` sub-directories of the two output paths.

    Args:
        augmented_data_path: Root folder holding the ``images`` and ``masks``
            sub-directories to split.
        train_output_path: Destination root for the training subset.
        valid_output_path: Destination root for the validation subset.
        test_size: Passed to ``train_test_split`` as the validation share.

    Raises:
        AssertionError: If the number of images and masks differs.
    """
    # Imported locally: the file's import section does not provide ``glob``,
    # which previously made this function fail with a NameError.
    from glob import glob

    # Create the train and validation folders.
    os.makedirs(os.path.join(train_output_path, "images"), exist_ok=True)
    os.makedirs(os.path.join(train_output_path, "masks"), exist_ok=True)
    os.makedirs(os.path.join(valid_output_path, "images"), exist_ok=True)
    os.makedirs(os.path.join(valid_output_path, "masks"), exist_ok=True)

    # Load all image and mask paths; sorting keeps the two lists aligned
    # as long as an image and its mask share the same file name.
    images = sorted(glob(os.path.join(augmented_data_path, "images", "*.png")))
    masks = sorted(glob(os.path.join(augmented_data_path, "masks", "*.png")))

    # Sanity check before splitting.
    assert len(images) == len(masks), "Le nombre d'images et de masques doit être identique"

    # Split images and masks together so pairs stay in the same subset.
    train_images, valid_images, train_masks, valid_masks = train_test_split(
        images, masks, test_size=test_size, random_state=42
    )

    def copy_files(file_list, dest_folder):
        """Copy each file of ``file_list`` into ``dest_folder``, keeping its name."""
        for file_path in file_list:
            file_name = os.path.basename(file_path)
            shutil.copy(file_path, os.path.join(dest_folder, file_name))

    copy_files(train_images, os.path.join(train_output_path, "images"))
    copy_files(train_masks, os.path.join(train_output_path, "masks"))
    copy_files(valid_images, os.path.join(valid_output_path, "images"))
    copy_files(valid_masks, os.path.join(valid_output_path, "masks"))

    print(f"Nombre d'images d'entraînement : {len(train_images)}")
    print(f"Nombre d'images de validation : {len(valid_images)}")


# Input and output locations used by the two pipeline steps below.
save_dir = "/content/drive/MyDrive/DATASET/Augmented_Dataset"
image_dir = "/content/drive/MyDrive/DATASET/DATASETNOUR/Training/images"
mask_dir = "/content/drive/MyDrive/DATASET/DATASETNOUR/Training/masks"
valid_output_path = os.path.join(save_dir, "valid")
train_output_path = os.path.join(save_dir, "train")

# Step 1: write the augmented images and masks under save_dir.
augment_dataset(
    image_dir=image_dir,
    mask_dir=mask_dir,
    save_dir=save_dir,
    max_augmentations=2646,
)

# Step 2: split the augmented data into train and validation folders.
split_train_valid(
    augmented_data_path=save_dir,
    train_output_path=train_output_path,
    valid_output_path=valid_output_path,
)


  0%|          | 0/42 [31:20<?, ?it/s]


KeyboardInterrupt: 