# Data augmentation 

A seguir, combinamos o código desenvolvido nas sprints anteriores com o processo de mesclagem de imagens (pela média dos pixels) e com o processo de Data Augmentation, que foi atualizado desde a Sprint 1. Removemos alguns filtros e implementamos o código necessário para garantir que o conjunto de imagens seja eficaz.

- [Pré-processamento de imagens SPRINT 1](../../SPRINT%201/20240426%20-%20Imagens%20TIF%20e%20Data%20Augmentation.ipynb)
- [Data Augmentation SPRINT 3](../../SPRINT%203/DATA%20AUGMENTATION/20240524%20-%20Data%20Augmentation.ipynb)

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from google.colab import drive
import os
from PIL import Image

# Mount Google Drive at /content/drive; the dataset paths used in this notebook live under this mount point
drive.mount('/content/drive')

class ImageMerge:
    """Merge the TIF images of each directory into one contrast-enhanced PNG.

    For every directory under ``root_dir`` that contains ``.tif`` files, the
    files are loaded, resized to ``target_size``, averaged pixel-wise,
    min-max normalised to 8 bits, passed through CLAHE and written to
    ``output_dir``.
    """

    def __init__(self, root_dir, output_dir):
        """Store the input/output locations and the common image size.

        Args:
            root_dir: Directory tree walked recursively in search of ``.tif`` files.
            output_dir: Directory that receives the merged PNG files.
        """
        self.root_dir = root_dir
        self.output_dir = output_dir
        # (height, width): compared directly against ``image.shape[:2]``.
        self.target_size = (600, 600)

    def load_image(self, path):
        """Load one image from ``path`` and resize it to ``target_size``.

        Args:
            path: File path of the image to read.

        Returns:
            The image as read by ``cv2.imread`` (resized when its height/width
            differ from ``target_size``), or ``None`` when it cannot be read.
        """
        image = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        if image is None:
            print(f"Falha ao carregar a imagem: {path}")
            return None
        if image.shape[:2] != self.target_size:
            # cv2.resize expects (width, height), hence the reversed tuple.
            image = cv2.resize(image, self.target_size[::-1])
        return image

    def process_directory(self):
        """Merge the ``.tif`` files of every directory under ``root_dir``.

        The output file is named after the directory's base name up to its
        first underscore, so directories sharing that prefix write to the same
        PNG (the later one overwrites the earlier one).

        Returns:
            List with the CLAHE-enhanced merged image of each directory that
            had at least one readable ``.tif`` file.
        """
        create_dirs(self.output_dir)
        merged_images = []
        for subdir, _, files in os.walk(self.root_dir):
            tif_files = sorted([os.path.join(subdir, file) for file in files if file.endswith('.tif')])
            if not tif_files:
                continue
            # Load every file exactly once, then drop the failed reads; the
            # previous filter-in-comprehension read each image twice.
            loaded = [self.load_image(file) for file in tif_files]
            images = [image for image in loaded if image is not None]
            if not images:
                continue
            merged_image = self.merge_images(images)
            clahe_image = self.apply_clahe(merged_image)
            directory_name = os.path.basename(subdir)
            image_name = directory_name.split('_')[0]
            output_path = os.path.join(self.output_dir, f"{image_name}.png")
            # cv2.imwrite reports failure through its return value, not an exception.
            if not cv2.imwrite(output_path, clahe_image):
                print(f"Falha ao salvar a imagem: {output_path}")
            merged_images.append(clahe_image)
        return merged_images

    def merge_images(self, images):
        """Average several images pixel-wise and rescale the result to 8 bits.

        Args:
            images: Non-empty sequence of equally shaped image arrays.

        Returns:
            The mean image, min-max normalised to the range 0-255 as ``CV_8U``.

        Raises:
            ValueError: If ``images`` is empty.
        """
        if not images:
            raise ValueError("Nenhuma imagem foi carregada no pipeline.")
        merged_image = np.mean(images, axis=0, dtype=np.float32)
        merged_image = cv2.normalize(merged_image, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)
        return merged_image

    def apply_clahe(self, image):
        """Improve contrast with CLAHE (Contrast Limited Adaptive Histogram Equalization).

        Args:
            image: Image array; it is cast to ``uint8`` before equalisation.

        Returns:
            The equalised image (clip limit 2.0, 8x8 tile grid).
        """
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        return clahe.apply(image.astype(np.uint8))

def create_dirs(path):
    """Create ``path``, including missing parent directories, if it does not exist.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` already exists but is not a directory.
    """
    # exist_ok=True makes the call idempotent without a separate (racy)
    # os.path.exists check beforehand.
    os.makedirs(path, exist_ok=True)

# Paths: directory tree searched for .tif files and folder that receives the merged PNGs
root_dir = '/content/drive/Shared drives/Grupo T de Tech/Data/dataset_inteli/images'
output_dir = '/content/drive/Shared drives/Grupo T de Tech/Data/dataset_inteli/merged_images'

# Merge the images of each directory and save them
pipe = ImageMerge(root_dir, output_dir)
merged_images = pipe.process_directory()

In [None]:
def load_masks(masks_dir, target_size=(600, 600)):
    """Load every mask found in ``masks_dir``, resized to ``target_size``.

    Entries are visited in sorted file-name order. Entries that OpenCV cannot
    read are reported and skipped.

    Args:
        masks_dir: Directory containing the mask files.
        target_size: Desired ``(height, width)`` of each returned mask.

    Returns:
        Tuple ``(masks, mask_filenames)``: the loaded mask arrays and the
        matching file names without extension, in the same order.
    """
    size = tuple(target_size)  # also accept a list; shape[:2] is always a tuple
    masks = []
    mask_filenames = []
    for mask_name in sorted(os.listdir(masks_dir)):
        mask_path = os.path.join(masks_dir, mask_name)
        mask = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
        if mask is None:
            print(f"Falha ao carregar a máscara: {mask_path}")
            continue
        if mask.shape[:2] != size:
            # Nearest-neighbour keeps the mask's original pixel values; the
            # default bilinear interpolation would blend neighbouring values
            # into new ones along region borders. cv2.resize takes
            # (width, height), hence the reversed tuple.
            mask = cv2.resize(mask, size[::-1], interpolation=cv2.INTER_NEAREST)
        masks.append(mask)
        mask_filenames.append(os.path.splitext(mask_name)[0])
    return masks, mask_filenames

def load_images(image_dir, target_size=(600, 600)):
    """Read every image in ``image_dir`` and bring it to ``target_size``.

    Directory entries are processed in sorted name order; anything OpenCV
    cannot read is reported and left out of the result.

    Args:
        image_dir: Directory holding the image files.
        target_size: ``(height, width)`` every returned image should have.

    Returns:
        Tuple ``(images, image_filenames)`` with the image arrays and their
        file names stripped of the extension, in matching order.
    """
    loaded, stems = [], []
    for entry in sorted(os.listdir(image_dir)):
        full_path = os.path.join(image_dir, entry)
        picture = cv2.imread(full_path, cv2.IMREAD_UNCHANGED)
        if picture is None:
            print(f"Falha ao carregar a imagem: {full_path}")
            continue
        needs_resize = picture.shape[:2] != target_size
        if needs_resize:
            # cv2.resize wants (width, height), so the size is reversed.
            picture = cv2.resize(picture, target_size[::-1])
        loaded.append(picture)
        stem, _extension = os.path.splitext(entry)
        stems.append(stem)
    return loaded, stems

# Paths: folder with the masks and folder with the merged images
masks_dir = '/content/drive/Shared drives/Grupo T de Tech/Data/dataset_inteli/masks'
image_dir = '/content/drive/Shared drives/Grupo T de Tech/Data/dataset_inteli/merged_images'

# Load masks and images (each call also returns the file names without extension)
masks, mask_filenames = load_masks(masks_dir)
images, image_filenames = load_images(image_dir)

In [None]:
# Make sure masks and images are paired by file name and come in the same order.
# An explicit raise is used instead of `assert` so the check still runs under
# `python -O`, and the message lists the names that exist on one side only.
if mask_filenames != image_filenames:
    only_in_masks = sorted(set(mask_filenames) - set(image_filenames))
    only_in_images = sorted(set(image_filenames) - set(mask_filenames))
    raise ValueError(
        "A ordem das máscaras e imagens não coincide! "
        f"Somente em máscaras: {only_in_masks}; somente em imagens: {only_in_images}"
    )

class ImageProcessingPipeline:
    """Crop paired images and masks into tiles, augment them and save the result."""

    def __init__(self, images, masks):
        """Keep the paired inputs.

        Args:
            images: Sequence of image arrays.
            masks: Sequence of mask arrays, paired position-by-position with ``images``.
        """
        self.images = images
        self.masks = masks

    def crop_image(self, image, crop_size=(200, 200)):
        """Split an image into non-overlapping tiles of ``crop_size``.

        Args:
            image: Array indexed as ``[row, column, ...]``.
            crop_size: ``(height, width)`` of each tile.

        Returns:
            List of tiles in row-major order. Partial tiles at the bottom or
            right edge are discarded.
        """
        crop_h, crop_w = crop_size
        crops = []
        # Stopping at ``size - crop + 1`` visits only offsets that yield a full tile.
        for top in range(0, image.shape[0] - crop_h + 1, crop_h):
            for left in range(0, image.shape[1] - crop_w + 1, crop_w):
                crops.append(image[top:top + crop_h, left:left + crop_w])
        return crops

    def augment_images(self, image):
        """Return the rotated and mirrored variants of ``image``.

        Args:
            image: Image array to augment.

        Returns:
            List of 8 arrays: for each angle in 0, 90, 180 and 270 degrees,
            the rotated image followed by its horizontal mirror.
        """
        aug_images = []
        for angle in [0, 90, 180, 270]:
            rotated = self.rotate_image(image, angle)
            aug_images.append(rotated)
            aug_images.append(cv2.flip(rotated, 1))
        return aug_images

    @staticmethod
    def rotate_image(image, angle):
        """Rotate ``image`` counter-clockwise by ``angle`` degrees, keeping its shape.

        Multiples of 90 degrees that preserve the shape (any half turn, or a
        quarter turn of a square image) are done with ``np.rot90``, which is an
        exact pixel permutation. The affine warp about ``(w // 2, h // 2)`` is
        half a pixel off-centre for even sizes, so it shifts the content,
        leaves a blank border line and interpolates values.

        Args:
            image: Array indexed as ``[row, column, ...]``.
            angle: Rotation angle in degrees; positive is counter-clockwise.

        Returns:
            A new array with the same height and width as ``image``.
        """
        (h, w) = image.shape[:2]
        quarter_turns, remainder = divmod(angle, 90)
        if remainder == 0:
            quarter_turns = int(quarter_turns) % 4
            if quarter_turns % 2 == 0 or h == w:
                # copy() yields a fresh contiguous array, as warpAffine would.
                return np.rot90(image, k=quarter_turns).copy()
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, angle, 1.0)
        return cv2.warpAffine(image, M, (w, h))

    @staticmethod
    def _to_uint8(image):
        """Convert an image array to ``uint8`` without integer overflow.

        ``uint8`` input is returned unchanged: multiplying it by 255 wraps
        modulo 256 and corrupts the pixel values. Floating-point input is
        assumed to be normalised to [0, 1] (as the original ``* 255`` scaling
        implies) and is clipped to that range before scaling. Other integer
        types are clipped to 0-255.
        """
        image = np.asarray(image)
        if image.dtype == np.uint8:
            return image
        if np.issubdtype(image.dtype, np.floating):
            return (np.clip(image, 0.0, 1.0) * 255).astype(np.uint8)
        return np.clip(image, 0, 255).astype(np.uint8)

    def process_and_save_images_and_masks(self, output_dir):
        """Crop, augment and save every image/mask pair into ``output_dir``.

        Files are written as ``processed_image_<n>.tif`` and
        ``processed_mask_<n>.png``, where ``<n>`` is a running counter shared
        by each image tile and its mask tile.

        Args:
            output_dir: Destination directory; created when missing.
        """
        create_dirs(output_dir)
        count = 0
        for img, mask in zip(self.images, self.masks):
            cropped_images = self.crop_image(img)
            cropped_masks = self.crop_image(mask)
            for crop_img, crop_mask in zip(cropped_images, cropped_masks):
                augmented_imgs = self.augment_images(crop_img)
                augmented_masks = self.augment_images(crop_mask)
                for aug_img, aug_mask in zip(augmented_imgs, augmented_masks):
                    # Dtype-aware conversion: images that are already uint8
                    # must not be multiplied by 255.
                    aug_img = Image.fromarray(self._to_uint8(aug_img))
                    aug_mask = Image.fromarray(aug_mask)
                    aug_img.save(os.path.join(output_dir, f'processed_image_{count}.tif'))
                    aug_mask.save(os.path.join(output_dir, f'processed_mask_{count}.png'))
                    count += 1

    def show_image(self, image):
        """Display ``image`` with a grayscale colormap and no axes."""
        plt.imshow(image, cmap='gray')
        plt.axis('off')
        plt.show()

# Crop, augment and save the images and masks
output_dir = '/content/drive/Shared drives/Grupo T de Tech/Data/dataset_inteli/dataset_merged_images_1'
pipeline = ImageProcessingPipeline(images, masks)
pipeline.process_and_save_images_and_masks(output_dir)

# Preview: note this displays entries of `images` (the list passed to the pipeline), not the augmented tiles saved above
for img in images[:8]:  # only the first 8 entries
    pipeline.show_image(img)


Caso haja necessidade de executar o código: devido ao tamanho dos datasets, optamos por mantê-los no Google Drive. Eles podem ser acessados através dos links abaixo:

- [Conjunto de imagens geradas através do pré-processamento de imagens TIF e Data Augmentation](https://drive.google.com/drive/folders/11svbYLuN3Ch8KJOUE1tyZvUWrRD8Nhcs?usp=sharing)