# Pipeline de processamento e preparação de dados de imagens para treinamento

Este notebook unifica todo o código encontrado nos arquivos **"20240425 - KMeans Agro.ipynb"** e **"20240426_Pipeline_Básica_de_Processamento_de_Imagem.ipynb"**. Esses arquivos apresentam duas implementações distintas, porém valiosas para o projeto. Neste momento inicial, optamos por manter essas duas abordagens diferentes para realizar testes na Sprint 2 com as modelagens iniciais das Redes Neurais Convolucionais. Contudo, espera-se que, na próxima Sprint, unifiquemos esse processo em um só.

Com o objetivo de preservar a inteligibilidade deste arquivo, optamos por manter os testes unitários exclusivamente em seus respectivos arquivos, Kmeans Agro e Pipeline Básica.

## 1. Implementação utilizando KMeans

In [None]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import tqdm

class KMeansImageProcessingPipeline:
    """Colour-quantise the images of a directory with K-means and sharpen them.

    The pipeline reads every regular file found in ``base_dir`` as an image,
    reduces it to ``k`` colours with ``cv2.kmeans`` and then applies a 3x3
    sharpening kernel.
    """

    def __init__(self, base_dir):
        """Remember the directory that holds the input images.

        Args:
            base_dir: Path of the directory whose files are read as images.
        """
        self.base_dir = base_dir

    def _list_image_paths(self):
        """Return the full paths of the regular files in ``base_dir``, sorted.

        ``os.listdir`` yields entries in arbitrary order and also lists
        sub-directories. Sorting makes the processing order (and the sample
        picked by ``analyze_kmeans_inertia``) reproducible; keeping only
        regular files avoids handing directories to ``cv2.imread``.
        """
        candidates = (
            os.path.join(self.base_dir, entry)
            for entry in sorted(os.listdir(self.base_dir))
        )
        return [path for path in candidates if os.path.isfile(path)]

    def read_and_process_image(self, path):
        """Load the image at ``path`` and convert it from BGR to RGB.

        Args:
            path: Path of the image file to read.

        Returns:
            The image as returned by ``cv2.cvtColor(..., cv2.COLOR_BGR2RGB)``.

        Raises:
            ValueError: If ``cv2.imread`` returns ``None`` for ``path``.
        """
        img = cv2.imread(path)
        if img is None:
            # Fail here with the offending path instead of letting cvtColor
            # raise an opaque error on a None input.
            raise ValueError(f"Failed to load image: {path}")
        return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    def kmeans_image(self, img, k, attempts=10):
        """Replace every pixel of ``img`` by the centre of its K-means cluster.

        Args:
            img: Image whose pixels can be reshaped to rows of 3 values.
            k: Number of clusters requested from ``cv2.kmeans``.
            attempts: Forwarded to ``cv2.kmeans`` as its ``attempts`` argument.

        Returns:
            An array with the same shape as ``img`` whose pixels are the
            ``uint8`` cluster centres.
        """
        # One row per pixel, one column per channel; cv2.kmeans needs float32.
        pixels = np.float32(img.reshape((-1, 3)))

        # Termination criteria: epsilon of 1.0 or 10 iterations.
        criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)

        _, labels, centers = cv2.kmeans(
            pixels, k, None, criteria, attempts, cv2.KMEANS_PP_CENTERS
        )
        centers = np.uint8(centers)

        # Map each pixel to its cluster centre and restore the image shape.
        return centers[labels.flatten()].reshape(img.shape)

    def sharpen_image(self, img):
        """Sharpen ``img`` with a 3x3 kernel (centre 9, neighbours -1).

        Args:
            img: Image passed to ``cv2.filter2D``.

        Returns:
            The filtered image, with the same depth as the input (``ddepth=-1``).
        """
        kernel = np.array([[-1, -1, -1],
                           [-1, 9, -1],
                           [-1, -1, -1]])
        return cv2.filter2D(img, -1, kernel)

    def process_all_images(self, k, attempts=10):
        """Quantise and sharpen every image file found in ``base_dir``.

        Files are processed in sorted name order; sub-directories are skipped.

        Args:
            k: Number of K-means clusters per image.
            attempts: Forwarded to ``kmeans_image``.

        Returns:
            A list with one processed image per input file.

        Raises:
            ValueError: If one of the files cannot be read as an image.
        """
        processed_images = []

        for path in tqdm.tqdm(self._list_image_paths()):
            img = self.read_and_process_image(path)
            quantised = self.kmeans_image(img, k, attempts)
            processed_images.append(self.sharpen_image(quantised))

        return processed_images

    def analyze_kmeans_inertia(self, k_range):
        """Plot K-means inertia, and its change, over ``k_range`` for one image.

        The first image file of ``base_dir`` (in sorted name order) is used as
        the sample.

        Args:
            k_range: Sliceable sequence of cluster counts, e.g. ``range(4, 16)``.

        Raises:
            ValueError: If ``base_dir`` contains no files, or the sample file
                cannot be read as an image.
        """
        image_paths = self._list_image_paths()
        if not image_paths:
            raise ValueError(f"No image files found in: {self.base_dir}")

        img = self.read_and_process_image(image_paths[0])
        pixels = np.float32(img.reshape((-1, 3)))

        inertias = []
        for k in k_range:
            kmeans = KMeans(n_clusters=k, random_state=42)
            kmeans.fit(pixels)
            inertias.append(kmeans.inertia_)

        plt.figure(figsize=[20, 5])

        # Left panel: inertia for each k.
        plt.subplot(1, 2, 1)
        plt.plot(k_range, inertias, "-o")
        plt.xlabel("$k$", fontsize=14)
        plt.ylabel("Inertia", fontsize=14)
        plt.grid(True)

        # Right panel: difference between consecutive inertias.
        plt.subplot(1, 2, 2)
        plt.plot(k_range[:-1], np.diff(inertias), "-o")
        plt.xlabel("$k$", fontsize=14)
        plt.ylabel("Change in inertia", fontsize=14)
        plt.grid(True)
        plt.show()

# Usage: quantise every image of the dataset folder, then plot the inertia curve
base_dir = '../../data/dataset_inteli_test/tci_pngs'
pipeline = KMeansImageProcessingPipeline(base_dir=base_dir)
processed_images = pipeline.process_all_images(k=5)  # adjust k as needed
pipeline.analyze_kmeans_inertia(k_range=range(4, 16))


In [None]:
# Display every K-means-processed image, one matplotlib figure per image.
for img in processed_images:
		plt.imshow(img)
		plt.show()

Mais informações e testes unitários em: [Arquivo KMeans](../notebooks/20240425%20-%20KMeans%20Agro.ipynb)

## 2. Visualização de imagens: PCA e Média

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

class ImageVisualization:
    """Load a list of image files and merge them into one image.

    Every loaded image whose (height, width) differs from ``target_size`` is
    resized to it, so that the images can be combined pixel by pixel either
    by averaging (``merge_images``) or by PCA (``merge_images_pca``).
    """

    def __init__(self, image_paths):
        """Load each path in ``image_paths``; paths that fail to load are skipped.

        Args:
            image_paths: Iterable of image file paths.
        """
        self.images = []
        self.target_size = (1200, 1200)

        loaded = (self.load_image(path) for path in image_paths)
        self.images.extend(image for image in loaded if image is not None)

    def load_image(self, path):
        """Read one image unchanged and resize it to ``target_size`` if needed.

        Args:
            path: Path of the image file.

        Returns:
            The loaded image, or ``None`` (after printing a message) when
            ``cv2.imread`` could not read the file.
        """
        image = cv2.imread(path, cv2.IMREAD_UNCHANGED)

        if image is None:
            print(f"Failed to load image: {path}")
            return None

        if image.shape[:2] == self.target_size:
            return image

        # shape[:2] is (height, width) while cv2.resize takes (width, height).
        return cv2.resize(image, self.target_size[::-1])

    def _ensure_images_loaded(self):
        """Raise ``ValueError`` when no image has been loaded."""
        if not self.images:
            raise ValueError("Nenhuma imagem foi carregada no pipeline.")

    def merge_images(self):
        """Average all loaded images pixel by pixel and rescale to 0-255.

        Returns:
            The min-max normalised mean image as 8-bit unsigned values.

        Raises:
            ValueError: If no image was loaded.
        """
        self._ensure_images_loaded()

        # Each output pixel is the mean of the matching pixels of all images.
        averaged = np.mean(self.images, axis=0, dtype=np.float32)

        # Min-max normalisation to the 0-255 range.
        return cv2.normalize(
            averaged,
            None,
            alpha=0,
            beta=255,
            norm_type=cv2.NORM_MINMAX,
            dtype=cv2.CV_8U,
        )

    def merge_images_pca(self, n_components=3):
        """Merge the loaded images through PCA into ``n_components`` channels.

        Args:
            n_components: Number of principal components kept; it becomes the
                channel count of the result.

        Returns:
            A ``uint8`` image shaped like the first loaded image's first two
            dimensions plus ``n_components`` channels.

        Raises:
            ValueError: If no image was loaded.
        """
        self._ensure_images_loaded()

        # 2D matrix: one row per flattened pixel value, one column per image.
        flattened = [image.ravel() for image in self.images]
        data = np.stack(flattened, axis=-1)

        components = PCA(n_components=n_components).fit_transform(data)

        output_shape = self.images[0].shape[:2] + (n_components,)
        scaled = cv2.normalize(
            components.reshape(output_shape),
            None,
            0,
            255,
            norm_type=cv2.NORM_MINMAX,
        )
        return scaled.astype(np.uint8)

    def view_image(self, image):
        """Show ``image`` in a figure whose size in pixels matches the image.

        Images with two dimensions or a single channel are drawn with a gray
        colormap; any other image is converted with ``cv2.COLOR_BGR2RGB``
        before being drawn.

        Args:
            image: Image array to display.
        """
        dpi = 100
        height, width = image.shape[:2]
        plt.figure(figsize=(width / float(dpi), height / float(dpi)), dpi=dpi)

        is_gray = image.ndim == 2 or image.shape[2] == 1
        if is_gray:
            plt.imshow(image, cmap='gray')
        else:
            plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        plt.axis('off')
        plt.show()

### 2.1 - Criando imagens para utilizar na etapa 3

In [None]:
# Build the band-averaged image with merge_images, one input file per band.
image_paths = [
    '../../data/dataset_inteli_test/images/595_2019-8-14_S2L1C_21JYJ/b11.tif',
    '../../data/dataset_inteli_test/images/595_2019-8-14_S2L1C_21JYJ/b12.tif',
    '../../data/dataset_inteli_test/images/595_2019-8-14_S2L1C_21JYJ/b2.tif',
    '../../data/dataset_inteli_test/images/595_2019-8-14_S2L1C_21JYJ/b3.tif',
    '../../data/dataset_inteli_test/images/595_2019-8-14_S2L1C_21JYJ/b4.tif',
    '../../data/dataset_inteli_test/images/595_2019-8-14_S2L1C_21JYJ/b5.tif',
    '../../data/dataset_inteli_test/images/595_2019-8-14_S2L1C_21JYJ/b6.tif',
    '../../data/dataset_inteli_test/images/595_2019-8-14_S2L1C_21JYJ/b7.tif',
    '../../data/dataset_inteli_test/images/595_2019-8-14_S2L1C_21JYJ/b8.tif',
    '../../data/dataset_inteli_test/images/595_2019-8-14_S2L1C_21JYJ/b8a.tif',
]

image_viz = ImageVisualization(image_paths)

merged_image_mean = image_viz.merge_images()
image_viz.view_image(merged_image_mean)

In [None]:
# Build the PCA-merged image with merge_images_pca (default number of
# components) from the images already loaded in image_viz, then display it.
merged_image_pca = image_viz.merge_images_pca()
image_viz.view_image(merged_image_pca)

## 3. Pipeline de processamento

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

class ImageProcessingPipeline:
    """Turn a list of images into normalised, cropped and augmented samples.

    For every input image the pipeline applies CLAHE (single-channel images
    only), divides by 255, resizes, cuts the result into fixed-size crops and
    produces rotated/flipped variants of each crop.
    """

    def __init__(self, images):
        """Store the images to be processed.

        Args:
            images: Iterable of image arrays.
        """
        self.images = images

    @staticmethod
    def normalize_image(image):
        """Return ``image`` divided by 255.0."""
        return image / 255.0

    @staticmethod
    def resize_image(image, target_size=(1200, 1200)):
        """Resize ``image`` with ``cv2.resize`` to ``target_size``."""
        return cv2.resize(image, target_size)

    @staticmethod
    def _is_single_channel(image):
        """Return True when ``image`` is a 2-D (height x width) array."""
        return image.ndim == 2

    def apply_clahe(self, image):
        """Apply CLAHE (clip limit 2.0, 8x8 tiles) to ``image`` cast to uint8.

        Args:
            image: Single-channel image.

        Returns:
            The contrast-equalised image.
        """
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        return clahe.apply(image.astype(np.uint8))

    @staticmethod
    def crop_image(image, crop_size=(200, 200)):
        """Split ``image`` into non-overlapping crops of exactly ``crop_size``.

        Partial tiles at the bottom and right borders are discarded.

        Args:
            image: Array with at least two dimensions.
            crop_size: (rows, columns) of every crop.

        Returns:
            A list of crops in row-major order.
        """
        crop_rows, crop_cols = crop_size[0], crop_size[1]
        crops = []

        for top in range(0, image.shape[0], crop_rows):
            for left in range(0, image.shape[1], crop_cols):
                crop = image[top:top + crop_rows, left:left + crop_cols]

                # Keep only full-size tiles.
                if crop.shape[0] == crop_rows and crop.shape[1] == crop_cols:
                    crops.append(crop)

        return crops

    def augment_images(self, image):
        """Return 8 variants of ``image``: 4 rotations, each plus its flip.

        Args:
            image: Image to augment.

        Returns:
            A list ordered as rotation 0, its horizontal flip, rotation 90,
            its flip, and so on up to 270 degrees.
        """
        aug_images = []

        for angle in [0, 90, 180, 270]:
            rotated = self.rotate_image(image, angle)
            aug_images.append(rotated)
            aug_images.append(cv2.flip(rotated, 1))

        return aug_images

    @staticmethod
    def rotate_image(image, angle):
        """Rotate ``image`` by ``angle`` degrees about ``(w // 2, h // 2)``.

        The output keeps the input width and height.
        """
        (h, w) = image.shape[:2]
        center = (w // 2, h // 2)

        M = cv2.getRotationMatrix2D(center, angle, 1.0)

        return cv2.warpAffine(image, M, (w, h))

    def process_images(self):
        """Run the full pipeline over ``self.images``.

        Returns:
            A flat list containing every augmented crop of every input image.
        """
        processed_images = []

        for img in self.images:
            # CLAHE is applied to single-channel images only. The previous
            # check (`ndim == 1`) never matched an image, so the step was
            # silently skipped for grayscale inputs.
            if self._is_single_channel(img):
                clahe_img = self.apply_clahe(img)
            else:
                clahe_img = img

            norm_img = ImageProcessingPipeline.normalize_image(clahe_img)
            resized_img = ImageProcessingPipeline.resize_image(norm_img)

            for crop in self.crop_image(resized_img):
                processed_images.extend(self.augment_images(crop))

        return processed_images

    def show_image(self, image):
        """Display ``image`` with a gray colormap and no axes."""
        plt.imshow(image, cmap='gray')
        plt.axis('off')
        plt.show()

### 3.1 - Aplicando a pipeline para imagens geradas através do KMeans

In [None]:
# Run the processing pipeline on the K-means output images.
# NOTE: this rebinds `pipeline`, which earlier held the KMeans pipeline.
pipeline = ImageProcessingPipeline(processed_images)
processed_images_pipeline = pipeline.process_images()

# Show some of the processed images
for img in processed_images_pipeline[:8]:  # Shows the first 8 processed images
    pipeline.show_image(img)

# Last expression of the cell: total number of processed images.
len(processed_images_pipeline)

### 3.2 - Aplicando a pipeline para imagens geradas através da média dos pixels entre bandas

In [None]:
# Run the processing pipeline on the single band-averaged image.
pipeline_mean_image = ImageProcessingPipeline([merged_image_mean])
processed_images_pipeline_2 = pipeline_mean_image.process_images()

# Show the first 8 processed images using this cell's own pipeline instance,
# so the cell does not depend on `pipeline` from another cell.
for img in processed_images_pipeline_2[:8]:
    pipeline_mean_image.show_image(img)

# Last expression of the cell: total number of processed images.
len(processed_images_pipeline_2)

### 3.3 - Aplicando a pipeline para imagens geradas a partir do PCA

In [None]:
# Run the processing pipeline on the single PCA-merged image.
pipeline_pca_image = ImageProcessingPipeline([merged_image_pca])
processed_images_pipeline_3 = pipeline_pca_image.process_images()

# Show the first 8 processed images using this cell's own pipeline instance,
# so the cell does not depend on `pipeline` from another cell.
for img in processed_images_pipeline_3[:8]:
    pipeline_pca_image.show_image(img)

# Last expression of the cell: total number of processed images.
len(processed_images_pipeline_3)

Mais informações e testes unitários em: [Arquivo Pipeline Básica](../notebooks/20240426_Pipeline_Básica_de_Processamento_de_Imagem.ipynb)