<a href="https://colab.research.google.com/github/Rivero-Romero/Deep_Learning/blob/main/02_Preprocesamiento.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install kaggle library
!pip install kaggle

In [None]:
# Upload the Kaggle credential file (kaggle.json) through the Colab file picker
from google.colab import files

uploaded = files.upload()

# The next cell moves a file literally named "kaggle.json"; stop here with a
# clear message if the upload was cancelled or the file has a different name.
if "kaggle.json" not in uploaded:
    raise FileNotFoundError(
        f"Expected an uploaded file named 'kaggle.json', got: {sorted(uploaded)}"
    )


In [None]:
# Prepare kaggle environment: create ~/.kaggle, move the uploaded kaggle.json
# into it and restrict the file to owner read/write only (mode 600).
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download dataset "kaustubhb999/tomatoleaf" with the Kaggle CLI.
# The following cells expect the archive as tomatoleaf.zip in the working directory.
!kaggle datasets download -d kaustubhb999/tomatoleaf

In [None]:
!unzip tomatoleaf.zip

In [None]:
# List /content to inspect what the download and extraction produced
!ls /content/

In [None]:
!unzip /content/tomatoleaf.zip -d /content/tomatoleaf

In [None]:
# List the extracted dataset directory
!ls /content/tomatoleaf

In [None]:

# Remove any previous checkout first so the clone below does not fail on an
# existing directory, then clone the project repository.
!rm -rf Deep_Learning
!git clone https://github.com/Rivero-Romero/Deep_Learning.git


In [None]:
# List the contents of the cloned repository
!ls Deep_Learning


In [None]:
import pandas as pd

# Load the CSV shipped in the cloned repository into a DataFrame.
# Later cells read its IMAGE_PATH and CATEGORY_BIN columns.
dataset_path = "Deep_Learning/transformed_categories.csv"
dataset_df = pd.read_csv(dataset_path)
# Display the DataFrame as the cell output
dataset_df

In [None]:
from sklearn import preprocessing

# Converting labels from category to numeric: fit a LabelEncoder on
# CATEGORY_BIN and store the resulting codes in CATEGORY_BIN_ENCODED.
label_encoder = preprocessing.LabelEncoder()
dataset_df["CATEGORY_BIN_ENCODED"] = label_encoder.fit_transform(dataset_df["CATEGORY_BIN"])
# Show each distinct (encoded value, original category) pair
dataset_df[["CATEGORY_BIN_ENCODED", "CATEGORY_BIN"]].drop_duplicates()

In [None]:
# Splitting dataframe columns into labels (y: encoded category) and
# inputs (x: image paths)
y, x = dataset_df["CATEGORY_BIN_ENCODED"], dataset_df["IMAGE_PATH"]

In [None]:
import cv2
import pandas as pd
import numpy as np
import multiprocessing
from pathlib import Path

# Definir las funciones de preprocesamiento

def basic_preprocessing(path, target_resolution=(256, 256), blur_kernel_size=3):
    """
    Basic preprocessing: load an image, convert it to RGB, resize it and
    apply a median filter.

    Args:
        path: Location of the image file (converted with str() before loading).
        target_resolution: Output size passed to cv2.resize as (width, height).
        blur_kernel_size: Aperture size passed to cv2.medianBlur.

    Returns:
        The resized, median-filtered RGB image.

    Raises:
        OSError: If cv2.imread could not load the file.
    """
    img_bgr = cv2.imread(str(path))
    # cv2.imread signals failure by returning None instead of raising; without
    # this check the error only shows up later as an obscure cv2.error.
    if img_bgr is None:
        raise OSError(f"cv2.imread could not read image: {path!r}")
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    resize_image = cv2.resize(img_rgb, target_resolution)
    denoised_image = cv2.medianBlur(resize_image, blur_kernel_size)
    return denoised_image

def remove_median(path):
    """
    Highlight how far each pixel is from the image's median value.

    The image is loaded through basic_preprocessing, the median over all of
    its values is subtracted, and the absolute difference is returned.
    """
    preprocessed = basic_preprocessing(path)
    median_value = np.median(preprocessed)
    deviation = preprocessed - median_value
    return np.abs(deviation)

def scaling(path):
    """
    Center the image on its median and rescale it to a value range of width 1.

    The image is loaded through basic_preprocessing, converted to float64,
    shifted by the median of all its values and multiplied by
    1 / (max - min) of the shifted values.

    Args:
        path: Location of the image file, forwarded to basic_preprocessing.

    Returns:
        A float64 array with the same shape as the preprocessed image. For a
        constant image (max == min) the centered image, which is all zeros,
        is returned unchanged.
    """
    image = basic_preprocessing(path)
    # np.float was removed in NumPy 1.24; it was an alias for the builtin
    # float, i.e. float64.
    image_center = image.astype(np.float64) - np.median(image)
    value_range = image_center.max() - image_center.min()
    if value_range == 0:
        # Avoid dividing by zero (which would fill the result with NaN).
        return image_center
    factor = 1 / value_range
    processed = factor * image_center
    return processed

def preprocess_images(image_paths, preprocessing_function):
    """
    Apply a preprocessing function to every path in a collection.

    Args:
        image_paths: Iterable of image paths.
        preprocessing_function: Callable invoked once per path.

    Returns:
        A list with one result per path, in input order.
    """
    return [preprocessing_function(image_path) for image_path in image_paths]

# Build a list with the image paths from the IMAGE_PATH column
image_paths = dataset_df['IMAGE_PATH'].tolist()

# Aplicar preprocesamiento a las imágenes utilizando multiprocessing (en paralelo)
def parallel_preprocessing(image_paths, preprocessing_function, num_processes=4):
    """
    Preprocess the images in parallel with a multiprocessing pool.

    The paths are split into at most `num_processes` contiguous chunks and
    each chunk is handed to preprocess_images in a worker process.

    Args:
        image_paths: Iterable of image paths.
        preprocessing_function: Callable applied to each path; it is sent to
            the worker processes, so it must be picklable.
        num_processes: Maximum number of worker processes (at least 1).

    Returns:
        A list with one processed image per path, in input order. An empty
        input yields an empty list without starting a pool.

    Raises:
        ValueError: If num_processes is smaller than 1.
    """
    if num_processes < 1:
        raise ValueError("num_processes must be at least 1")

    image_paths = list(image_paths)
    if not image_paths:
        return []

    # Ceiling division: the chunk size is never 0 (which made range() fail when
    # there were fewer paths than processes) and there are never more chunks
    # than processes.
    chunk_size = -(-len(image_paths) // num_processes)
    chunks = [image_paths[i:i + chunk_size] for i in range(0, len(image_paths), chunk_size)]

    # Do not start more workers than there are chunks to process.
    with multiprocessing.Pool(processes=min(num_processes, len(chunks))) as pool:
        results = pool.starmap(preprocess_images, [(chunk, preprocessing_function) for chunk in chunks])

    # Flatten the per-chunk results back into a single list
    return [image for result in results for image in result]

# Use multiprocessing to preprocess all images with 'basic_preprocessing'
preprocessed_images = parallel_preprocessing(image_paths, basic_preprocessing, num_processes=4)

# Show one processed image (the first one) as a quick visual check
import matplotlib.pyplot as plt

plt.imshow(preprocessed_images[0])
plt.show()


In [None]:
!pip install tensorflow==2.15

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

class CustomDataGenerator(tf.keras.utils.Sequence):
    """
    Keras Sequence that loads and preprocesses images one batch at a time.

    Each batch is built by applying `function` to every path of the batch,
    stacking the results with np.array and dividing them by `scaling`.

    Args:
        image_paths: Sized, sliceable collection of image paths.
        labels: Sized, sliceable collection of labels, same length as image_paths.
        batch_size: Number of samples per batch (at least 1).
        function: Callable that takes one image path and returns the image.
        scaling: Divisor applied to every stacked batch of images.

    Raises:
        ValueError: If image_paths and labels differ in length, or if
            batch_size is smaller than 1.
    """

    def __init__(self, image_paths, labels, batch_size, function, scaling = 255.0):
        # Let the base class set up whatever state it needs.
        super().__init__()
        if len(image_paths) != len(labels):
            raise ValueError(
                f"image_paths and labels must have the same length, "
                f"got {len(image_paths)} and {len(labels)}"
            )
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        self.image_paths = image_paths
        self.labels = labels
        self.batch_size = batch_size
        self.function = function
        self.indexes = np.arange(len(self.image_paths))
        self.scaling = scaling

    def __len__(self):
        """Return the number of batches; the last one may be smaller."""
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, index):
        """
        Return batch number `index` as (images, labels).

        Negative indexes count from the last batch. An index outside the
        valid range raises IndexError instead of silently returning an
        empty batch.
        """
        num_batches = len(self)
        if index < 0:
            index += num_batches
        if not 0 <= index < num_batches:
            raise IndexError("batch index out of range")

        start = index * self.batch_size
        end = (index + 1) * self.batch_size

        batch_image_paths = self.image_paths[start:end]
        batch_labels = self.labels[start:end]

        batch_images = [self.function(image_path) for image_path in batch_image_paths]
        batch_images = np.array(batch_images) / self.scaling  # Normalize pixel values

        return batch_images, np.array(batch_labels)

In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np

# Show an example image next to its preprocessed version

batch_size = 32

# 'basic_preprocessing' loads the image, resizes it and applies a median filter

# NOTE(review): the labels passed here are the raw CATEGORY_BIN values, not the
# CATEGORY_BIN_ENCODED column; that is fine for this visual check, which only
# uses the images.
data_generator = CustomDataGenerator(
      dataset_df["IMAGE_PATH"].values,  # Image paths
      dataset_df["CATEGORY_BIN"].values,  # Labels
      batch_size,
      basic_preprocessing  # Preprocessing function
    )

sample_index = 16  # Position of the image to show inside the batch
batch_id = 0
index_data = 0  # Element 0 of a batch tuple is the image array

# Take the processed image from the generator
sample_image = data_generator[batch_id][index_data][sample_index]

# Pair the raw image and the processed (denoised) image with their titles.
# The raw image is looked up by sample_index alone, which matches the generator
# sample only because batch_id is 0.
sample_images = [
    (cv2.cvtColor(cv2.imread(dataset_df["IMAGE_PATH"].iloc[sample_index]), cv2.COLOR_BGR2RGB), "Raw Image"),  # Raw image
    (sample_image, "Preprocessed Image")  # Processed image
]

# Create the subplots that show both images
fig, axs = plt.subplots(1, 2, figsize=(12, 6))  # 1 row and 2 columns

for i, ax in enumerate(axs.flat):
    image, title = sample_images[i]
    ax.imshow(image)
    ax.set_title(title)
    ax.axis('off')  # Hide the axes for a cleaner view

plt.tight_layout()  # Adjust the margins automatically so nothing overlaps
plt.show()


In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.utils import Sequence

# Asegúrate de que estas funciones de preprocesamiento estén definidas en tu código
def basic_preprocessing(path, target_resolution=(256, 256), blur_kernel_size=3):
    """
    Basic preprocessing: load an image, convert it to RGB, resize it and
    apply a median filter.

    Args:
        path: Location of the image file (converted with str() before loading).
        target_resolution: Output size passed to cv2.resize as (width, height).
        blur_kernel_size: Aperture size passed to cv2.medianBlur.

    Returns:
        The resized, median-filtered RGB image.

    Raises:
        OSError: If cv2.imread could not load the file.
    """
    img_bgr = cv2.imread(str(path))
    # cv2.imread signals failure by returning None instead of raising; without
    # this check the error only shows up later as an obscure cv2.error.
    if img_bgr is None:
        raise OSError(f"cv2.imread could not read image: {path!r}")
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    resize_image = cv2.resize(img_rgb, target_resolution)
    denoised_image = cv2.medianBlur(resize_image, blur_kernel_size)
    return denoised_image


def remove_median(path):
    """
    Median-based preprocessing.

    Loads the image through basic_preprocessing, subtracts the median of all
    its values and returns the absolute difference.
    """
    preprocessed = basic_preprocessing(path)
    median_value = np.median(preprocessed)
    deviation = preprocessed - median_value
    return np.abs(deviation)

# Definición de generador de datos personalizado
class CustomDataGenerator(Sequence):
    """
    Keras Sequence that loads and preprocesses images one batch at a time.

    Each batch is built by applying `function` to every path of the batch,
    stacking the results with np.array and dividing them by `scaling`.

    Args:
        image_paths: Sized, sliceable collection of image paths.
        labels: Sized, sliceable collection of labels, same length as image_paths.
        batch_size: Number of samples per batch (at least 1).
        function: Callable that takes one image path and returns the image.
        scaling: Divisor applied to every stacked batch of images.

    Raises:
        ValueError: If image_paths and labels differ in length, or if
            batch_size is smaller than 1.
    """

    def __init__(self, image_paths, labels, batch_size, function, scaling=255.0):
        # Let the base class set up whatever state it needs.
        super().__init__()
        if len(image_paths) != len(labels):
            raise ValueError(
                f"image_paths and labels must have the same length, "
                f"got {len(image_paths)} and {len(labels)}"
            )
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        self.image_paths = image_paths
        self.labels = labels
        self.batch_size = batch_size
        self.function = function
        self.indexes = np.arange(len(self.image_paths))
        self.scaling = scaling

    def __len__(self):
        """Return the number of batches; the last one may be smaller."""
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, index):
        """
        Return batch number `index` as (images, labels).

        Negative indexes count from the last batch. An index outside the
        valid range raises IndexError instead of silently returning an
        empty batch.
        """
        num_batches = len(self)
        if index < 0:
            index += num_batches
        if not 0 <= index < num_batches:
            raise IndexError("batch index out of range")

        start = index * self.batch_size
        end = (index + 1) * self.batch_size

        batch_image_paths = self.image_paths[start:end]
        batch_labels = self.labels[start:end]

        batch_images = [self.function(image_path) for image_path in batch_image_paths]
        batch_images = np.array(batch_images) / self.scaling  # Normalization

        return batch_images, np.array(batch_labels)

# Load the image paths and labels from the DataFrame
image_paths = dataset_df["IMAGE_PATH"].tolist()  # Image paths
labels = dataset_df["CATEGORY_BIN"].tolist()     # Category labels (raw CATEGORY_BIN values)

# Create the data generator with the 'remove_median' preprocessing
batch_size = 32
data_generator = CustomDataGenerator(
    image_paths,
    labels,
    batch_size,
    remove_median  # Preprocessing function applied to every image
)

# Pick one processed image from the generator
sample_index = 16  # Position of the image inside the batch
batch_id = 0
index_data = 0  # Element 0 of a batch tuple is the image array
sample_image = data_generator[batch_id][index_data][sample_index]

# Position of the same sample in the full list of paths. Looking it up
# positionally in the list the generator was built from keeps the raw image
# aligned with the processed one for any batch_id and any DataFrame index.
raw_index = batch_id * batch_size + sample_index
raw_image = cv2.cvtColor(cv2.imread(image_paths[raw_index]), cv2.COLOR_BGR2RGB)

# Create the subplots used for the comparison
fig, axs = plt.subplots(1, 2)

# Original image and image after preprocessing, each with its title
sample_images = [
    (raw_image, "Raw Image"),
    (sample_image, "Image Without Median")
]

for ax, (image, title) in zip(axs.flat, sample_images):
    # Both images are 3-channel RGB arrays, for which imshow ignores cmap,
    # so no colormap is passed.
    ax.imshow(image)
    ax.set_title(title)
    ax.axis('off')

plt.show()
