# **Proyecto - Plant Status**

Para este proyecto, se utilizó el dataset disponible en la plataforma Kaggle, específicamente el titulado https://www.kaggle.com/datasets/abdallahalidev/plantvillage-dataset/data Este conjunto de datos contiene imágenes de diversas plantas en diferentes estados de salud, incluyendo tanto condiciones patológicas como muestras saludables.

Cabe mencionar que algunas de las clases presentes en el dataset únicamente incluyen imágenes de plantas en estado saludable. A pesar de esta limitación, se decidió continuar trabajando con dichas clases para mantener la diversidad de especies vegetales representadas en el conjunto de datos.

Posteriormente, se realizó una reorganización del dataset con el fin de facilitar el procesamiento y la clasificación. Para ello, se agruparon las imágenes según el tipo de planta, creando una estructura de carpetas nombradas con el nombre correspondiente a cada especie. Dentro de cada una de estas carpetas se almacenaron las imágenes clasificadas por su estado, lo que permite una manipulación más ordenada y eficiente durante el desarrollo del modelo.

## ***Configuración del entorno y Extracción***

### ***Verificación y uso de GPU (CUDA) para el procesamiento de imágenes***

Ahora, en el siguiente fragmento de código se realiza una verificación del entorno para comprobar si CUDA está disponible. Esto nos permite utilizar la GPU personal para acelerar el procesamiento de imágenes durante el entrenamiento del modelo:

In [14]:
import torch

print("¿CUDA disponible?:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("✅ GPU detectada:", torch.cuda.get_device_name(0))
else:
    print("❌ No se detectó GPU")


¿CUDA disponible?: True
✅ GPU detectada: NVIDIA GeForce RTX 3050


### ***Definición y verificación de la ruta del dataset***


Antes de cargar las imágenes del dataset, es fundamental asegurarse de que la ruta hacia la carpeta que contiene los datos esté correctamente definida. En el siguiente fragmento de código, se especifica la ruta local donde se encuentra almacenado el dataset y se verifica su existencia en el sistema. Esto permite detectar posibles errores tempranamente si la ruta es incorrecta o si los archivos no se han descargado adecuadamente.

In [15]:
import os

# Definir la ruta del dataset
dataset_path = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"

# Verificar que la carpeta existe
if os.path.exists(dataset_path):
    print(f"¡El dataset se encontró en: {dataset_path}!")
else:
    print(f"Error: No se encontró la carpeta en {dataset_path}. Verifica la ruta.")

¡El dataset se encontró en: C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset!


## ***Análisis exploratorio***

### ***Estructura del dataset***

Ver con que datos o carpetas se esta trabajando a lo largo de este colab

In [4]:
import os

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"

def list_folders_clean(directory):
    # Verificar que la carpeta existe
    if not os.path.exists(directory):
        print(f"Error: No se encontró la carpeta en {directory}.")
        return
    
    visited = set()
    folders_by_origin = {"color": [], "grayscale": [], "segmented": []}
    
    # Recorrer el directorio
    for root, dirs, _ in os.walk(directory):
        depth = root[len(directory):].count(os.sep)
        folder_name = os.path.basename(root)
        if root == directory:
            folder_name = "plantvillage dataset"
        
        full_path = os.path.normpath(os.path.join(root))
        if full_path not in visited and depth >= 1:
            visited.add(full_path)
            
            # Determinar el tipo (color, grayscale, segmented)
            origin = ""
            parent_path = os.path.dirname(root)
            parent_name = os.path.basename(parent_path)
            if depth == 1:
                origin = folder_name
            elif depth >= 2 and parent_name in ["color", "grayscale", "segmented"]:
                origin = parent_name
            elif depth >= 2:
                grandparent_path = os.path.dirname(parent_path)
                grandparent_name = os.path.basename(grandparent_path)
                if grandparent_name in ["color", "grayscale", "segmented"]:
                    origin = grandparent_name
            
            # Guardar carpetas de nivel 2 o mayor
            if depth >= 2 and origin:
                is_simple = "___" not in folder_name
                folders_by_origin[origin].append((depth, folder_name, is_simple))
    
    # Imprimir carpeta raíz
    print(f"📁 plantvillage dataset")
    print(f"{'=' * 50}")
    
    # Imprimir carpetas por tipo
    for origin in ["color", "grayscale", "segmented"]:
        if folders_by_origin[origin]:
            print(f"\n{origin.upper()}")
            print(f"{'=' * 50}")
            for depth, folder_name, is_simple in sorted(folders_by_origin[origin], key=lambda x: x[1]):
                indent = "  " * (depth - 1)
                icon = "🌱" if is_simple else "🍃"
                print(f"{indent}{icon} {folder_name}")

if __name__ == "__main__":
    list_folders_clean(DATASET_PATH)

📁 plantvillage dataset

COLOR
  🌱 Apple
    🍃 Apple___Apple_scab
    🍃 Apple___Black_rot
    🍃 Apple___Cedar_apple_rust
    🍃 Apple___healthy
  🌱 Blueberry
    🍃 Blueberry___healthy
  🌱 Cherry
    🍃 Cherry_(including_sour)___Powdery_mildew
    🍃 Cherry_(including_sour)___healthy
  🌱 Corn
    🍃 Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot
    🍃 Corn_(maize)___Common_rust_
    🍃 Corn_(maize)___Northern_Leaf_Blight
    🍃 Corn_(maize)___healthy
  🌱 Grape
    🍃 Grape___Black_rot
    🍃 Grape___Esca_(Black_Measles)
    🍃 Grape___Leaf_blight_(Isariopsis_Leaf_Spot)
    🍃 Grape___healthy
  🌱 Orange
    🍃 Orange___Haunglongbing_(Citrus_greening)
  🌱 Peach
    🍃 Peach___Bacterial_spot
    🍃 Peach___healthy
  🌱 Pepper
    🍃 Pepper,_bell___Bacterial_spot
    🍃 Pepper,_bell___healthy
  🌱 Potato
    🍃 Potato___Early_blight
    🍃 Potato___Late_blight
    🍃 Potato___healthy
  🌱 Raspberry
    🍃 Raspberry___healthy
  🌱 Soybean
    🍃 Soybean___healthy
  🌱 Squash
    🍃 Squash___Powdery_mildew
  🌱 Stra

El dataset PlantVillage contiene imágenes de múltiples especies de plantas en tres formatos: color, escala de grises y segmentado. Cada especie incluye diferentes estados, que abarcan desde plantas saludables hasta diversas enfermedades comunes.

Este conjunto de datos ofrece una amplia variedad de condiciones para el entrenamiento y evaluación de modelos de clasificación y diagnóstico de enfermedades en plantas, lo que lo convierte en un recurso valioso para proyectos de aprendizaje automático en agricultura.

### ***Formato o extension de las imagenes***

Se verificó la extensión de las imágenes en el dataset y, para nuestro trabajo, se utilizarán únicamente las imágenes con extensión .jpg.

In [11]:
import os
from collections import defaultdict

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"

def analyze_image_extensions(directory):
    # Verificar que la carpeta existe
    if not os.path.exists(directory):
        print(f"Error: No se encontró la carpeta en {directory}.")
        return
    
    # Almacenar conteo de extensiones
    ext_counts = defaultdict(int)
    total_files = 0
    file_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')
    
    # Recorrer el directorio
    for root, _, files in os.walk(directory):
        for file in files:
            ext = os.path.splitext(file)[1].lower()
            if ext in file_extensions:
                ext_counts[ext] += 1
                total_files += 1
    
    # Imprimir resumen
    print(f"{'=' * 30}")
    print("Extensiones de Imágenes")
    print(f"{'=' * 30}")
    print(f"{'Extensión':<10} {'Conteo':>10}")
    print("-" * 22)
    for ext in sorted(ext_counts):
        print(f"{ext:<10} {ext_counts[ext]:>10}")
    print(f"\nTotal archivos: {total_files}")

if __name__ == "__main__":
    analyze_image_extensions(DATASET_PATH)

Extensiones de Imágenes
Extensión      Conteo
----------------------
.jpeg               2
.jpg           162912
.png                2

Total archivos: 162916


Se encontró la siguiente distribución de extensiones en las imágenes del dataset:

* Archivos con extensión .jpeg: 2

* Archivos con extensión .jpg: 162,912

* Archivos con extensión .png: 2

En total, el dataset contiene 162,916 archivos de imagen.

Dado que la gran mayoría de las imágenes utilizan la extensión .jpg, se requerirá unificar todas las extensiones al formato .jpg para facilitar el procesamiento, estandarización y evitar errores por incompatibilidades en la lectura o filtrado por extensión.

### ***Resoluciónes dentro del dataset por planta y tipo de Imagen***

Utilizaremos CUDA para procesar las imágenes directamente en nuestra tarjeta gráfica.
A continuación, se muestra un ejemplo de código para verificar que CUDA esté funcionando correctamente:

In [3]:
import cupy as cp
print(f"CUDA version: {cp.cuda.runtime.runtimeGetVersion()}")
print(f"GPU devices: {cp.cuda.runtime.getDeviceCount()}")

CUDA version: 12090
GPU devices: 1


Ahora analizaremos las dimensiones de las imágenes por cada carpeta (color, grayscale, y segmented) y por cada planta dentro del dataset.
Debido a posibles cuellos de botella en el procesamiento, este análisis se realizará planta por planta y carpeta por carpeta, de forma secuencial. 

***Conclusión del análisis de dimensiones de las imágenes***



Durante el análisis del dataset, se detectaron dimensiones distintas a 256 x 256, particularmente en la carpeta segmented de las plantas Peach, Strawberry y Potato.

Sin embargo, para asegurar la compatibilidad con el modelo a utilizar —ResNet, el cual requiere entradas de 224 x 224 píxeles—, será necesario redimensionar todas las imágenes a 224 x 224, sin importar su dimensión original.

Esta transformación garantiza una entrada homogénea al modelo, evitando errores durante la etapa de entrenamiento o inferencia.

#### ***Color***

##### ***Apple***

In [6]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Apple"
# Categoría a procesar
CATEGORY = "color"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 3171/3171 [00:01<00:00, 3091.00it/s]


Dimensiones de Imágenes (Apple - COLOR)
Dimensión           Conteo
---------------------------
256x256               3171

Total imágenes procesadas: 3171


##### ***Blueberry***

In [7]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Blueberry"
# Categoría a procesar
CATEGORY = "color"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 1502/1502 [00:00<00:00, 5103.20it/s]

Dimensiones de Imágenes (Blueberry - COLOR)
Dimensión           Conteo
---------------------------
256x256               1502

Total imágenes procesadas: 1502





##### ***Cherry***

In [8]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Cherry"
# Categoría a procesar
CATEGORY = "color"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 1906/1906 [00:00<00:00, 4925.06it/s]

Dimensiones de Imágenes (Cherry - COLOR)
Dimensión           Conteo
---------------------------
256x256               1906

Total imágenes procesadas: 1906





##### ***Corn***

In [9]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Corn"
# Categoría a procesar
CATEGORY = "color"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 3852/3852 [00:00<00:00, 4767.34it/s]

Dimensiones de Imágenes (Corn - COLOR)
Dimensión           Conteo
---------------------------
256x256               3852

Total imágenes procesadas: 3852





##### ***Grape***

In [10]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Grape"
# Categoría a procesar
CATEGORY = "color"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 4062/4062 [00:00<00:00, 4864.67it/s]

Dimensiones de Imágenes (Grape - COLOR)
Dimensión           Conteo
---------------------------
256x256               4062

Total imágenes procesadas: 4062





##### ***Orange***

In [12]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Orange"
# Categoría a procesar
CATEGORY = "color"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 5507/5507 [00:01<00:00, 3979.02it/s]

Dimensiones de Imágenes (Orange - COLOR)
Dimensión           Conteo
---------------------------
256x256               5507

Total imágenes procesadas: 5507





##### ***Peach***

In [13]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Peach"
# Categoría a procesar
CATEGORY = "color"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 2657/2657 [00:00<00:00, 4653.24it/s]

Dimensiones de Imágenes (Peach - COLOR)
Dimensión           Conteo
---------------------------
256x256               2657

Total imágenes procesadas: 2657





##### ***Pepper***

In [14]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Pepper"
# Categoría a procesar
CATEGORY = "color"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 2475/2475 [00:00<00:00, 5195.23it/s]

Dimensiones de Imágenes (Pepper - COLOR)
Dimensión           Conteo
---------------------------
256x256               2475

Total imágenes procesadas: 2475





##### ***Potato***

In [16]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Potato"
# Categoría a procesar
CATEGORY = "color"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes:   0%|          | 0/2152 [00:00<?, ?it/s]

Procesando imágenes: 100%|██████████| 2152/2152 [00:00<00:00, 4981.51it/s]

Dimensiones de Imágenes (Potato - COLOR)
Dimensión           Conteo
---------------------------
256x256               2152

Total imágenes procesadas: 2152





##### ***Raspberry***

In [17]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Raspberry"
# Categoría a procesar
CATEGORY = "color"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 371/371 [00:00<00:00, 4818.16it/s]

Dimensiones de Imágenes (Raspberry - COLOR)
Dimensión           Conteo
---------------------------
256x256                371

Total imágenes procesadas: 371





##### ***Soybean***

In [18]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Soybean"
# Categoría a procesar
CATEGORY = "color"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 5090/5090 [00:01<00:00, 4975.56it/s]

Dimensiones de Imágenes (Soybean - COLOR)
Dimensión           Conteo
---------------------------
256x256               5090

Total imágenes procesadas: 5090





##### ***Squash***

In [19]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Squash"
# Categoría a procesar
CATEGORY = "color"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 1835/1835 [00:00<00:00, 3553.04it/s]

Dimensiones de Imágenes (Squash - COLOR)
Dimensión           Conteo
---------------------------
256x256               1835

Total imágenes procesadas: 1835





##### ***Strawberry***

In [20]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Strawberry"
# Categoría a procesar
CATEGORY = "color"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 1565/1565 [00:00<00:00, 5212.53it/s]

Dimensiones de Imágenes (Strawberry - COLOR)
Dimensión           Conteo
---------------------------
256x256               1565

Total imágenes procesadas: 1565





##### ***Tomato***

In [21]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Tomato"
# Categoría a procesar
CATEGORY = "color"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 18160/18160 [00:03<00:00, 4927.47it/s]

Dimensiones de Imágenes (Tomato - COLOR)
Dimensión           Conteo
---------------------------
256x256              18160

Total imágenes procesadas: 18160





#### ***Grayscale***

##### ***Apple***

In [22]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Apple"
# Categoría a procesar
CATEGORY = "grayscale"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 3171/3171 [00:00<00:00, 4704.76it/s]

Dimensiones de Imágenes (Apple - GRAYSCALE)
Dimensión           Conteo
---------------------------
256x256               3171

Total imágenes procesadas: 3171





##### ***Blueberry***

In [2]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Blueberry"
# Categoría a procesar
CATEGORY = "grayscale"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 1502/1502 [00:00<00:00, 5900.12it/s]

Dimensiones de Imágenes (Blueberry - GRAYSCALE)
Dimensión           Conteo
---------------------------
256x256               1502

Total imágenes procesadas: 1502





##### ***Cherry***

In [5]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Cherry"
# Categoría a procesar
CATEGORY = "grayscale"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 1906/1906 [00:00<00:00, 4933.46it/s]

Dimensiones de Imágenes (Cherry - GRAYSCALE)
Dimensión           Conteo
---------------------------
256x256               1906

Total imágenes procesadas: 1906





##### ***Corn***

In [6]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Corn"
# Categoría a procesar
CATEGORY = "grayscale"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 3852/3852 [00:00<00:00, 5632.23it/s]

Dimensiones de Imágenes (Corn - GRAYSCALE)
Dimensión           Conteo
---------------------------
256x256               3852

Total imágenes procesadas: 3852





##### ***Grape***

In [7]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Grape"
# Categoría a procesar
CATEGORY = "grayscale"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 4062/4062 [00:00<00:00, 5439.98it/s]

Dimensiones de Imágenes (Grape - GRAYSCALE)
Dimensión           Conteo
---------------------------
256x256               4062

Total imágenes procesadas: 4062





##### ***Orange***

In [8]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Orange"
# Categoría a procesar
CATEGORY = "grayscale"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 5507/5507 [00:01<00:00, 5465.90it/s]

Dimensiones de Imágenes (Orange - GRAYSCALE)
Dimensión           Conteo
---------------------------
256x256               5507

Total imágenes procesadas: 5507





##### ***Peach***

In [9]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Peach"
# Categoría a procesar
CATEGORY = "grayscale"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 2657/2657 [00:00<00:00, 5805.21it/s]

Dimensiones de Imágenes (Peach - GRAYSCALE)
Dimensión           Conteo
---------------------------
256x256               2657

Total imágenes procesadas: 2657





##### ***Pepper***

In [10]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Pepper"
# Categoría a procesar
CATEGORY = "grayscale"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 2475/2475 [00:00<00:00, 5093.92it/s]

Dimensiones de Imágenes (Pepper - GRAYSCALE)
Dimensión           Conteo
---------------------------
256x256               2475

Total imágenes procesadas: 2475





##### ***Potato***

In [12]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Potato"
# Categoría a procesar
CATEGORY = "grayscale"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 2152/2152 [00:00<00:00, 5764.15it/s]

Dimensiones de Imágenes (Potato - GRAYSCALE)
Dimensión           Conteo
---------------------------
256x256               2152

Total imágenes procesadas: 2152





##### ***Raspberry***

In [13]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Raspberry"
# Categoría a procesar
CATEGORY = "grayscale"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 371/371 [00:00<00:00, 5640.02it/s]

Dimensiones de Imágenes (Raspberry - GRAYSCALE)
Dimensión           Conteo
---------------------------
256x256                371

Total imágenes procesadas: 371





##### ***Soybean***

In [14]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Soybean"
# Categoría a procesar
CATEGORY = "grayscale"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 5090/5090 [00:00<00:00, 5485.34it/s]

Dimensiones de Imágenes (Soybean - GRAYSCALE)
Dimensión           Conteo
---------------------------
256x256               5090

Total imágenes procesadas: 5090





##### ***Squash***

In [15]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Squash"
# Categoría a procesar
CATEGORY = "grayscale"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 1835/1835 [00:00<00:00, 5462.27it/s]

Dimensiones de Imágenes (Squash - GRAYSCALE)
Dimensión           Conteo
---------------------------
256x256               1835

Total imágenes procesadas: 1835





##### ***Strawberry***

In [16]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Strawberry"
# Categoría a procesar
CATEGORY = "grayscale"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 1565/1565 [00:00<00:00, 4875.42it/s]

Dimensiones de Imágenes (Strawberry - GRAYSCALE)
Dimensión           Conteo
---------------------------
256x256               1565

Total imágenes procesadas: 1565





##### ***Tomato***

In [18]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Tomato"
# Categoría a procesar
CATEGORY = "grayscale"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 18160/18160 [00:03<00:00, 5530.09it/s]

Dimensiones de Imágenes (Tomato - GRAYSCALE)
Dimensión           Conteo
---------------------------
256x256              18160

Total imágenes procesadas: 18160





#### ***Segmented***

##### ***Apple***

In [19]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Apple"
# Categoría a procesar
CATEGORY = "segmented"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 3171/3171 [00:00<00:00, 4925.75it/s]

Dimensiones de Imágenes (Apple - SEGMENTED)
Dimensión           Conteo
---------------------------
256x256               3171

Total imágenes procesadas: 3171





##### ***Blueberry***

In [20]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Blueberry"
# Categoría a procesar
CATEGORY = "segmented"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 1502/1502 [00:00<00:00, 4747.68it/s]

Dimensiones de Imágenes (Blueberry - SEGMENTED)
Dimensión           Conteo
---------------------------
256x256               1502

Total imágenes procesadas: 1502





##### ***Cherry***

In [21]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Cherry"
# Categoría a procesar
CATEGORY = "segmented"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 1906/1906 [00:00<00:00, 5193.47it/s]

Dimensiones de Imágenes (Cherry - SEGMENTED)
Dimensión           Conteo
---------------------------
256x256               1906

Total imágenes procesadas: 1906





##### ***Corn***

In [22]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Corn"
# Categoría a procesar
CATEGORY = "segmented"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 3852/3852 [00:01<00:00, 3468.22it/s]

Dimensiones de Imágenes (Corn - SEGMENTED)
Dimensión           Conteo
---------------------------
256x256               3852

Total imágenes procesadas: 3852





##### ***Grape***

In [23]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Grape"
# Categoría a procesar
CATEGORY = "segmented"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 4063/4063 [00:00<00:00, 4969.45it/s]

Dimensiones de Imágenes (Grape - SEGMENTED)
Dimensión           Conteo
---------------------------
256x256               4063

Total imágenes procesadas: 4063





##### ***Orange***

In [24]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Orange"
# Categoría a procesar
CATEGORY = "segmented"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 5507/5507 [00:01<00:00, 5067.89it/s]

Dimensiones de Imágenes (Orange - SEGMENTED)
Dimensión           Conteo
---------------------------
256x256               5507

Total imágenes procesadas: 5507





##### ***Peach***
(Otras dimensiones aparte de: 256x 256)

In [27]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Peach"
# Categoría a procesar
CATEGORY = "segmented"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 2657/2657 [00:00<00:00, 5261.41it/s]

Dimensiones de Imágenes (Peach - SEGMENTED)
Dimensión           Conteo
---------------------------
256x256               2655
324x512                  1
466x512                  1

Total imágenes procesadas: 2657





##### ***Pepper***

In [28]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Pepper"
# Categoría a procesar
CATEGORY = "segmented"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 2475/2475 [00:00<00:00, 5333.59it/s]

Dimensiones de Imágenes (Pepper - SEGMENTED)
Dimensión           Conteo
---------------------------
256x256               2475

Total imágenes procesadas: 2475





##### ***Potato***

In [29]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Potato"
# Categoría a procesar
CATEGORY = "segmented"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 2152/2152 [00:00<00:00, 5096.48it/s]

Dimensiones de Imágenes (Potato - SEGMENTED)
Dimensión           Conteo
---------------------------
256x256               2152

Total imágenes procesadas: 2152





##### ***Raspberry***

In [30]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Raspberry"
# Categoría a procesar
CATEGORY = "segmented"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 371/371 [00:00<00:00, 4685.99it/s]

Dimensiones de Imágenes (Raspberry - SEGMENTED)
Dimensión           Conteo
---------------------------
256x256                371

Total imágenes procesadas: 371





##### ***Soybean***

In [31]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Soybean"
# Categoría a procesar
CATEGORY = "segmented"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 5090/5090 [00:00<00:00, 5365.50it/s]

Dimensiones de Imágenes (Soybean - SEGMENTED)
Dimensión           Conteo
---------------------------
256x256               5090

Total imágenes procesadas: 5090





##### ***Squash***

In [32]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Squash"
# Categoría a procesar
CATEGORY = "segmented"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 1835/1835 [00:00<00:00, 4558.84it/s]

Dimensiones de Imágenes (Squash - SEGMENTED)
Dimensión           Conteo
---------------------------
256x256               1835

Total imágenes procesadas: 1835





##### ***Strawberry***
(Otras dimensiones aparte de: 256x 256)

In [33]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Strawberry"
# Categoría a procesar
CATEGORY = "segmented"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 1565/1565 [00:00<00:00, 5115.83it/s]

Dimensiones de Imágenes (Strawberry - SEGMENTED)
Dimensión           Conteo
---------------------------
256x256               1564
470x512                  1

Total imágenes procesadas: 1565





##### ***Tomato***
(Otras dimensiones aparte de: 256x 256)

In [34]:
import os
from PIL import Image
import cupy as cp
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
# Nombre de la planta a procesar
PLANT_NAME = "Tomato"
# Categoría a procesar
CATEGORY = "segmented"
# Número de hilos para paralelización
MAX_WORKERS = 16

def collect_image_paths(directory):
    file_paths = []
    for root, _, files in os.walk(directory):
        if PLANT_NAME in root:
            for file in files:
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    file_paths.append(os.path.join(root, file))
    return file_paths

def process_image(file_path):
    try:
        with Image.open(file_path) as img:
            return (img.size[0], img.size[1])
    except Exception as e:
        print(f"Error al procesar {file_path}: {e}")
        return None

def analyze_plant_dimensions():
    category_path = os.path.join(DATASET_PATH, CATEGORY, PLANT_NAME)
    if not os.path.exists(category_path):
        print(f"Error: No se encontró la carpeta {category_path}.")
        return None, None, 0
    
    file_paths = collect_image_paths(category_path)
    dimensions = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        # Usar tqdm para mostrar progreso
        with tqdm(total=len(file_paths), desc="Procesando imágenes") as pbar:
            for dim in executor.map(process_image, file_paths):
                if dim is not None:
                    dimensions.append(dim)
                pbar.update(1)
    
    try:
        dim_array = cp.array(dimensions, dtype=cp.int32)
        unique_dims, counts = np.unique(dim_array.get(), axis=0, return_counts=True)
    except Exception as e:
        print(f"Error en el cálculo de dimensiones: {e}")
        return None, None, len(dimensions)
    
    return unique_dims, counts, len(dimensions)

if __name__ == "__main__":
    unique_dims, counts, total_dims = analyze_plant_dimensions()
    if unique_dims is not None and counts is not None:
        print(f"{'=' * 30}")
        print(f"Dimensiones de Imágenes ({PLANT_NAME} - {CATEGORY.upper()})")
        print(f"{'=' * 30}")
        print(f"{'Dimensión':<15} {'Conteo':>10}")
        print("-" * 27)
        for dim, count in zip(unique_dims, counts):
            print(f"{f'{dim[0]}x{dim[1]}':<15} {count:>10}")
        print(f"\nTotal imágenes procesadas: {total_dims}")

Procesando imágenes: 100%|██████████| 18160/18160 [00:03<00:00, 5134.67it/s]

Dimensiones de Imágenes (Tomato - SEGMENTED)
Dimensión           Conteo
---------------------------
256x256              18159
335x512                  1

Total imágenes procesadas: 18160





### ***Detección de imágenes corruptas o vacías***


Se realizará una revisión del dataset para identificar imágenes corruptas o vacías, con el objetivo de determinar si es necesario eliminarlas o corregirlas antes de continuar con el preprocesamiento y entrenamiento del modelo. Esta validación es fundamental para garantizar la calidad y consistencia de los datos.

In [40]:
import os
from PIL import Image
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

# Configuración
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
MAX_WORKERS = 16

def collect_image_paths(input_dir):
    """Recolecta rutas de imágenes .jpg, .jpeg y .png con barra de progreso."""
    file_paths = []
    for root, _, files in tqdm(os.walk(input_dir), desc="Recolectando imágenes"):
        for file in files:
            if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                file_paths.append(os.path.join(root, file))
    return file_paths

def check_image(file_path):
    """Verifica si una imagen es válida o está corrupta."""
    try:
        if os.path.getsize(file_path) == 0:
            return file_path, "Archivo vacío (0 bytes)"
        with Image.open(file_path) as img:
            img.verify()
            img = Image.open(file_path)
            img.load()
            if img.size[0] == 0 or img.size[1] == 0:
                return file_path, "Dimensiones inválidas"
        return file_path, None
    except Exception as e:
        return file_path, f"Error: {str(e)}"

def check_dataset(input_dir, max_workers=MAX_WORKERS):
    """Verifica imágenes en paralelo y genera informe."""
    file_paths = collect_image_paths(input_dir)
    corrupted_images = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_file = {executor.submit(check_image, path): path for path in file_paths}
        for future in tqdm(as_completed(future_to_file), total=len(file_paths), desc="Verificando imágenes"):
            file_path, error = future.result()
            if error:
                corrupted_images.append((file_path, error))

    # Resumen
    print(f"\n{'=' * 20}")
    print(f"Total imágenes: {len(file_paths)}")
    print(f"Imágenes corruptas: {len(corrupted_images)}")
    if corrupted_images:
        print("\nImágenes con problemas:")
        for file_path, error in corrupted_images:
            print(f"  {file_path}: {error}")
        output_file = r"C:\Users\Arys\Desktop\Proyecto - 2\corrupted_images.txt"
        with open(output_file, 'w') as f:
            for file_path, _ in corrupted_images:
                f.write(f"{file_path}\n")
        print(f"\nLista de imágenes corruptas guardada en: {output_file}")
    else:
        print("✅ No se encontraron imágenes corruptas.")

    return corrupted_images

if __name__ == "__main__":
    check_dataset(DATASET_PATH)

Recolectando imágenes: 160it [00:00, 332.28it/s]
Verificando imágenes: 100%|██████████| 162916/162916 [01:04<00:00, 2527.33it/s]



Total imágenes: 162916
Imágenes corruptas: 0
✅ No se encontraron imágenes corruptas.


### ***Buscar balanceo entre clases***

Se analizará la distribución de imágenes entre las distintas clases —o en este caso, entre los diferentes estados de salud de las plantas— con el objetivo de verificar si existe un balance adecuado.
Este análisis permitirá identificar posibles desbalances que, en etapas posteriores, podrían requerir técnicas de reajuste de cantidades o asignación de pesos por clase durante el entrenamiento del modelo, para evitar sesgos y mejorar el rendimiento general.

***Conclusión del análisis***

Tras el análisis realizado, se llegó a las siguientes conclusiones:

Balance entre clases:
Se observó un desbalance significativo en la cantidad de imágenes entre distintas clases (estados de salud de las plantas). Esto indica que será necesario aplicar técnicas de balanceo, ya sea ajustando la cantidad de muestras por clase o utilizando pesos diferenciados durante el entrenamiento del modelo. Esta medida es crucial para evitar sesgos que afecten negativamente la capacidad de generalización del modelo, especialmente al emplear arquitecturas como ResNet.

Verificación de imágenes corruptas o vacías:
Se ejecutó un script para identificar imágenes que presentaran errores como archivos vacíos, daños en la codificación o dimensiones inválidas.
El resultado mostró que existen imágenes corruptas en el dataset, por lo que será necesario eliminarlas o corregirlas antes de continuar con el preprocesamiento.
Se generó un archivo con la lista de imágenes problemáticas para facilitar su depuración:

In [5]:
import os
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Configuración
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
MAX_WORKERS = 16
OUTPUT_CSV = r"C:\Users\Arys\Desktop\Proyecto - 2\class_counts.csv"

def collect_class_counts(input_dir):
    """Recolecta conteos de imágenes .jpg, .jpeg y .png por clase con barra de progreso."""
    class_counts = {}
    subdirs = ['color', 'grayscale', 'segmented']

    for subdir in tqdm(subdirs, desc="Procesando subdirectorios"):
        subdir_path = os.path.join(input_dir, subdir)
        if not os.path.exists(subdir_path):
            continue

        for plant in os.listdir(subdir_path):
            plant_path = os.path.join(subdir_path, plant)
            if not os.path.isdir(plant_path):
                continue

            for state in os.listdir(plant_path):
                state_path = os.path.join(plant_path, state)
                if not os.path.isdir(state_path):
                    continue

                images = [f for f in os.listdir(state_path) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
                if images:
                    class_name = f"{subdir}_{plant}_{state}"
                    class_counts[class_name] = len(images)

    return class_counts

def generate_balance_report(class_counts):
    """Genera informe de balance de clases."""
    if not class_counts:
        print("Error: No se encontraron clases con imágenes.")
        return

    # Convertir a DataFrame y ordenar
    df = pd.DataFrame(list(class_counts.items()), columns=['Class', 'Count']).sort_values(by='Count', ascending=False)

    # Estadísticas
    total_images = df['Count'].sum()
    num_classes = len(df)
    mean_count = df['Count'].mean()
    std_count = df['Count'].std() if len(df['Count']) > 1 else 0
    min_count = df['Count'].min()
    max_count = df['Count'].max()

    # Imprimir resumen
    print(f"\n{'=' * 20}")
    print(f"Total imágenes: {total_images}")
    print(f"Total clases: {num_classes}")
    print(f"Media de imágenes por clase: {mean_count:.2f}")
    print(f"Desviación estándar: {std_count:.2f}")
    print(f"Mínimo: {min_count} ({df.loc[df['Count'].idxmin(), 'Class']})")
    print(f"Máximo: {max_count} ({df.loc[df['Count'].idxmax(), 'Class']})")

    # Evaluar balance
    imbalance_ratio = max_count / min_count if min_count > 0 else float('inf')
    print(f"Relación de desbalance: {imbalance_ratio:.2f}")
    print("⚠️ Dataset desbalanceado. Considera balancear las clases." if imbalance_ratio > 2 and num_classes > 1 else "✅ Dataset razonablemente balanceado.")

    # Guardar conteos en CSV
    df.to_csv(OUTPUT_CSV, index=False)
    print(f"Conteo guardado en: {OUTPUT_CSV}")

    # Mostrar clases extremas
    print("\nClases con más imágenes:")
    print(df.head(5)[['Class', 'Count']].to_string(index=False) if len(df) >= 5 else "No hay datos suficientes.")
    print("\nClases con menos imágenes:")
    print(df.tail(5)[['Class', 'Count']].to_string(index=False) if len(df) >= 5 else "No hay datos suficientes.")

def main():
    """Ejecuta el análisis de balance de clases."""
    class_counts = collect_class_counts(DATASET_PATH)
    generate_balance_report(class_counts)

if __name__ == "__main__":
    main()

Procesando subdirectorios: 100%|██████████| 3/3 [00:00<00:00, 15.46it/s]


Total imágenes: 162916
Total clases: 114
Media de imágenes por clase: 1429.09
Desviación estándar: 1260.43
Mínimo: 152 (color_Potato_Potato___healthy)
Máximo: 5507 (color_Orange_Orange___Haunglongbing_(Citrus_greening))
Relación de desbalance: 36.23
⚠️ Dataset desbalanceado. Considera balancear las clases.
Conteo guardado en: C:\Users\Arys\Desktop\Proyecto - 2\class_counts.csv

Clases con más imágenes:
                                                    Class  Count
    color_Orange_Orange___Haunglongbing_(Citrus_greening)   5507
grayscale_Orange_Orange___Haunglongbing_(Citrus_greening)   5507
segmented_Orange_Orange___Haunglongbing_(Citrus_greening)   5507
  segmented_Tomato_Tomato___Tomato_Yellow_Leaf_Curl_Virus   5357
  grayscale_Tomato_Tomato___Tomato_Yellow_Leaf_Curl_Virus   5357

Clases con menos imágenes:
                                   Class  Count
grayscale_Apple_Apple___Cedar_apple_rust    275
segmented_Apple_Apple___Cedar_apple_rust    275
           color_Potato_Potato_




## ***Preprocesamiento***

### ***Conversión de formatos de imagen***

Se identificaron 2 archivos con extensión .jpeg y 2 archivos con extensión .png dentro del dataset.
Para mantener la uniformidad en el preprocesamiento, todas estas imágenes fueron convertidas al formato .jpg, eliminando las versiones originales con extensiones distintas. Esto garantiza una estructura de datos homogénea para las etapas posteriores del flujo de trabajo.

In [45]:
import os
from PIL import Image
from tqdm import tqdm

# Ruta del dataset local
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"

def convert_only_png_jpeg_to_jpg_replace(input_dir):
    """Convierte imágenes .png y .jpeg a .jpg en el mismo directorio, eliminando los originales."""
    target_extensions = ('.png', '.jpeg')
    converted_count = {'jpeg': 0, 'png': 0}
    total_files_processed = 0

    # Recorre directorios y busca imágenes .png y .jpeg
    for root, _, files in tqdm(os.walk(input_dir), desc="Procesando directorios"):
        for file in files:
            ext = os.path.splitext(file)[1].lower()
            if ext in target_extensions:
                input_path = os.path.join(root, file)
                output_path = os.path.join(root, os.path.splitext(file)[0] + '.jpg')

                try:
                    # Convierte la imagen a RGB y guarda como .jpg
                    with Image.open(input_path) as img:
                        if img.mode != 'RGB':
                            img = img.convert('RGB')
                        img.save(output_path, 'JPEG', quality=95)
                    converted_count[ext[1:]] += 1
                    total_files_processed += 1
                    # Elimina el archivo original
                    os.remove(input_path)
                except Exception as e:
                    print(f"Error al procesar {input_path}: {e}")

    # Imprime resumen de conversiones
    print(f"\n{'=' * 20}")
    print(f"Imágenes .jpeg convertidas: {converted_count['jpeg']}")
    print(f"Imágenes .png convertidas: {converted_count['png']}")
    print(f"Total archivos procesados: {total_files_processed}")
    if total_files_processed == 0:
        print("No se encontraron imágenes .png o .jpeg.")

    return converted_count

if __name__ == "__main__":
    convert_only_png_jpeg_to_jpg_replace(DATASET_PATH)

Procesando directorios: 160it [00:00, 218.58it/s]


Imágenes .jpeg convertidas: 0
Imágenes .png convertidas: 0
Total archivos procesados: 0
No se encontraron imágenes .png o .jpeg.





### ***Redimenzionamiento de imagenes a 224***

Se redimensionaron todas las imágenes a 224 x 224 píxeles, ya que la mayoría se encontraba en dimensiones como 256 x 256 u otras variantes. Esta estandarización es necesaria para asegurar la compatibilidad con el modelo ResNet, que requiere una entrada fija de dicha dimensión.

In [46]:
import os
from PIL import Image
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

# Configuración
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset"
OUTPUT_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset_resized"
TARGET_SIZE = (224, 224)
MAX_WORKERS = 16

def collect_image_paths(input_dir):
    """Recolecta rutas de imágenes .jpg."""
    file_paths = []
    # Recorre directorios y recolecta imágenes .jpg
    for root, _, files in tqdm(os.walk(input_dir), desc="Recolectando imágenes"):
        for file in files:
            if file.lower().endswith('.jpg'):
                file_paths.append(os.path.join(root, file))
    return file_paths

def resize_image(file_path, output_dir, target_size):
    """Redimensiona una imagen .jpg y la guarda en el directorio de salida."""
    try:
        # Crea subdirectorio de salida manteniendo la estructura
        relative_path = os.path.relpath(os.path.dirname(file_path), DATASET_PATH)
        output_subdir = os.path.join(output_dir, relative_path)
        os.makedirs(output_subdir, exist_ok=True)
        output_path = os.path.join(output_subdir, os.path.basename(file_path))

        # Abre, convierte a RGB si es necesario, redimensiona y guarda
        with Image.open(file_path) as img:
            if img.mode != 'RGB':
                img = img.convert('RGB')
            img_resized = img.resize(target_size, Image.LANCZOS)
            img_resized.save(output_path, 'JPEG', quality=95)
        return file_path, output_path, None
    except Exception as e:
        return file_path, None, str(e)

def resize_images(input_dir, output_dir, target_size, max_workers=MAX_WORKERS):
    """Redimensiona imágenes .jpg en paralelo y genera informe."""
    file_paths = collect_image_paths(input_dir)
    resized_count = 0
    errors = []

    # Procesa imágenes en paralelo
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_file = {executor.submit(resize_image, path, output_dir, target_size): path for path in file_paths}
        for future in tqdm(as_completed(future_to_file), total=len(file_paths), desc="Redimensionando imágenes"):
            input_path, output_path, error = future.result()
            if error:
                errors.append((input_path, error))
            else:
                resized_count += 1

    # Imprime resumen
    print(f"\n{'=' * 20}")
    print(f"Imágenes redimensionadas: {resized_count}")
    print(f"Errores: {len(errors)}")
    if errors:
        print("\nArchivos con errores:")
        for path, error in errors:
            print(f"  {path}: {error}")
    if resized_count == 0:
        print("No se encontraron imágenes .jpg.")

if __name__ == "__main__":
    resize_images(DATASET_PATH, OUTPUT_PATH, TARGET_SIZE)

Recolectando imágenes: 160it [00:01, 137.58it/s]
Redimensionando imágenes: 100%|██████████| 162916/162916 [04:07<00:00, 658.40it/s]



Imágenes redimensionadas: 162916
Errores: 0


### ***Balanceo de clases por cada planta y estado***

In [1]:
import os
import pandas as pd
from tqdm import tqdm

# Configuración
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset_resized"
OUTPUT_WEIGHTS_DIR = r"C:\Users\Arys\Desktop\Proyecto - 2\balanced_weights"
OUTPUT_WEIGHTS = os.path.join(OUTPUT_WEIGHTS_DIR, "plant_weights.csv")
os.makedirs(OUTPUT_WEIGHTS_DIR, exist_ok=True)

def apply_plant_balancing(class_counts):
    """Aplica balanceo por planta con ponderación inversa."""
    # Extrae plantas únicas
    plants = set('_'.join(cls.split('_')[1:2]) for cls in class_counts['Class'])
    balanced_data = []

    # Procesa cada planta
    for plant in tqdm(plants, desc="Balanceando plantas"):
        # Filtra clases de la planta actual
        plant_classes = [cls for cls in class_counts['Class'] if f"_{plant}_" in cls]
        plant_data = class_counts[class_counts['Class'].isin(plant_classes)].copy()

        if plant_data.empty:
            continue

        # Calcula pesos inversos para balanceo
        total_images = plant_data['Count'].sum()
        if total_images == 0:
            continue
        plant_data['Weight'] = total_images / plant_data['Count']
        weight_sum = plant_data['Weight'].sum()
        plant_data['Weight'] = plant_data['Weight'] / weight_sum

        # Genera lista de imágenes por clase
        for _, row in plant_data.iterrows():
            img_parts = row['Class'].split('_')
            img_type = img_parts[0]
            plant_name = img_parts[1]
            state = '_'.join(img_parts[2:])
            state_path = os.path.join(DATASET_PATH, img_type, plant_name, state)

            if os.path.exists(state_path):
                images = [f for f in os.listdir(state_path) if f.lower().endswith('.jpg')]
                for img in images[:min(len(images), row['Count'])]:
                    balanced_data.append([row['Class'], img, row['Weight']])

    # Guarda datos balanceados en CSV
    if balanced_data:
        balanced_df = pd.DataFrame(balanced_data, columns=['Class', 'Image', 'Weight'])
        balanced_df.to_csv(OUTPUT_WEIGHTS, index=False)
        print(f"\n{'=' * 20}")
        print(f"Total imágenes balanceadas: {len(balanced_df)}")
        print(f"Clases procesadas: {len(balanced_df['Class'].unique())}")
        print(f"Pesos guardados en: {OUTPUT_WEIGHTS}")
    else:
        print("Error: No se generaron datos balanceados. Verifica las rutas y el CSV.")

# Carga conteos y aplica balanceo
if __name__ == "__main__":
    class_counts = pd.read_csv(r"C:\Users\Arys\Desktop\Proyecto - 2\class_counts.csv")
    apply_plant_balancing(class_counts)

Balanceando plantas: 100%|██████████| 14/14 [00:00<00:00, 21.67it/s]



Total imágenes balanceadas: 162916
Clases procesadas: 114
Pesos guardados en: C:\Users\Arys\Desktop\Proyecto - 2\balanced_weights\plant_weights.csv


### ***Cargamos los pesos***

In [2]:
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import transforms
from tqdm import tqdm

# Configuración
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset_resized"
WEIGHTS_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\balanced_weights\plant_weights.csv"
BATCH_SIZE = 32
NUM_WORKERS = 4

# Transformaciones para entrenamiento
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

class PlantDataset(Dataset):
    """Carga imágenes .jpg con pesos para balanceo."""
    def __init__(self, weights_df, root_dir, transform=None):
        self.data = weights_df
        self.root_dir = root_dir
        self.transform = transform
        self.class_to_idx = {cls: idx for idx, cls in enumerate(weights_df['Class'].unique())}

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Obtiene imagen, etiqueta y peso."""
        row = self.data.iloc[idx]
        img_parts = row['Class'].split('_')
        img_path = f"{self.root_dir}/{img_parts[0]}/{img_parts[1]}/{'_'.join(img_parts[2:])}/{row['Image']}"

        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        label = self.class_to_idx[row['Class']]
        weight = row['Weight']
        return image, label, weight

def create_weighted_dataloader(weights_path, root_dir, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS):
    """Crea DataLoader con muestreo ponderado."""
    # Carga datos balanceados
    weights_df = pd.read_csv(weights_path)
    
    # Crea dataset con barra de progreso
    dataset = PlantDataset(weights_df, root_dir, transform=train_transforms)
    
    # Configura WeightedRandomSampler
    weights = torch.tensor(weights_df['Weight'].values, dtype=torch.float)
    sampler = WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)

    # Crea DataLoader
    dataloader = DataLoader(dataset, batch_size=batch_size, sampler=sampler, num_workers=num_workers)
    
    # Imprime resumen
    print(f"\n{'=' * 20}")
    print(f"Imágenes cargadas: {len(dataset)}")
    print(f"Clases únicas: {len(dataset.class_to_idx)}")
    
    return dataloader

if __name__ == "__main__":
    # Configura DataLoader con barra de progreso
    with tqdm(total=1, desc="Configurando DataLoader") as pbar:
        dataloader = create_weighted_dataloader(WEIGHTS_PATH, DATASET_PATH)
        pbar.update(1)

Configurando DataLoader: 100%|██████████| 1/1 [00:00<00:00,  4.85it/s]


Imágenes cargadas: 162916
Clases únicas: 114





## ***División del dataset***


In [4]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split

# Configuración
WEIGHTS_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\balanced_weights\plant_weights.csv"
OUTPUT_WEIGHTS_DIR = r"C:\Users\Arys\Desktop\Proyecto - 2\balanced_weights"
TRAIN_WEIGHTS = os.path.join(OUTPUT_WEIGHTS_DIR, "train_weights.csv")
VAL_WEIGHTS = os.path.join(OUTPUT_WEIGHTS_DIR, "val_weights.csv")
TEST_WEIGHTS = os.path.join(OUTPUT_WEIGHTS_DIR, "test_weights.csv")
os.makedirs(OUTPUT_WEIGHTS_DIR, exist_ok=True)

def split_dataset(weights_path, train_ratio=0.7, val_ratio=0.15):
    """Divide el dataset en conjuntos de entrenamiento, validación y prueba."""
    if not os.path.exists(weights_path):
        raise FileNotFoundError(f"No se encontró {weights_path}")
    
    df = pd.read_csv(weights_path)
    if not all(col in df.columns for col in ['Class', 'Image', 'Weight']):
        raise ValueError("El CSV debe contener 'Class', 'Image' y 'Weight'")
    
    if len(df) < 10:  # Validación mínima
        raise ValueError("El dataset es demasiado pequeño para dividir")
    
    num_classes = len(df['Class'].unique())
    print(f"Clases encontradas: {num_classes}")
    
    # Divide en entrenamiento y resto (validación + prueba)
    train_df, temp_df = train_test_split(
        df, train_size=train_ratio, stratify=df['Class'], random_state=42
    )
    val_size = val_ratio / (1 - train_ratio)
    val_df, test_df = train_test_split(
        temp_df, train_size=val_size, stratify=temp_df['Class'], random_state=42
    )
    
    # Guarda los conjuntos
    train_df.to_csv(TRAIN_WEIGHTS, index=False)
    val_df.to_csv(VAL_WEIGHTS, index=False)
    test_df.to_csv(TEST_WEIGHTS, index=False)
    
    print(f"\n{'=' * 20}")
    print(f"Total imágenes: {len(df)}")
    print(f"Entrenamiento: {len(train_df)} imágenes ({len(train_df['Class'].unique())} clases)")
    print(f"Validación: {len(val_df)} imágenes ({len(val_df['Class'].unique())} clases)")
    print(f"Prueba: {len(test_df)} imágenes ({len(test_df['Class'].unique())} clases)")
    print(f"CSVs guardados en: {OUTPUT_WEIGHTS_DIR}")

if __name__ == "__main__":
    try:
        split_dataset(WEIGHTS_PATH)
    except Exception as e:
        print(f"Error: {str(e)}")

Clases encontradas: 114

Total imágenes: 162916
Entrenamiento: 114041 imágenes (114 clases)
Validación: 24437 imágenes (114 clases)
Prueba: 24438 imágenes (114 clases)
CSVs guardados en: C:\Users\Arys\Desktop\Proyecto - 2\balanced_weights


## ***Entrenamiento del modelo***

In [5]:
import os
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import models, transforms
from torchvision.models import ResNet18_Weights
from tqdm import tqdm

# =================== CONFIGURACIÓN ===================
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset_resized"
WEIGHTS_DIR = r"C:\Users\Arys\Desktop\Proyecto - 2\balanced_weights"
TRAIN_WEIGHTS = os.path.join(WEIGHTS_DIR, "train_weights.csv")
VAL_WEIGHTS = os.path.join(WEIGHTS_DIR, "val_weights.csv")
TEST_WEIGHTS = os.path.join(WEIGHTS_DIR, "test_weights.csv")
MODEL_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\normal_model.pth"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# =================== TRANSFORMACIONES ===================
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# =================== DATASET PERSONALIZADO ===================
class PlantDataset(Dataset):
    def __init__(self, weights_df, root_dir, transform=None, class_to_idx=None):
        self.data = weights_df
        self.root_dir = root_dir
        self.transform = transform
        self.class_to_idx = class_to_idx

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        img_parts = row['Class'].split('_')
        img_path = f"{self.root_dir}/{img_parts[0]}/{img_parts[1]}/{'_'.join(img_parts[2:])}/{row['Image']}"
        try:
            image = Image.open(img_path).convert('RGB')
            if self.transform:
                image = self.transform(image)
            label = self.class_to_idx[row['Class']]
            return image, label
        except Exception as e:
            print(f"Error al cargar {img_path}: {str(e)}")
            raise

# =================== DATALOADER ===================
def create_dataloader(weights_path, root_dir, transform, class_to_idx, batch_size=64, use_sampler=True):
    if not os.path.exists(weights_path):
        raise FileNotFoundError(f"No se encontró {weights_path}")
    weights_df = pd.read_csv(weights_path)
    if not all(col in weights_df.columns for col in ['Class', 'Image', 'Weight']):
        raise ValueError(f"El CSV debe contener 'Class', 'Image' y 'Weight'")
    
    dataset = PlantDataset(weights_df, root_dir, transform=transform, class_to_idx=class_to_idx)
    sampler = None
    if use_sampler:
        weights = torch.tensor(weights_df['Weight'].values, dtype=torch.float)
        sampler = WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)
    dataloader = DataLoader(dataset, batch_size=batch_size, sampler=sampler, shuffle=not use_sampler)
    print(f"Cargado {weights_path}: {len(dataset)} imágenes, {len(class_to_idx)} clases")
    return dataloader

# =================== ENTRENAMIENTO ===================
def train_model(train_loader, val_loader, num_epochs=20, early_stop_patience=3):
    model = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
    num_classes = len(train_loader.dataset.class_to_idx)
    model.fc = nn.Sequential(
        nn.Dropout(0.3),
        nn.Linear(model.fc.in_features, num_classes)
    )
    model = model.to(DEVICE)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=0.0005)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2)

    best_acc = 0.0
    epochs_without_improvement = 0

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for images, labels in tqdm(train_loader, desc=f"Entrenando época {epoch+1}/{num_epochs}", leave=True):
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_acc = 100 * correct / total

        # VALIDACIÓN
        model.eval()
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for images, labels in tqdm(val_loader, desc="Validando", leave=False):
                images, labels = images.to(DEVICE), labels.to(DEVICE)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_acc = 100 * val_correct / val_total
        scheduler.step(val_acc)

        print(f"\nÉpoca {epoch+1}: Pérdida Entrenamiento: {train_loss:.4f}, Precisión Entrenamiento: {train_acc:.2f}%")
        print(f"Precisión Validación: {val_acc:.2f}%")

        # EARLY STOPPING Y SAVE MODEL
        if val_acc > best_acc:
            best_acc = val_acc
            epochs_without_improvement = 0
            try:
                torch.save(model.state_dict(), MODEL_PATH)
                print(f"Mejor modelo guardado en: {MODEL_PATH}")
            except Exception as e:
                print(f"Error al guardar el modelo: {str(e)}")
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement >= early_stop_patience:
            print(f"Parando temprano en época {epoch+1}")
            break

    return model

# =================== EVALUACIÓN ===================
def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in tqdm(test_loader, desc="Evaluando en prueba", leave=False):
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    test_acc = 100 * correct / total
    print(f"\n{'=' * 20}")
    print(f"Precisión en prueba: {test_acc:.2f}%")
    return test_acc

# =================== MAIN ===================
if __name__ == "__main__":
    try:
        # Verificar CSVs
        for path in [TRAIN_WEIGHTS, VAL_WEIGHTS, TEST_WEIGHTS]:
            if not os.path.exists(path):
                raise FileNotFoundError(f"No se encontró {path}")
        
        # Crear mapeo global de clases
        train_df = pd.read_csv(TRAIN_WEIGHTS)
        all_classes = sorted(train_df['Class'].unique())
        class_to_idx = {cls: idx for idx, cls in enumerate(all_classes)}

        with tqdm(total=3, desc="Configurando DataLoaders", leave=False) as pbar:
            train_loader = create_dataloader(TRAIN_WEIGHTS, DATASET_PATH, train_transforms, class_to_idx, use_sampler=True)
            pbar.update(1)
            val_loader = create_dataloader(VAL_WEIGHTS, DATASET_PATH, val_transforms, class_to_idx, use_sampler=False)
            pbar.update(1)
            test_loader = create_dataloader(TEST_WEIGHTS, DATASET_PATH, val_transforms, class_to_idx, use_sampler=False)
            pbar.update(1)

        model = train_model(train_loader, val_loader)
        evaluate_model(model, test_loader)
    except Exception as e:
        print(f"Error: {str(e)}")

                                                                       

Cargado C:\Users\Arys\Desktop\Proyecto - 2\balanced_weights\train_weights.csv: 114041 imágenes, 114 clases
Cargado C:\Users\Arys\Desktop\Proyecto - 2\balanced_weights\val_weights.csv: 24437 imágenes, 114 clases
Cargado C:\Users\Arys\Desktop\Proyecto - 2\balanced_weights\test_weights.csv: 24438 imágenes, 114 clases


Entrenando época 1/20: 100%|██████████| 1782/1782 [14:12<00:00,  2.09it/s]
                                                            


Época 1: Pérdida Entrenamiento: 0.5696, Precisión Entrenamiento: 84.06%
Precisión Validación: 79.73%
Mejor modelo guardado en: C:\Users\Arys\Desktop\Proyecto - 2\normal_model.pth


Entrenando época 2/20: 100%|██████████| 1782/1782 [14:41<00:00,  2.02it/s]
                                                            


Época 2: Pérdida Entrenamiento: 0.2960, Precisión Entrenamiento: 90.78%
Precisión Validación: 84.75%
Mejor modelo guardado en: C:\Users\Arys\Desktop\Proyecto - 2\normal_model.pth


Entrenando época 3/20: 100%|██████████| 1782/1782 [13:43<00:00,  2.16it/s]
                                                            


Época 3: Pérdida Entrenamiento: 0.2565, Precisión Entrenamiento: 91.88%
Precisión Validación: 87.87%
Mejor modelo guardado en: C:\Users\Arys\Desktop\Proyecto - 2\normal_model.pth


Entrenando época 4/20: 100%|██████████| 1782/1782 [13:26<00:00,  2.21it/s]
                                                            


Época 4: Pérdida Entrenamiento: 0.2243, Precisión Entrenamiento: 92.82%
Precisión Validación: 88.47%
Mejor modelo guardado en: C:\Users\Arys\Desktop\Proyecto - 2\normal_model.pth


Entrenando época 5/20: 100%|██████████| 1782/1782 [13:02<00:00,  2.28it/s]
                                                            


Época 5: Pérdida Entrenamiento: 0.2126, Precisión Entrenamiento: 93.16%
Precisión Validación: 89.50%
Mejor modelo guardado en: C:\Users\Arys\Desktop\Proyecto - 2\normal_model.pth


Entrenando época 6/20: 100%|██████████| 1782/1782 [13:02<00:00,  2.28it/s]
                                                            


Época 6: Pérdida Entrenamiento: 0.1913, Precisión Entrenamiento: 93.81%
Precisión Validación: 82.50%


Entrenando época 7/20: 100%|██████████| 1782/1782 [13:02<00:00,  2.28it/s]
                                                            


Época 7: Pérdida Entrenamiento: 0.1832, Precisión Entrenamiento: 94.06%
Precisión Validación: 90.17%
Mejor modelo guardado en: C:\Users\Arys\Desktop\Proyecto - 2\normal_model.pth


Entrenando época 8/20: 100%|██████████| 1782/1782 [13:55<00:00,  2.13it/s]
                                                            


Época 8: Pérdida Entrenamiento: 0.1696, Precisión Entrenamiento: 94.43%
Precisión Validación: 91.06%
Mejor modelo guardado en: C:\Users\Arys\Desktop\Proyecto - 2\normal_model.pth


Entrenando época 9/20: 100%|██████████| 1782/1782 [13:53<00:00,  2.14it/s]
                                                            


Época 9: Pérdida Entrenamiento: 0.1636, Precisión Entrenamiento: 94.52%
Precisión Validación: 93.45%
Mejor modelo guardado en: C:\Users\Arys\Desktop\Proyecto - 2\normal_model.pth


Entrenando época 10/20: 100%|██████████| 1782/1782 [13:52<00:00,  2.14it/s]
                                                            


Época 10: Pérdida Entrenamiento: 0.1564, Precisión Entrenamiento: 94.78%
Precisión Validación: 92.07%


Entrenando época 11/20: 100%|██████████| 1782/1782 [15:45<00:00,  1.88it/s]
                                                            


Época 11: Pérdida Entrenamiento: 0.1505, Precisión Entrenamiento: 95.08%
Precisión Validación: 91.06%


Entrenando época 12/20: 100%|██████████| 1782/1782 [16:57<00:00,  1.75it/s]
                                                            


Época 12: Pérdida Entrenamiento: 0.1493, Precisión Entrenamiento: 95.03%
Precisión Validación: 91.74%
Parando temprano en época 12


                                                                      


Precisión en prueba: 91.85%




## ***Pruebas con camara***

In [1]:
import cv2
import torch
import torch.nn as nn
from PIL import Image
from torchvision import models, transforms
import pandas as pd
import numpy as np
import os
import time
from tqdm import tqdm

# ================= CONFIGURACIÓN =================
MODEL_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\normal_model.pth"
WEIGHTS_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\balanced_weights\train_weights.csv"
CAPTURED_IMAGES_DIR = r"C:\Users\Arys\Desktop\Proyecto - 2\captured_images"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DEBUG = True  # Activa la depuración para ver la imagen umbralizada

os.makedirs(CAPTURED_IMAGES_DIR, exist_ok=True)

# ================ TRANSFORMACIÓN =================
test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# ================ CARGAR MODELO ==================
def load_model_and_classes(model_path, weights_path):
    """Carga el modelo y el mapeo de clases."""
    weights_df = pd.read_csv(weights_path)
    all_classes = sorted(weights_df['Class'].unique())
    class_to_idx = {cls: idx for idx, cls in enumerate(all_classes)}
    idx_to_class = {idx: cls for cls, idx in class_to_idx.items()}
    
    model = models.resnet18(pretrained=False)
    model.fc = nn.Sequential(
        nn.Dropout(0.3),
        nn.Linear(model.fc.in_features, len(class_to_idx))
    )
    model.load_state_dict(torch.load(model_path, map_location=DEVICE))
    model = model.to(DEVICE)
    model.eval()
    return model, idx_to_class

def normalize_brightness(frame):
    """Normaliza el brillo de la imagen."""
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    hsv[:, :, 2] = cv2.normalize(hsv[:, :, 2], None, 0, 255, cv2.NORM_MINMAX)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

def detect_leaf(frame, min_area=500):
    """Detecta una hoja con umbral adaptativo y devuelve el recuadro con contornos mejorados."""
    # Normaliza el brillo
    frame = normalize_brightness(frame)
    
    # Convierte a escala de grises y aplica desenfoque
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    # Aplica umbral adaptativo (mantenemos el original que te gusta)
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, 
                                   cv2.THRESH_BINARY_INV, 21, 5)
    
    # Ligero refinamiento morfológico para mejorar contornos sin cambiar la esencia
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    cleaned = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
    
    # Muestra la imagen umbralizada para depuración (si DEBUG=True)
    if DEBUG:
        cv2.imshow("Umbral", cleaned)
    
    # Encuentra contornos
    contours, _ = cv2.findContours(cleaned, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    if contours:
        # Selecciona el contorno más grande
        largest_contour = max(contours, key=cv2.contourArea)
        area = cv2.contourArea(largest_contour)
        if area > min_area:
            x, y, w, h = cv2.boundingRect(largest_contour)
            if w > 30 and h > 30 and w < frame.shape[1] * 0.9 and h < frame.shape[0] * 0.9:
                # Dibuja el contorno en la imagen original
                cv2.drawContours(frame, [largest_contour], -1, (0, 255, 0), 2)
                return (x, y, x+w, y+h), frame[y:y+h, x:x+w]
    
    return None, frame

def predict_image(image, model, idx_to_class, transform):
    """Predice la clase de una imagen y devuelve la confianza."""
    image = transform(image).unsqueeze(0).to(DEVICE)
    with torch.no_grad():
        outputs = model(image)
        probs = torch.softmax(outputs, dim=1)
        confidence, predicted = torch.max(probs, 1)
    return idx_to_class[predicted.item()], confidence.item()

def capture_and_predict():
    """Captura imágenes, detecta una hoja y predice en tiempo real."""
    model, idx_to_class = load_model_and_classes(MODEL_PATH, WEIGHTS_PATH)
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: No se pudo abrir la cámara.")
        return

    # Reducir la resolución de la cámara para mejorar FPS
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    print("Predicciones en tiempo real. Presiona 'q' para guardar imagen, 'Esc' para salir.")
    
    # Variables para estabilización
    last_bbox = None
    last_predicted_class = "Buscando hoja..."
    last_confidence = 0.0
    stable_count = 0
    STABLE_THRESHOLD = 3  # Número de frames consecutivos para considerar una detección estable
    
    with tqdm(total=1, desc="Capturando desde cámara", leave=True) as pbar:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: No se pudo leer el frame.")
                break

            # Detecta una hoja
            bbox, roi = detect_leaf(frame, min_area=500)
            predicted_class = last_predicted_class
            confidence = last_confidence

            if bbox:
                # Compara con la detección anterior para estabilizar
                if last_bbox and abs(bbox[0] - last_bbox[0]) < 50 and abs(bbox[1] - last_bbox[1]) < 50:
                    stable_count += 1
                else:
                    stable_count = 1
                    last_bbox = bbox
                
                if stable_count >= STABLE_THRESHOLD:
                    x1, y1, x2, y2 = bbox
                    roi_pil = Image.fromarray(cv2.cvtColor(roi, cv2.COLOR_BGR2RGB))
                    predicted_class, confidence = predict_image(roi_pil, model, idx_to_class, test_transforms)
                    last_predicted_class = predicted_class
                    last_confidence = confidence
                    # Dibuja el rectángulo y la predicción
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    cv2.putText(frame, f"Clase: {predicted_class}", (x1, y1-10), 
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
                    cv2.putText(frame, f"Confianza: {confidence*100:.2f}%", (x1, y1-30), 
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
            else:
                stable_count = 0
                last_bbox = None
                predicted_class = "Buscando hoja..."
                confidence = 0.0
                last_predicted_class = predicted_class
                last_confidence = confidence

            # Muestra la predicción en la parte superior
            cv2.putText(frame, f"Pred: {predicted_class}", (10, 30), 
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(frame, f"Conf: {confidence*100:.2f}%", (10, 60), 
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
            cv2.imshow("Cámara", frame)

            key = cv2.waitKey(30)  # 30ms (~33 FPS) para un feed más fluido
            if key == ord('q') and bbox:  # Solo guarda si se detecta una hoja
                roi_pil = Image.fromarray(cv2.cvtColor(roi, cv2.COLOR_BGR2RGB))
                save_path = os.path.join(CAPTURED_IMAGES_DIR, 
                                       f"captured_{predicted_class}_{confidence*100:.2f}_{int(time.time())}.jpg")
                roi_pil.save(save_path)
                print(f"Imagen guardada: {save_path}")
                pbar.update(0)
            elif key == 27:  # Esc
                break

        cap.release()
        cv2.destroyAllWindows()
        if DEBUG:
            cv2.destroyWindow("Umbral")

if __name__ == "__main__":
    capture_and_predict()



Predicciones en tiempo real. Presiona 'q' para guardar imagen, 'Esc' para salir.


Capturando desde cámara:   0%|          | 0/1 [03:19<?, ?it/s]


error: OpenCV(4.12.0) D:\a\opencv-python\opencv-python\opencv\modules\highgui\src\window_w32.cpp:1261: error: (-27:Null pointer) NULL window: 'Umbral' in function 'cvDestroyWindow'


## ***Generación del dataset augmentado y Carga de pesos***

In [6]:
import os
import pandas as pd
from PIL import Image
import torch
from torchvision import transforms
from torchvision.transforms.functional import to_pil_image
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

# Configuración
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset_resized"
AUGMENTED_OUTPUT_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset_augmented"
AUGMENTED_WEIGHTS = r"C:\Users\Arys\Desktop\Proyecto - 2\augmented_weights\augmented_plant_weights.csv"
AUGMENTATIONS_PER_IMAGE = 5
TARGET_SIZE = (224, 224)
MAX_WORKERS = 16

# Transformaciones para aumento de datos
augment_transforms = transforms.Compose([
    transforms.ToTensor(),  # Convertir imagen PIL a tensor
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),
])

def collect_image_paths(input_dir):
    """Recolecta rutas de imágenes .jpg."""
    file_paths = []
    for root, _, files in tqdm(os.walk(input_dir), desc="Recolectando imágenes"):
        for file in files:
            if file.lower().endswith('.jpg'):
                file_paths.append(os.path.join(root, file))
    return file_paths

def augment_and_save_image(file_path, output_dir, augmentations_per_image):
    """Genera imágenes aumentadas y las guarda."""
    results = []
    try:
        # Obtener la ruta relativa y la clase
        relative_path = os.path.relpath(os.path.dirname(file_path), DATASET_PATH)
        output_subdir = os.path.join(output_dir, relative_path)
        os.makedirs(output_subdir, exist_ok=True)
        
        # Generar nombre de clase (incluyendo prefijo: color, grayscale, segmented)
        img_class_parts = relative_path.replace('\\', '/').split('/')
        if len(img_class_parts) < 2:  # Asegurarse de que hay al menos tipo (color/grayscale/segmented) y planta
            return [(file_path, None, None, None, "Ruta relativa inválida")]
        img_class = '_'.join(img_class_parts)  # Ejemplo: color_Apple_Apple_scab
        
        with Image.open(file_path) as img:
            img = img.convert('RGB')
            base_name = os.path.splitext(os.path.basename(file_path))[0]
            
            # Guardar imagen original
            output_path = os.path.join(output_subdir, f"{base_name}.jpg")
            try:
                img.resize(TARGET_SIZE, Image.LANCZOS).save(output_path, 'JPEG', quality=95)
                results.append((file_path, output_path, img_class, 1.0, None))
            except Exception as e:
                results.append((file_path, None, img_class, None, f"Error al guardar imagen original: {str(e)}"))
            
            # Generar imágenes aumentadas
            for i in range(augmentations_per_image):
                try:
                    aug_img = augment_transforms(img)  # Aplica transformaciones (img se convierte a tensor)
                    aug_img_pil = to_pil_image(aug_img)  # Convierte tensor a PIL
                    aug_path = os.path.join(output_subdir, f"{base_name}_aug_{i}.jpg")
                    aug_img_pil.save(aug_path, 'JPEG', quality=95)  # Guardar con PIL
                    results.append((file_path, aug_path, img_class, 0.5, None))  # Peso menor para aumentadas
                except Exception as e:
                    results.append((file_path, None, img_class, None, f"Error al generar imagen aumentada {i}: {str(e)}"))
                
        return results
    except Exception as e:
        return [(file_path, None, None, None, f"Error general: {str(e)}")]

def augment_dataset(input_dir, output_dir, augmentations_per_image):
    """Aumenta el dataset y genera CSV con pesos."""
    os.makedirs(os.path.dirname(AUGMENTED_WEIGHTS), exist_ok=True)
    file_paths = collect_image_paths(input_dir)
    all_results = []
    
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        future_to_file = {executor.submit(augment_and_save_image, path, output_dir, augmentations_per_image): path for path in file_paths}
        for future in tqdm(as_completed(future_to_file), total=len(file_paths), desc="Aumentando imágenes"):
            all_results.extend(future.result())
    
    # Separar resultados válidos y errores
    valid_results = [r for r in all_results if r[1] is not None and r[2] is not None]
    errors = [r for r in all_results if r[4] is not None]
    
    # Crear DataFrame
    data = [(r[2], os.path.basename(r[1]), r[3]) for r in valid_results]
    if not data:
        print(f"Error: No se generaron imágenes válidas. Total errores: {len(errors)}")
        print("\nArchivos con errores:")
        for _, _, _, _, error in errors:
            print(f"  {error}")
        return
    
    df = pd.DataFrame(data, columns=['Class', 'Image', 'Weight'])
    
    # Calcular pesos inversos por clase
    class_counts = df['Class'].value_counts()
    total_images = len(df)
    df['Weight'] = df['Class'].apply(lambda x: total_images / class_counts[x])
    weight_sum = df['Weight'].sum()
    df['Weight'] = df['Weight'] / weight_sum
    
    df.to_csv(AUGMENTED_WEIGHTS, index=False)
    
    # Resumen
    print(f"\n{'=' * 20}")
    print(f"Imágenes totales (originales + aumentadas): {len(df)}")
    print(f"Clases únicas: {len(df['Class'].unique())}")
    print(f"Pesos guardados en: {AUGMENTED_WEIGHTS}")
    print(f"Errores: {len(errors)}")
    if errors:
        print("\nArchivos con errores:")
        for _, _, _, _, error in errors:
            print(f"  {error}")

if __name__ == "__main__":
    augment_dataset(DATASET_PATH, AUGMENTED_OUTPUT_PATH, AUGMENTATIONS_PER_IMAGE)

Recolectando imágenes: 160it [00:00, 321.18it/s]
Aumentando imágenes: 100%|██████████| 162916/162916 [1:20:11<00:00, 33.86it/s]  



Imágenes totales (originales + aumentadas): 977496
Clases únicas: 114
Pesos guardados en: C:\Users\Arys\Desktop\Proyecto - 2\augmented_weights\augmented_plant_weights.csv
Errores: 0


## ***División del dataset augmentado***

In [9]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split

# Configuración
AUGMENTED_WEIGHTS = r"C:\Users\Arys\Desktop\Proyecto - 2\augmented_weights\augmented_plant_weights.csv"
OUTPUT_WEIGHTS_DIR = r"C:\Users\Arys\Desktop\Proyecto - 2\augmented_weights"
TRAIN_WEIGHTS = os.path.join(OUTPUT_WEIGHTS_DIR, "aug_train_weights.csv")
VAL_WEIGHTS = os.path.join(OUTPUT_WEIGHTS_DIR, "aug_val_weights.csv")
TEST_WEIGHTS = os.path.join(OUTPUT_WEIGHTS_DIR, "aug_test_weights.csv")
os.makedirs(OUTPUT_WEIGHTS_DIR, exist_ok=True)

def split_augmented_dataset(weights_path, train_ratio=0.7, val_ratio=0.15, sample_fraction=None):
    """Divide el dataset aumentado en entrenamiento, validación y prueba."""
    if not os.path.exists(weights_path):
        raise FileNotFoundError(f"No se encontró {weights_path}")

    df = pd.read_csv(weights_path)
    if not all(col in df.columns for col in ['Class', 'Image', 'Weight']):
        raise ValueError("El CSV debe contener 'Class', 'Image' y 'Weight'")

    if len(df) < 10:
        raise ValueError("El dataset es demasiado pequeño para dividir")

    if sample_fraction is not None and 0 < sample_fraction < 1:
        df = df.groupby('Class', group_keys=False).apply(lambda x: x.sample(frac=sample_fraction, random_state=42)).reset_index(drop=True)
        print(f"Se ha muestreado el {sample_fraction * 100:.1f}% del dataset para pruebas")

    num_classes = len(df['Class'].unique())
    print(f"Clases encontradas: {num_classes}")

    # Divide en entrenamiento y resto (validación + prueba)
    train_df, temp_df = train_test_split(
        df, train_size=train_ratio, stratify=df['Class'], random_state=42
    )
    val_size = val_ratio / (1 - train_ratio)
    val_df, test_df = train_test_split(
        temp_df, train_size=val_size, stratify=temp_df['Class'], random_state=42
    )

    # Guarda los conjuntos
    train_df.to_csv(TRAIN_WEIGHTS, index=False)
    val_df.to_csv(VAL_WEIGHTS, index=False)
    test_df.to_csv(TEST_WEIGHTS, index=False)

    print(f"\n{'=' * 20}")
    print(f"Total imágenes: {len(df)}")
    print(f"Entrenamiento: {len(train_df)} imágenes ({len(train_df['Class'].unique())} clases)")
    print(f"Validación: {len(val_df)} imágenes ({len(val_df['Class'].unique())} clases)")
    print(f"Prueba: {len(test_df)} imágenes ({len(test_df['Class'].unique())} clases)")
    print(f"CSVs guardados en: {OUTPUT_WEIGHTS_DIR}")

if __name__ == "__main__":
    try:
        # Usa sample_fraction=0.05 para trabajar solo con el 5% del dataset
        split_augmented_dataset(AUGMENTED_WEIGHTS, sample_fraction=0.30)
    except Exception as e:
        print(f"Error: {str(e)}")


  df = df.groupby('Class', group_keys=False).apply(lambda x: x.sample(frac=sample_fraction, random_state=42)).reset_index(drop=True)


Se ha muestreado el 30.0% del dataset para pruebas
Clases encontradas: 114

Total imágenes: 293258
Entrenamiento: 205280 imágenes (114 clases)
Validación: 43988 imágenes (114 clases)
Prueba: 43990 imágenes (114 clases)
CSVs guardados en: C:\Users\Arys\Desktop\Proyecto - 2\augmented_weights


## ***Entrenamiento del modelo con imagenes augmentadas y originales***

In [1]:
import os
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import models, transforms
from torchvision.models import ResNet18_Weights
from tqdm import tqdm

# =================== CONFIGURACIÓN ===================
DATASET_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\plantvillage-dataset_augmented"
WEIGHTS_DIR = r"C:\Users\Arys\Desktop\Proyecto - 2\augmented_weights"
TRAIN_WEIGHTS = os.path.join(WEIGHTS_DIR, "aug_train_weights.csv")
VAL_WEIGHTS = os.path.join(WEIGHTS_DIR, "aug_val_weights.csv")
TEST_WEIGHTS = os.path.join(WEIGHTS_DIR, "aug_test_weights.csv")
MODEL_DIR = r"C:\Users\Arys\Desktop\Proyecto - 2\models"
os.makedirs(MODEL_DIR, exist_ok=True)
MODEL_PATH = os.path.join(MODEL_DIR, "augmented_model.pth")
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 16
NUM_WORKERS = 0
NUM_EPOCHS = 20
EARLY_STOP_PATIENCE = 3
METRICS_LOG = os.path.join(MODEL_DIR, "training_metrics.csv")

if torch.cuda.is_available():
    torch.cuda.empty_cache()
    torch.backends.cudnn.benchmark = True

train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

class PlantDataset(Dataset):
    def __init__(self, weights_df, root_dir, transform=None, class_to_idx=None):
        self.data = weights_df
        self.root_dir = root_dir
        self.transform = transform
        self.class_to_idx = class_to_idx

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        img_parts = row['Class'].split('_')
        img_path = f"{self.root_dir}/{img_parts[0]}/{img_parts[1]}/{'_'.join(img_parts[2:])}/{row['Image']}"
        try:
            image = Image.open(img_path).convert('RGB')
            if self.transform:
                image = self.transform(image)
            label = self.class_to_idx[row['Class']]
            return image, label
        except Exception as e:
            print(f"Error al cargar {img_path}: {str(e)}")
            raise

def create_dataloader(weights_path, root_dir, transform, class_to_idx, batch_size=BATCH_SIZE, use_sampler=True):
    weights_df = pd.read_csv(weights_path)
    dataset = PlantDataset(weights_df, root_dir, transform=transform, class_to_idx=class_to_idx)
    sampler = WeightedRandomSampler(torch.tensor(weights_df['Weight'].values, dtype=torch.float), len(weights_df), replacement=True) if use_sampler else None
    return DataLoader(dataset, batch_size=batch_size, sampler=sampler, shuffle=not use_sampler, num_workers=NUM_WORKERS, pin_memory=torch.cuda.is_available())

def train_model(train_loader, val_loader, start_epoch=1, num_epochs=NUM_EPOCHS, early_stop_patience=EARLY_STOP_PATIENCE):
    model = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
    num_classes = len(train_loader.dataset.class_to_idx)
    model.fc = nn.Sequential(nn.Dropout(0.3), nn.Linear(model.fc.in_features, num_classes))
    model = model.to(DEVICE)

    checkpoint_path = os.path.join(MODEL_DIR, f"model_epoch_{start_epoch - 1}.pth")
    if os.path.exists(checkpoint_path):
        model.load_state_dict(torch.load(checkpoint_path))
        print(f"Modelo cargado desde {checkpoint_path}")

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=0.0005)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2)

    best_acc = 0.0
    epochs_without_improvement = 0

    if start_epoch == 1 and os.path.exists(METRICS_LOG):
        os.remove(METRICS_LOG)

    for epoch in range(start_epoch, num_epochs + 1):
        model.train()
        running_loss, correct, total = 0.0, 0, 0

        for images, labels in tqdm(train_loader, desc=f"Entrenando época {epoch}/{num_epochs}"):
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_acc = 100 * correct / total

        model.eval()
        val_correct, val_total = 0, 0
        with torch.no_grad():
            for images, labels in tqdm(val_loader, desc="Validando"):
                images, labels = images.to(DEVICE), labels.to(DEVICE)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_acc = 100 * val_correct / val_total
        scheduler.step(val_acc)

        print(f"\nÉpoca {epoch}: Pérdida Entrenamiento: {train_loss:.4f}, Precisión Entrenamiento: {train_acc:.2f}%")
        print(f"Precisión Validación: {val_acc:.2f}%")

        torch.save(model.state_dict(), os.path.join(MODEL_DIR, f"model_epoch_{epoch}.pth"))
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), MODEL_PATH)
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1

        with open(METRICS_LOG, "a") as f:
            if epoch == start_epoch:
                f.write("Epoch,TrainLoss,TrainAcc,ValAcc\n")
            f.write(f"{epoch},{train_loss:.4f},{train_acc:.2f},{val_acc:.2f}\n")

        if epochs_without_improvement >= early_stop_patience:
            print(f"Parando temprano en época {epoch}")
            break
    return model

def evaluate_model(model, test_loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in tqdm(test_loader, desc="Evaluando en prueba"):
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"\n{'='*20}\nPrecisión en prueba: {100 * correct / total:.2f}%")
    return 100 * correct / total

if __name__ == "__main__":
    train_df = pd.read_csv(TRAIN_WEIGHTS)
    class_to_idx = {cls: idx for idx, cls in enumerate(sorted(train_df['Class'].unique()))}

    train_loader = create_dataloader(TRAIN_WEIGHTS, DATASET_PATH, train_transforms, class_to_idx)
    val_loader = create_dataloader(VAL_WEIGHTS, DATASET_PATH, val_transforms, class_to_idx, use_sampler=False)
    test_loader = create_dataloader(TEST_WEIGHTS, DATASET_PATH, val_transforms, class_to_idx, use_sampler=False)

    # Cambia aquí si quieres reanudar desde cierta época
    model = train_model(train_loader, val_loader, start_epoch=13)
    evaluate_model(model, test_loader)


Modelo cargado desde C:\Users\Arys\Desktop\Proyecto - 2\models\model_epoch_12.pth


Entrenando época 13/20: 100%|██████████| 12830/12830 [35:05<00:00,  6.09it/s] 
Validando: 100%|██████████| 2750/2750 [02:54<00:00, 15.79it/s]



Época 13: Pérdida Entrenamiento: 0.3102, Precisión Entrenamiento: 89.52%
Precisión Validación: 94.93%


Entrenando época 14/20: 100%|██████████| 12830/12830 [37:23<00:00,  5.72it/s] 
Validando: 100%|██████████| 2750/2750 [02:37<00:00, 17.48it/s]



Época 14: Pérdida Entrenamiento: 0.3046, Precisión Entrenamiento: 89.66%
Precisión Validación: 95.49%


Entrenando época 15/20: 100%|██████████| 12830/12830 [30:03<00:00,  7.11it/s]
Validando: 100%|██████████| 2750/2750 [02:19<00:00, 19.67it/s]



Época 15: Pérdida Entrenamiento: 0.3019, Precisión Entrenamiento: 89.77%
Precisión Validación: 96.11%


Entrenando época 16/20: 100%|██████████| 12830/12830 [29:33<00:00,  7.23it/s] 
Validando: 100%|██████████| 2750/2750 [02:25<00:00, 18.87it/s]



Época 16: Pérdida Entrenamiento: 0.2993, Precisión Entrenamiento: 89.82%
Precisión Validación: 96.28%


Entrenando época 17/20: 100%|██████████| 12830/12830 [29:42<00:00,  7.20it/s]
Validando: 100%|██████████| 2750/2750 [02:44<00:00, 16.74it/s]



Época 17: Pérdida Entrenamiento: 0.2899, Precisión Entrenamiento: 90.07%
Precisión Validación: 95.74%


Entrenando época 18/20: 100%|██████████| 12830/12830 [31:03<00:00,  6.89it/s]  
Validando: 100%|██████████| 2750/2750 [02:26<00:00, 18.78it/s]



Época 18: Pérdida Entrenamiento: 0.2877, Precisión Entrenamiento: 90.19%
Precisión Validación: 95.32%


Entrenando época 19/20: 100%|██████████| 12830/12830 [28:01<00:00,  7.63it/s]
Validando: 100%|██████████| 2750/2750 [02:27<00:00, 18.63it/s]



Época 19: Pérdida Entrenamiento: 0.2851, Precisión Entrenamiento: 90.30%
Precisión Validación: 95.44%
Parando temprano en época 19


Evaluando en prueba: 100%|██████████| 2750/2750 [04:07<00:00, 11.12it/s]


Precisión en prueba: 95.51%





## ***Predicción con cámara (modelo augmentado)***

In [3]:
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import models
from torchvision.models import ResNet18_Weights
import os
import pandas as pd
from PIL import Image
import numpy as np
import textwrap

# ================= CONFIGURACIÓN =================
MODEL_PATH = r"C:\Users\Arys\Desktop\Proyecto - 2\models\augmented_model.pth"
CLASSES_CSV = r"C:\Users\Arys\Desktop\Proyecto - 2\augmented_weights\aug_train_weights.csv"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
CAPTURED_IMAGES_DIR = r"C:\Users\Arys\Desktop\Proyecto - 2\captured_images"
os.makedirs(CAPTURED_IMAGES_DIR, exist_ok=True)

# ================ CLASES =================
train_df = pd.read_csv(CLASSES_CSV)
classes = sorted(train_df['Class'].unique())
class_to_idx = {cls: idx for idx, cls in enumerate(classes)}
idx_to_class = {idx: cls for cls, idx in class_to_idx.items()}

# ================ TRANSFORMACIONES =================
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ColorJitter(brightness=0.4, contrast=0.4),  # Aumentado para manejar vistas variables
    transforms.RandomRotation(180),  # Para manejar perspectivas (arriba/abajo)
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# ================ CARGAR MODELO =================
model = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Sequential(
    nn.Dropout(0.3),
    nn.Linear(model.fc.in_features, len(classes))
)
model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
model = model.to(DEVICE)
model.eval()

# ================ CAPTURA DE VIDEO =================
cap = cv2.VideoCapture(0)
cv2.namedWindow("Detección en tiempo real", cv2.WINDOW_NORMAL)
cv2.resizeWindow("Detección en tiempo real", 900, 600)
cv2.namedWindow("Umbral Blanco y Negro", cv2.WINDOW_NORMAL)
cv2.resizeWindow("Umbral Blanco y Negro", 300, 300)

print("Presiona 'q' para salir, 's' para guardar imagen.")
while True:
    ret, frame = cap.read()
    if not ret:
        print("No se pudo capturar imagen.")
        break

    # Detección de hoja usando color verde ampliado
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    lower_green = np.array([15, 20, 20])  # Rango más amplio para "peach"
    upper_green = np.array([100, 255, 255])
    mask = cv2.inRange(hsv, lower_green, upper_green)
    blurred = cv2.GaussianBlur(mask, (9, 9), 0)
    _, thresh_green = cv2.threshold(blurred, 80, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
    cleaned = cv2.morphologyEx(thresh_green, cv2.MORPH_CLOSE, kernel, iterations=4)

    # Umbral blanco y negro para depuración
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    _, thresh_bw = cv2.threshold(gray, 70, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    contours, _ = cv2.findContours(cleaned, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    bbox = None
    roi = frame
    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        area = cv2.contourArea(largest_contour)
        if area > 300:  # Reducido para detectar hojas individuales
            x, y, w, h = cv2.boundingRect(largest_contour)
            if w > 30 and h > 30 and w < frame.shape[1] * 0.95 and h < frame.shape[0] * 0.95:
                cv2.drawContours(frame, [largest_contour], -1, (0, 255, 0), 2)
                bbox = (x, y, x + w, y + h)
                roi = frame[y:y+h, x:x+w]
                cv2.putText(frame, f"Área: {area:.0f}", (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

    # Preprocesamiento y predicción
    rgb_image = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(rgb_image)
    input_tensor = transform(pil_image).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        outputs = model(input_tensor)
        probs = torch.softmax(outputs, dim=1)
        confidence, predicted = torch.max(probs, 1)
        label = idx_to_class[predicted.item()] if confidence.item() > 0.6 else "Desconocido"

    # Mostrar texto
    wrapped_text = textwrap.wrap(label, width=40)
    y = 30
    for line in wrapped_text:
        cv2.putText(frame, line, (10, y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        y += 25
    cv2.putText(frame, f"Conf: {confidence.item()*100:.2f}%", (10, y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)

    # Mostrar ventanas
    cv2.imshow("Detección en tiempo real", frame)
    cv2.imshow("Umbral Blanco y Negro", thresh_bw)

    # Salir o guardar
    key = cv2.waitKey(50) & 0xFF
    if key == ord('q'):
        break
    elif key == ord('s') and bbox:
        roi_pil = Image.fromarray(rgb_image)
        save_path = os.path.join(CAPTURED_IMAGES_DIR, f"captured_{label}_{confidence.item()*100:.2f}_{int(time.time())}.jpg")
        roi_pil.save(save_path)
        print(f"Imagen guardada: {save_path}")

cap.release()
cv2.destroyAllWindows()
cv2.destroyWindow("Umbral Blanco y Negro")

Presiona 'q' para salir, 's' para guardar imagen.


error: OpenCV(4.12.0) D:\a\opencv-python\opencv-python\opencv\modules\highgui\src\window_w32.cpp:1261: error: (-27:Null pointer) NULL window: 'Umbral Blanco y Negro' in function 'cvDestroyWindow'
