In [17]:
import pandas as pd
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
from collections import Counter

import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Trabajo previo en los datos

In [2]:
train_directory = "Dataset/train"
validation_directory = "Dataset/valid"

In [23]:
def cargar_imagenes_y_etiquetas(directorio):
    imagenes = []
    etiquetas = []
    etiquetas_indices = {}
    indice_actual = 0
    
    for root, _, files in os.walk(directorio):
        for file in files:
            # Verificar si el archivo es una imagen (puedes ajustar esta condición según el formato de tus imágenes)
            if file.endswith(".jpg") or file.endswith(".png") or file.endswith(".jpeg"):
                # Obtener la ruta completa de la imagen
                imagen_path = os.path.join(root, file)
                # El nombre del directorio padre es la etiqueta (Y)
                etiqueta = os.path.basename(root)
                
                # Si la etiqueta aún no está en el diccionario de índices, agregarla
                if etiqueta not in etiquetas_indices:
                    etiquetas_indices[etiqueta] = indice_actual
                    indice_actual += 1
                
                # Cargar la imagen
                imagen = cv2.imread(imagen_path)  
                
                # Agregar la imagen y su etiqueta a las listas correspondientes
                imagenes.append(imagen)
                etiquetas.append(etiqueta)
    
    # Devolver las imágenes, las etiquetas y el diccionario de índices de etiquetas
    return imagenes, etiquetas, etiquetas_indices


X_train, y_train, y_train_index = cargar_imagenes_y_etiquetas(train_directory)

X_val, y_val, y_val_index = cargar_imagenes_y_etiquetas(validation_directory)

print("Ejemplos de imágenes y etiquetas de entrenamiento:")
print(f"Imagen: {X_train[1][1][:3]}..., Etiqueta: {y_train[1]}, Índice de la clase: {y_train_index.get(y_train[1])}")

print("\nEjemplos de imágenes y etiquetas de validación:")
print(f"Imagen: {X_val[1][1][:3]}..., Etiqueta: {y_val[1]}, Índice de la clase: {y_val_index.get(y_train[1])}")

{'88 AN': 0, 'ADMIRAL RED': 1, 'ADONIS': 2, 'AFRICAN GIANT SWALLOWTAIL': 3, 'AMERICAN SNOOT': 4, 'APPOLLO': 5, 'ATALA': 6, 'AWL BANDED COMMON': 7, 'BANDED GOLD': 8, 'BANDED HELICONIAN ORANGE': 9, 'BANDED PEACOCK': 10, 'BARRED FLASHER TWO': 11, 'BECKERS WHITE': 12, 'BIRDWING CAIRNS': 13, 'BLACK HAIRSTREAK': 14, 'BLUE CROW SPOTTED': 15, 'BLUE MORPHO': 16, 'BROWN SIPROETA': 17, 'CABBAGE WHITE': 18, 'CATTLEHEART CELLED GREEN': 19, 'CHECQUERED SKIPPER': 20, 'CHESTNUT': 21, 'CLEOPATRA': 22, 'CLOAK MOURNING': 23, 'CLODIUS PARNASSIAN': 24, 'CLOUDED SULPHUR': 25, 'COMA EASTERN': 26, 'COMMON WOOD-NYMPH': 27, 'COPPER PURPLISH': 28, 'COPPER TAIL': 29, 'CRACKER RED': 30, 'CRECENT': 31, 'CRIMSON PATCH': 32, 'DANAID EGGFLY': 33, 'DAPPLE EASTERN WHITE': 34, 'DOGFACE SOUTHERN': 35, 'EASTERN ELFIN PINE': 36, 'EGGFLY GREAT': 37, 'ELBOWED PIERROT': 38, 'GREAT JAY': 39, 'GREY HAIRSTREAK': 40, 'HAIRSTREAK PURPLE': 41, 'INDRA SWALLOW': 42, 'IPHICLUS SISTER': 43, 'JULIA': 44, 'KITE PAPER': 45, 'LADY PAINTED':

In [4]:
def contar_imagenes_por_clase(etiquetas):
    conteo = Counter(etiquetas)
    return conteo

# Calcular el recuento de imágenes por clase en el conjunto de entrenamiento y validación
count_train = contar_imagenes_por_clase(y_train)
count_validation = contar_imagenes_por_clase(y_val)

df_count_train = pd.DataFrame(list(count_train.items()), columns=['Clase', 'Total de imágenes'])
df_count_validation = pd.DataFrame(list(count_validation.items()), columns=['Clase', 'Total de imágenes'])

df_count_train.to_excel('Recursos/Recuentro Entrenamiento.xlsx', index=False)
df_count_validation.to_excel('Recursos/Recuento Validación.xlsx', index=False)

Revisando manualmente los datos en excel, me encuentro conque la clase menos popular es la crimson patch, con 96 imágenes. Y la clase más popular tiene 184 imágenes, esta corresponde a Cloak Mourning, tendremos en cuenta estos puntos para la revisión de las métricas del modelo, y ver cómo predice cada una.

Dicho esto, procedemos a normalizar las imágenes:

In [5]:
X_train = np.array(X_train)
X_val = np.array(X_val)

print("Antes de la normalización: " + str(X_train[1][1][:3]))
print("Antes de la normalización: " + str(X_val[1][1][:3]))

X_train = X_train / 255.0
X_val = X_val / 255.0

print("Después de la normalización: " + str(X_train[1][1][:3]))
print("Después de la normalización: " + str(X_val[1][1][:3]))

Antes de la normalización: [[1 0 2]
 [0 0 1]
 [2 0 4]]
Antes de la normalización: [[ 0 39 26]
 [ 0 40 27]
 [ 0 40 27]]
Después de la normalización: [[0.00392157 0.         0.00784314]
 [0.         0.         0.00392157]
 [0.00784314 0.         0.01568627]]
Después de la normalización: [[0.         0.15294118 0.10196078]
 [0.         0.15686275 0.10588235]
 [0.         0.15686275 0.10588235]]


In [6]:
print("Datos de training, X e Y respectivamente: " + str(len(X_train)), ", " + str(len(y_train)))
print("Datos de validación, X e Y respectivamente: " + str(len(X_val)), ", " + str(len(y_val)))

Datos de training, X e Y respectivamente: 9092 , 9092
Datos de validación, X e Y respectivamente: 1949 , 1949


# Creación del modelo.

Asignación de hiperparámetros.

In [11]:
epochs=15
img_rows,img_cols=180, 180 # Todas las imágenes vienen redimensionadas a 180x180 píxeles
input_shape=(img_rows,img_cols,3)
patience_factor = 2     

In [12]:
model = keras.Sequential([
  Conv2D(32, kernel_size=(3, 3), data_format="channels_last",input_shape=input_shape, activation='relu'),
  MaxPooling2D(pool_size=(2, 2)),
  Conv2D(128, kernel_size=(3, 3), activation='relu'),
  MaxPooling2D(pool_size=(2, 2)),
  Conv2D(256, kernel_size=(3, 3), activation='relu'),
  MaxPooling2D(pool_size=(2, 2)),
  MaxPooling2D(pool_size=(2, 2)),
  Flatten(),
  Dense(128, activation='relu'),
  Dropout(0.25),
  Dense(6, activation='softmax')
])   

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [13]:
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=patience_factor, verbose=1)

In [18]:
X_train = np.array(X_train)
X_val = np.array(X_val)

y_train = np.array(y_train)
y_val = np.array(y_val)

# Convertir a one-hot encoding para esta clasificación multiclase
y_train = to_categorical(y_train)
y_val = to_categorical(y_val)

ValueError: invalid literal for int() with base 10: '88 AN'

In [16]:
history = model.fit(X_train, y_train, 
                    epochs=epochs, 
                    batch_size=32, 
                    validation_data=(X_val, y_val), 
                    callbacks=[early_stopping])

ValueError: Unrecognized data type: x=[[[[0.13333333 0.18823529 0.2627451 ]
   [0.14117647 0.19607843 0.27058824]
   [0.14901961 0.20392157 0.27843137]
   ...
   [0.14117647 0.17647059 0.23137255]
   [0.14117647 0.17647059 0.23137255]
   [0.14117647 0.17647059 0.23137255]]

  [[0.12941176 0.18431373 0.25882353]
   [0.1372549  0.19215686 0.26666667]
   [0.14509804 0.2        0.2745098 ]
   ...
   [0.14117647 0.17647059 0.23137255]
   [0.14117647 0.17647059 0.23137255]
   [0.14117647 0.17647059 0.23137255]]

  [[0.12941176 0.18431373 0.25882353]
   [0.1372549  0.19215686 0.26666667]
   [0.14509804 0.2        0.2745098 ]
   ...
   [0.14117647 0.17647059 0.23137255]
   [0.14117647 0.17647059 0.23137255]
   [0.14117647 0.17647059 0.23137255]]

  ...

  [[0.16078431 0.29803922 0.10196078]
   [0.16470588 0.30588235 0.10196078]
   [0.16862745 0.30980392 0.09803922]
   ...
   [0.59215686 0.66666667 0.30196078]
   [0.58823529 0.6627451  0.29803922]
   [0.58823529 0.6627451  0.29803922]]

  [[0.18431373 0.32941176 0.10980392]
   [0.18823529 0.3372549  0.10980392]
   [0.19215686 0.34117647 0.10588235]
   ...
   [0.57647059 0.65098039 0.28627451]
   [0.57254902 0.64705882 0.28235294]
   [0.57254902 0.64705882 0.28235294]]

  [[0.18039216 0.32941176 0.09411765]
   [0.18431373 0.3372549  0.09411765]
   [0.18823529 0.34117647 0.09803922]
   ...
   [0.56862745 0.64313725 0.27843137]
   [0.56470588 0.63921569 0.2745098 ]
   [0.56078431 0.63529412 0.27058824]]]


 [[[0.00784314 0.00392157 0.01176471]
   [0.         0.         0.00392157]
   [0.00784314 0.         0.01568627]
   ...
   [0.16470588 0.31372549 0.14901961]
   [0.16862745 0.29019608 0.1372549 ]
   [0.         0.09019608 0.        ]]

  [[0.00392157 0.         0.00784314]
   [0.         0.         0.00392157]
   [0.00784314 0.         0.01568627]
   ...
   [0.16862745 0.31372549 0.15686275]
   [0.16862745 0.29019608 0.14509804]
   [0.         0.08235294 0.        ]]

  [[0.         0.         0.00392157]
   [0.         0.         0.00392157]
   [0.01176471 0.         0.01960784]
   ...
   [0.16470588 0.30588235 0.16470588]
   [0.16470588 0.27843137 0.14901961]
   [0.         0.07058824 0.        ]]

  ...

  [[0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   ...
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]

  [[0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   ...
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]

  [[0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   ...
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]]


 [[[0.53333333 0.66666667 0.62352941]
   [0.53333333 0.65882353 0.61568627]
   [0.55294118 0.65098039 0.61176471]
   ...
   [0.13333333 0.45098039 0.41568627]
   [0.12941176 0.43921569 0.40392157]
   [0.12156863 0.43137255 0.39607843]]

  [[0.53333333 0.65882353 0.61568627]
   [0.5372549  0.65490196 0.61176471]
   [0.54901961 0.64705882 0.60784314]
   ...
   [0.14901961 0.44313725 0.41176471]
   [0.14117647 0.43529412 0.40392157]
   [0.1372549  0.43137255 0.4       ]]

  [[0.54117647 0.65490196 0.61960784]
   [0.53333333 0.64705882 0.61176471]
   [0.54901961 0.64313725 0.61176471]
   ...
   [0.17254902 0.42352941 0.40392157]
   [0.16862745 0.41960784 0.4       ]
   [0.16470588 0.41568627 0.39607843]]

  ...

  [[0.15294118 0.59607843 0.33333333]
   [0.15294118 0.59607843 0.33333333]
   [0.16078431 0.59215686 0.33333333]
   ...
   [0.09411765 0.6        0.37647059]
   [0.08627451 0.59215686 0.36862745]
   [0.08627451 0.59215686 0.36862745]]

  [[0.15294118 0.59607843 0.33333333]
   [0.15294118 0.59607843 0.33333333]
   [0.15686275 0.58823529 0.32941176]
   ...
   [0.11372549 0.61960784 0.39607843]
   [0.10588235 0.61176471 0.38823529]
   [0.10196078 0.60784314 0.38431373]]

  [[0.15686275 0.6        0.3372549 ]
   [0.15294118 0.59607843 0.33333333]
   [0.15686275 0.58823529 0.32941176]
   ...
   [0.12941176 0.63529412 0.41176471]
   [0.11764706 0.62352941 0.4       ]
   [0.10980392 0.61568627 0.39215686]]]


 ...


 [[[0.         0.         0.02745098]
   [0.01176471 0.02352941 0.04313725]
   [0.         0.         0.00392157]
   ...
   [0.25490196 0.50196078 0.53333333]
   [0.2        0.43529412 0.45882353]
   [0.05882353 0.29411765 0.31764706]]

  [[0.         0.         0.02745098]
   [0.00784314 0.01960784 0.03921569]
   [0.         0.         0.00392157]
   ...
   [0.25882353 0.50196078 0.5254902 ]
   [0.18431373 0.41960784 0.44313725]
   [0.03529412 0.27058824 0.29411765]]

  [[0.         0.         0.02745098]
   [0.         0.01176471 0.03137255]
   [0.         0.         0.00392157]
   ...
   [0.23137255 0.4745098  0.49803922]
   [0.14117647 0.37647059 0.4       ]
   [0.01176471 0.23921569 0.25882353]]

  ...

  [[0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   ...
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]

  [[0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   ...
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]

  [[0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   ...
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]]


 [[[1.         1.         1.        ]
   [1.         1.         1.        ]
   [1.         1.         1.        ]
   ...
   [1.         1.         1.        ]
   [1.         1.         1.        ]
   [1.         1.         1.        ]]

  [[1.         1.         1.        ]
   [1.         1.         1.        ]
   [1.         1.         1.        ]
   ...
   [1.         1.         1.        ]
   [1.         1.         1.        ]
   [1.         1.         1.        ]]

  [[1.         1.         1.        ]
   [1.         1.         1.        ]
   [1.         1.         1.        ]
   ...
   [1.         1.         1.        ]
   [1.         1.         1.        ]
   [1.         1.         1.        ]]

  ...

  [[1.         1.         1.        ]
   [1.         1.         1.        ]
   [1.         1.         1.        ]
   ...
   [1.         1.         1.        ]
   [1.         1.         1.        ]
   [1.         1.         1.        ]]

  [[1.         1.         1.        ]
   [1.         1.         1.        ]
   [1.         1.         1.        ]
   ...
   [1.         1.         1.        ]
   [1.         1.         1.        ]
   [1.         1.         1.        ]]

  [[1.         1.         1.        ]
   [1.         1.         1.        ]
   [1.         1.         1.        ]
   ...
   [1.         1.         1.        ]
   [1.         1.         1.        ]
   [1.         1.         1.        ]]]


 [[[0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   ...
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]

  [[0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   ...
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]

  [[0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   ...
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]

  ...

  [[0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   ...
   [1.         1.         1.        ]
   [1.         1.         1.        ]
   [0.81176471 0.81176471 0.81176471]]

  [[0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   ...
   [1.         1.         1.        ]
   [0.99215686 0.99215686 0.99215686]
   [0.80392157 0.80392157 0.80392157]]

  [[0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]
   ...
   [1.         1.         1.        ]
   [0.99215686 0.99215686 0.99215686]
   [0.79607843 0.79607843 0.79607843]]]] (of type <class 'numpy.ndarray'>)