## **Identificación De Especies de Mosquitos - Data Science**

Carol Arévalo, Stefano Aragoni, Luis Santos, Diego Perdomo

### *Modelo*

El Ministerio de Salud y Asistencia Social (MSPAS) ha informado que Guatemala ha experimentado un aumento significativo en el número de casos de dengue en 2023, con más de 10,000 casos confirmados y 21 muertes hasta el 12 de agosto. El MSPAS ha declarado una alerta epidemiológica en todo el país para combatir la propagación del dengue (Gobierno De Guatemala, 2023).  


Con esto en mente, el presente proyecto busca utilizar técnicas de procesamiento de imágenes y aprendizaje automático para identificar la especie de mosquito Aedes aegypti a partir de imágenes, con el fin de apoyar los esfuerzos de prevención del dengue en Guatemala.

#### Librerías

Como primer paso, se importan las librerías necesarias para el desarrollo del proyecto.

In [47]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from keras.preprocessing import image
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import BatchNormalization, Conv2D
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.regularizers import l2
from keras.preprocessing.image import load_img, img_to_array
from keras.preprocessing.image import ImageDataGenerator
import shutil
import os
from keras.preprocessing.image import array_to_img
from sklearn.utils import shuffle


#### Cargas las imagenes

In [48]:
import pandas as pd

data = pd.read_csv('phase2_train_v0.csv')


#### Preprocesamiento

##### Analizar la Distribución de Clases


In [49]:
# Ver la distribución de las clases
class_distribution = data['class_label'].value_counts()
print(class_distribution)


albopictus            4612
culex                 4563
culiseta               622
japonicus/koreicus     429
anopheles               84
aegypti                 47
Name: class_label, dtype: int64


#### Data Augmentation

In [50]:
# Crear un generador de datos
datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Obtener el número máximo de imágenes por clase
max_images = data['class_label'].value_counts().max()

# Crear un nuevo dataframe con las imágenes originales y las nuevas
data_balanced = pd.DataFrame(columns=['img_fName', 'class_label'])

# Agregar las imágenes originales
data_balanced = data_balanced.append(data, ignore_index=True)

# Para cada clase, generar imágenes adicionales
for class_label in data['class_label'].unique():
    class_count = sum(data['class_label'] == class_label)
    if class_count < max_images:
        # Crear imágenes adicionales para esta clase
        num_new_images = max_images - class_count
        
        # Seleccionar imágenes de la clase actual
        class_images = data[data['class_label'] == class_label]['img_fName']
        
        # Generar y guardar nuevas imágenes
        for i, original_image_name in enumerate(class_images):
            img = load_img(os.path.join('resized_normalized', original_image_name))
            img_array = img_to_array(img)
            img_array = img_array.reshape((1,) + img_array.shape)
            
            i = 0
            for batch in datagen.flow(img_array, batch_size=1):
                i += 1
                if i > num_new_images:
                    break  # de lo contrario, el generador sería infinito
                new_img = array_to_img(batch[0])
                
                # Guardar la nueva imagen
                new_img.save(os.path.join('resized_normalized', f'new_image_{i}.jpeg'))

                # Agregar la nueva imagen al dataframe
                data_balanced = data_balanced.append({
                    'img_fName': f'new_image_{i}.jpeg',
                    'class_label': class_label
                }, ignore_index=True)
                
            if i >= num_new_images:
                break


  data_balanced = data_balanced.append(data, ignore_index=True)
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.ap

  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced = data_balanced.append({
  data_balanced 

In [51]:
# mezclar el dataframe
data_balanced = shuffle(data_balanced)

# Ver la distribución de las clases
print(data_balanced['class_label'].value_counts())

data_balanced.to_csv('data_balanced.csv', index=False)


albopictus            4612
culex                 4612
anopheles             4612
culiseta              4612
japonicus/koreicus    4612
aegypti               4612
Name: class_label, dtype: int64


In [52]:
# Tamaño deseado de las imágenes
img_width, img_height = 150, 150

# 1. Hacer Resize de imágenes:
# Ya está realizado en el 'target_size' del generador de datos.

# 2. Hacer Data Augmentation:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2
)

# 3. Usar Batches:
# Se están usando batches en el código con 'batch_size=32' en los generadores.

train_generator = train_datagen.flow_from_dataframe(
    dataframe=data_balanced,
    directory='resized_normalized',
    x_col='img_fName',
    y_col='class_label',
    target_size=(img_width, img_height),
    batch_size=32,
    class_mode='categorical',
    subset='training'
)

validation_generator = train_datagen.flow_from_dataframe(
    dataframe=data_balanced,
    directory='resized_normalized',
    x_col='img_fName',
    y_col='class_label',
    target_size=(img_width, img_height),
    batch_size=32,
    class_mode='categorical',
    subset='validation'
)


Found 22138 validated image filenames belonging to 6 classes.
Found 5534 validated image filenames belonging to 6 classes.


#### Definición del Modelo

In [53]:
model = Sequential()

model.add(Conv2D(32, (3, 3), activation='relu', kernel_regularizer=l2(0.01), input_shape=(img_width, img_height, 3)))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(0.01)))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(128, (3, 3), activation='relu', kernel_regularizer=l2(0.01)))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(256, (3, 3), activation='relu', kernel_regularizer=l2(0.01)))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

model.add(Flatten())

model.add(Dense(1024, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.6))  # Incrementa el Dropout

model.add(Dense(512, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.6))  # Incrementa el Dropout

model.add(Dense(6, activation='softmax'))

##### Compilación del modelo

In [54]:
# Compilación del modelo
opt = Adam(learning_rate=0.0001)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])



#### Entrenamiento

In [55]:
from PIL import Image
Image.MAX_IMAGE_PIXELS = None

# Entrenamiento

early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
checkpointer = ModelCheckpoint(filepath="best_model.h5", verbose=1, save_best_only=True)

history = model.fit(
    train_generator,
    steps_per_epoch=200,
    epochs=100,
    validation_data=validation_generator,
    validation_steps=50,
    callbacks=[early_stop, checkpointer]
)



Epoch 1/100
Epoch 1: val_loss improved from inf to 28.82435, saving model to best_model.h5


  saving_api.save_model(


Epoch 2/100
Epoch 2: val_loss improved from 28.82435 to 25.49535, saving model to best_model.h5
Epoch 3/100
Epoch 3: val_loss improved from 25.49535 to 21.80010, saving model to best_model.h5
Epoch 4/100

KeyboardInterrupt: 

#### Resultados del Modelo

In [None]:
# Gráfica
import matplotlib.pyplot as plt

# Precisión
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

# Pérdida
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

#### Validacion con imagen 

In [None]:
def predict_mosquito_type(model, img_path):
    """
    Predicts the type of mosquito based on an image.

    Parameters:
        model : tensorflow.keras.Model
            The trained model to use for prediction.
        img_path : str
            Path to the image file to predict.
    """
    # Load and resize the image
    img = image.load_img(img_path, target_size=(150, 150))

    # Display the image
    plt.imshow(img)
    plt.title("Input Image")
    plt.axis("off")
    plt.show()

    # Convert the image to a numpy array and scale the pixel values to [0, 1]
    img_array = image.img_to_array(img) / 255.

    # Expand dimensions to represent a batch size of 1
    img_batch = np.expand_dims(img_array, axis=0)

    # Get the model's prediction
    prediction = model.predict(img_batch)

    # Get the index of the highest predicted value
    predicted_index = np.argmax(prediction)

    # Retrieve the class labels from the training data generator
    labels = (train_generator.class_indices)
    labels = dict((v, k) for k, v in labels.items())  # flip the key, values in the dictionary

    # Get the string label for the prediction
    predicted_label = labels[predicted_index]

    # Display the prediction
    print(f"Prediction: {predicted_label} ({100*np.max(prediction):.2f}%)")

    return predicted_label


In [None]:
predict_mosquito_type(model, "prueba.jpeg")

