<a href="https://colab.research.google.com/github/LuisPrieto123/MINE_4210_ADL_202520/blob/main/Copia_de_MATI_ADL_Gr9_Proy_Mod1_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

MODELO VERSIÓN 2 PARA CLASIFICACIÓN DE ESPECIES PICIFORMES

In [None]:
# Show the GPU, driver and CUDA version available to this runtime.
!nvidia-smi

Mon Nov 24 20:04:52 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-80GB          Off |   00000000:00:05.0 Off |                    0 |
| N/A   28C    P0             50W /  400W |       0MiB /  81920MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import os
import shutil
from tensorflow.keras import mixed_precision
# Run layer computations in float16 while keeping variables in float32.
mixed_precision.set_global_policy('mixed_float16')

# --- GLOBAL PARAMETER CONFIGURATION ---
# Directory that contains the Piciformes and No_Piciformes class folders
BASE_DATA_DIR = '/content/drive/MyDrive/MATI/ADL_PRY/MOD1'
# Directory where the train, validation and test folders are created
OUTPUT_DATA_DIR = '/content/drive/MyDrive/MATI/ADL_PRY/MOD1_DTOS'

# Data split parameters (fractions of the full dataset)
TEST_SPLIT = 0.15
VALIDATION_SPLIT = 0.15
RANDOM_SEED = 42
# Seed Python, NumPy and TensorFlow so that augmentation, batch shuffling and
# weight initialisation are repeatable, not only the train/val/test file split.
tf.keras.utils.set_random_seed(RANDOM_SEED)

# Model and training parameters
IMAGE_SIZE = (300, 300)
BATCH_SIZE = 64
NUM_CLASSES = 2  # Two output classes
EPOCHS_PHASE_1 = 10
EPOCHS_PHASE_2 = 15
# Layers with index below this value stay frozen during fine-tuning;
# layers from this index onwards are unfrozen.
FINE_TUNE_AT = 150

# Classifier head parameters
UNITS_DENSE_1 = 512
DROPOUT_1 = 0.5
UNITS_DENSE_2 = 256
DROPOUT_2 = 0.3

In [None]:
# =======================================================================
# 1. DATASET PREPARATION AND SPLITTING FUNCTION
# =======================================================================
# Mount Google Drive at /content/drive; the dataset paths used by this
# notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

def split_and_copy_data(base_dir, output_dir, test_size, val_size, seed):
    """Split every class folder into train/validation/test and copy the images.

    Each sub-directory of ``base_dir`` is treated as one class. Its image
    files (.jpg, .jpeg, .png, case-insensitive) are split per class and
    copied to ``output_dir/<split>/<class_name>``.

    Args:
        base_dir: Directory holding one sub-directory per class.
        output_dir: Destination root; ``train``, ``validation`` and ``test``
            sub-directories are created inside it.
        test_size: Fraction of each class reserved for the test split.
        val_size: Fraction of each class (relative to the full class size)
            reserved for the validation split.
        seed: Random state passed to ``train_test_split``.
    """
    # val_size is expressed relative to the whole class, but the second split
    # operates on what remains after removing the test files.
    val_from_train_size = val_size / (1 - test_size)

    # os.listdir returns entries in arbitrary order; sorting makes the seeded
    # split reproducible across runs and filesystems.
    classes = sorted(
        d for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d))
    )

    for split in ['train', 'validation', 'test']:
        for class_name in classes:
            os.makedirs(os.path.join(output_dir, split, class_name), exist_ok=True)

    print(f"Clases encontradas: {classes}")

    for class_name in classes:
        class_path = os.path.join(base_dir, class_name)
        # Sorted for the same reason as above: a fixed seed only gives a fixed
        # split when the input order is fixed too.
        all_files = [os.path.join(class_path, f) for f in sorted(os.listdir(class_path))
                     if f.lower().endswith(('.jpg', '.jpeg', '.png'))]

        if not all_files:
            print(f"Advertencia: No se encontraron imágenes en {class_name}")
            continue

        # First carve out the test set, then split the remainder into
        # train and validation.
        train_val_files, test_files = train_test_split(
            all_files, test_size=test_size, random_state=seed
        )

        train_files, val_files = train_test_split(
            train_val_files, test_size=val_from_train_size, random_state=seed
        )

        print(f"\n--- {class_name} ---")
        splits_data = {
            'train': train_files,
            'validation': val_files,
            'test': test_files
        }

        for split_name, file_list in splits_data.items():
            dest_dir = os.path.join(output_dir, split_name, class_name)
            for file_path in file_list:
                shutil.copy(file_path, dest_dir)
            print(f"Copiados {len(file_list)} archivos a {split_name}")


Mounted at /content/drive


In [None]:
# -----------------------------------------------------------------------
# RUN DATA PREPARATION
# -----------------------------------------------------------------------
# The split is performed only when the output directory does not exist yet.
# NOTE(review): an output directory left behind by an interrupted copy also
# counts as "already exists" here, so the copy would be skipped.
if os.path.exists(OUTPUT_DATA_DIR):
    print("Directorio de datos dividido ya existe. Saltando la copia.")
else:
    print("Iniciando la división y copia de archivos...")
    split_and_copy_data(
        base_dir=BASE_DATA_DIR,
        output_dir=OUTPUT_DATA_DIR,
        test_size=TEST_SPLIT,
        val_size=VALIDATION_SPLIT,
        seed=RANDOM_SEED,
    )


# =======================================================================
# 2. DATA LOADING WITH GENERATORS
# =======================================================================

# One directory per split, all located under the split output directory.
TRAIN_DIR, VAL_DIR, TEST_DIR = (
    os.path.join(OUTPUT_DATA_DIR, split_folder)
    for split_folder in ('train', 'validation', 'test')
)

# Preprocessing function that ships with the EfficientNet application
preprocess_func = tf.keras.applications.efficientnet.preprocess_input

# Training generator: preprocessing plus data augmentation
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
    rotation_range=20,
    preprocessing_function=preprocess_func,
)

# Validation and test generators: preprocessing only, no augmentation
val_datagen, test_datagen = (
    ImageDataGenerator(preprocessing_function=preprocess_func) for _ in range(2)
)

# Create the directory-backed data generators
try:
    train_generator = train_datagen.flow_from_directory(TRAIN_DIR, target_size=IMAGE_SIZE,
                                                         batch_size=BATCH_SIZE, class_mode='categorical')
    validation_generator = val_datagen.flow_from_directory(VAL_DIR, target_size=IMAGE_SIZE,
                                                           batch_size=BATCH_SIZE, class_mode='categorical')
    # shuffle=False keeps the prediction order aligned with test_generator.classes
    test_generator = test_datagen.flow_from_directory(TEST_DIR, target_size=IMAGE_SIZE,
                                                       batch_size=BATCH_SIZE, class_mode='categorical',
                                                       shuffle=False)
except Exception as e:
    print(f"ERROR: No se pudieron cargar los generadores de datos. Asegúrate de que las rutas sean correctas y que la división de datos se haya ejecutado. {e}")
    # Re-raise instead of calling exit(): in a notebook exit() ends the
    # session and hides the traceback, while re-raising stops this cell with
    # the full error and keeps the kernel state available for debugging.
    raise

print("Mapeo de Clases:", train_generator.class_indices)

Directorio de datos dividido ya existe. Saltando la copia.
Found 10178 images belonging to 2 classes.
Found 2181 images belonging to 2 classes.
Found 2181 images belonging to 2 classes.
Mapeo de Clases: {'No_Piciformes': 0, 'Piciformes': 1}


In [None]:
# =======================================================================
# 3. DEFINICIÓN Y COMPILACIÓN DEL MODELO
# =======================================================================

def build_transfer_model():
    """Build the EfficientNetB3 transfer-learning classifier.

    The ImageNet-pretrained EfficientNetB3 backbone (without its top) is
    frozen and followed by global average pooling, two Dense+Dropout stages
    and a softmax output with ``NUM_CLASSES`` units.

    Returns:
        An uncompiled ``Model`` mapping the backbone input to class
        probabilities.
    """
    base_model = EfficientNetB3(weights='imagenet', include_top=False, input_shape=IMAGE_SIZE + (3,))
    base_model.trainable = False  # Frozen for phase 1

    x = base_model.output
    x = GlobalAveragePooling2D()(x)

    # Dense stage 1 (classifier head)
    x = Dense(UNITS_DENSE_1, activation='relu')(x)
    x = Dropout(DROPOUT_1)(x)

    # Dense stage 2
    x = Dense(UNITS_DENSE_2, activation='relu')(x)
    x = Dropout(DROPOUT_2)(x)

    # Output layer. The notebook enables the 'mixed_float16' global policy, so
    # the softmax is explicitly kept in float32: computing the probabilities
    # (and the loss derived from them) in float16 is numerically unstable.
    predictions = Dense(NUM_CLASSES, activation='softmax', dtype='float32')(x)

    model = Model(inputs=base_model.input, outputs=predictions)
    return model

model = build_transfer_model()

# Phase 1 compilation: categorical cross-entropy on the one-hot labels,
# tracking accuracy and a weighted F1 score.
model.compile(
    loss='categorical_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.F1Score(average='weighted', name='f1_score'),
    ],
    optimizer=Adam(learning_rate=0.001),
)

print("\n--- RESUMEN DE LA ARQUITECTURA DEL MODELO ---")
model.summary()

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb3_notop.h5
[1m43941136/43941136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step

--- RESUMEN DE LA ARQUITECTURA DEL MODELO ---


In [None]:
# =======================================================================
# 4. TWO-PHASE TRAINING
# =======================================================================

# --- PHASE 1: TRAIN THE CLASSIFIER HEAD ---

print(f"\n--- INICIANDO FASE 1: ENTRENAMIENTO DEL CLASIFICADOR (HEAD) --- (Épocas: {EPOCHS_PHASE_1})")

# Keep the History object: phase 2 reads it to know where to resume.
history_phase_1 = model.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=EPOCHS_PHASE_1,
)


# --- PHASE 2: FINE-TUNING ---

# 1. Unfreeze the model, then re-freeze what must not be updated:
#    - every layer with index below FINE_TUNE_AT, and
#    - every BatchNormalization layer, so the pretrained normalisation
#      statistics are not disturbed while fine-tuning.
model.trainable = True

for layer_index, layer in enumerate(model.layers):
    if layer_index < FINE_TUNE_AT or isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = False

# 2. Re-compile with a low learning rate (required for the trainable changes
#    to take effect, and to avoid destroying the pretrained weights).
model.compile(
    optimizer=Adam(learning_rate=1e-5),  # Very small learning rate
    loss='categorical_crossentropy',
    metrics=['accuracy', tf.keras.metrics.F1Score(average='weighted', name='f1_score')]
)

print(f"\n--- INICIANDO FASE 2: AJUSTE FINO (FINE-TUNING) --- (Épocas adicionales: {EPOCHS_PHASE_2})")

# history.epoch holds 0-based epoch indices, so its last element is one less
# than the number of epochs already run. Resuming from len(...) makes phase 2
# run exactly EPOCHS_PHASE_2 additional epochs instead of one extra.
history_phase_2 = model.fit(
    train_generator,
    epochs=EPOCHS_PHASE_1 + EPOCHS_PHASE_2,
    initial_epoch=len(history_phase_1.epoch),
    validation_data=validation_generator
)



--- INICIANDO FASE 1: ENTRENAMIENTO DEL CLASIFICADOR (HEAD) --- (Épocas: 10)


  self._warn_if_super_not_called()


Epoch 1/10
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4321s[0m 26s/step - accuracy: 0.8221 - f1_score: 0.8220 - loss: 0.3816 - val_accuracy: 0.9069 - val_f1_score: 0.9067 - val_loss: 0.2092
Epoch 2/10
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 1s/step - accuracy: 0.8864 - f1_score: 0.8862 - loss: 0.2529 - val_accuracy: 0.9193 - val_f1_score: 0.9191 - val_loss: 0.1942
Epoch 3/10
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 1s/step - accuracy: 0.9014 - f1_score: 0.9015 - loss: 0.2231 - val_accuracy: 0.9243 - val_f1_score: 0.9243 - val_loss: 0.1759
Epoch 4/10
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m227s[0m 1s/step - accuracy: 0.9150 - f1_score: 0.9150 - loss: 0.2030 - val_accuracy: 0.9211 - val_f1_score: 0.9211 - val_loss: 0.1770
Epoch 5/10
[1m160/160[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 1s/step - accuracy: 0.9123 - f1_score: 0.9123 - loss: 0.1962 - val_accuracy: 0.9289 - val_f1_score:

In [None]:
# =======================================================================
# 5. TESTING AND FINAL METRICS
# =======================================================================

print("\n--- EVALUACIÓN FINAL EN EL CONJUNTO DE PRUEBA ---")

# evaluate() returns the loss followed by the compiled metrics, in order:
# accuracy and the F1 score.
loss, accuracy, f1_score_value = model.evaluate(test_generator, verbose=1)

print(
    "\nResultados de la Métrica Estándar (Conjunto de Prueba):",
    f"    Pérdida (Test): {loss:.4f}",
    f"    Precisión (Accuracy): {accuracy:.4f}",
    f"    F1-Score (Test): {f1_score_value:.4f}",
    sep="\n",
)


# 2. Generate predictions and the detailed report
test_generator.reset()
# No explicit `steps`: Keras iterates over len(test_generator) batches, which
# covers every sample exactly once. The previous `samples // batch_size + 1`
# asked for one batch too many whenever the sample count was an exact
# multiple of the batch size.
y_pred_prob = model.predict(test_generator)

y_pred = np.argmax(y_pred_prob, axis=1)

# True labels, in the generator's file order
y_true = test_generator.classes

# Fail early with a clear message if predictions and labels are misaligned.
if len(y_pred) != len(y_true):
    raise ValueError(
        f"Prediction count ({len(y_pred)}) does not match label count ({len(y_true)})"
    )

target_names = list(test_generator.class_indices.keys())

print("\nReporte de Clasificación Detallado:")
print(classification_report(y_true, y_pred, target_names=target_names))

print("\nMatriz de Confusión (True Negatives, False Positives, False Negatives, True Positives):")
cm = confusion_matrix(y_true, y_pred)
print(cm)

# Save the final model (optional), once per file format
for model_path in (
    '/content/drive/MyDrive/MATI/ADL_PRY/clasificador_aves_piciformes_efficientnetv2.keras',
    '/content/drive/MyDrive/MATI/ADL_PRY/clasificador_aves_piciformes_efficientnetv2.h5',
):
    model.save(model_path)


--- EVALUACIÓN FINAL EN EL CONJUNTO DE PRUEBA ---
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1042s[0m 31s/step - accuracy: 0.9437 - f1_score: 0.9559 - loss: 0.1382

Resultados de la Métrica Estándar (Conjunto de Prueba):
    Pérdida (Test): 0.1831
    Precisión (Accuracy): 0.9303
    F1-Score (Test): 0.9302
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 779ms/step

Reporte de Clasificación Detallado:
               precision    recall  f1-score   support

No_Piciformes       0.91      0.96      0.93      1113
   Piciformes       0.95      0.90      0.93      1068

     accuracy                           0.93      2181
    macro avg       0.93      0.93      0.93      2181
 weighted avg       0.93      0.93      0.93      2181


Matriz de Confusión (True Negatives, False Positives, False Negatives, True Positives):
[[1065   48]
 [ 104  964]]


