In [1]:
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import train_test_split

import logging

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import (Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, 
                                     BatchNormalization, GlobalAveragePooling2D, LeakyReLU)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
import datetime

from keras.applications.vgg16 import preprocess_input as vgg16_preprocess
from keras.applications.resnet50 import preprocess_input as resnet_preprocess
from keras.applications.xception import preprocess_input as xception_preprocess

2025-11-28 13:09:30.552596: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Ensemble y Distilación
Se crea el ensemble con Voting classifier de los 4 modelos e intentar destilarlo por un único modelo

In [2]:
# Desactiva todas las GPUs
tf.config.set_visible_devices([], 'GPU')

W0000 00:00:1764331772.718382    7793 gpu_device.cc:2431] TensorFlow was not built with CUDA kernel binaries compatible with compute capability 5.2. CUDA kernels will be jit-compiled from PTX, which could take 30 minutes or longer.


In [3]:
# Carpeta donde tienes los modelos .keras
folder = "../models/classifier/"

# Cargar los modelos
models_paths = ["new_model_11_20_h12_29.keras", "vgg16_finetuned_11_25_11_17.keras", "resnet50_finetuned_11_26_12_13.keras", "inceptionv3_finetuned_11_26_13_17.keras"]
models_names = ["scratch", "VGG19", "ResNet", "Xception"]
models = [tf.keras.models.load_model(os.path.join(folder, f)) for f in models_paths]
preprocesses = [lambda x: x/255., vgg16_preprocess, resnet_preprocess, xception_preprocess]
models

[<Functional name=cnn_mejorada, built=True>,
 <Functional name=functional_1, built=True>,
 <Functional name=functional_1, built=True>,
 <Functional name=functional_3, built=True>]

### Ensemble

In [4]:
# class VotingTeacher(tf.keras.Model):
#     def __init__(self, models, preprocess_fns=None):
#         """
#         Args:
#             models: lista de modelos Keras ya cargados
#             preprocess_fns: lista de funciones de preprocesado, una por modelo.
#                             Cada función recibe X y devuelve X_preprocesado.
#                             Si None, se usa identidad.
#         """
#         super().__init__()
#         self.models = models
#         if preprocess_fns is None:
#             preprocess_fns = [lambda x: x for _ in models]
#         self.preprocess_fns = preprocess_fns

#     def call(self, X, training=False):
#         hard_votes = []
#         soft_probs = []

#         # Obtener predicciones de cada modelo
#         for m, fn in zip(self.models, self.preprocess_fns):
#             X_prep = fn(X)
#             p = m(X_prep, training=training)  # salida (N,1)
#             p = tf.cast(tf.reshape(p, (-1, 1)), tf.float32)
#             hard_votes.append(tf.cast(p > 0.5, tf.int32))
#             soft_probs.append(p)

#         # Apilar resultados
#         hard_votes = tf.concat(hard_votes, axis=1)   # (N, num_models)
#         soft_probs = tf.concat(soft_probs, axis=1)   # (N, num_models)

#         # Hard voting con desempate vía soft voting
#         num_models = len(self.models)
#         sum_votes = tf.reduce_sum(hard_votes, axis=1)  # (N,)

#         def resolve_vote(i):
#             if sum_votes[i] > num_models / 2:
#                 return 1
#             elif sum_votes[i] < num_models / 2:
#                 return 0
#             else:
#                 # Empate → usar soft voting
#                 return 1 if tf.reduce_mean(soft_probs[i]) > 0.5 else 0

#         # Aplicar la regla a cada muestra
#         majority_vote = tf.map_fn(lambda i: resolve_vote(i),
#                                   tf.range(tf.shape(X)[0]),
#                                   dtype=tf.int32)

#         # Devolver como tensor (N,1)
#         return tf.reshape(majority_vote, (-1, 1))


In [5]:
class VotingTeacher(tf.keras.Model):
    def __init__(self, models, preprocess_fns=None):
        """
        Args:
            models: lista de modelos Keras ya cargados
            preprocess_fns: lista de funciones de preprocesado, una por modelo.
                            Cada función recibe X y devuelve X_preprocesado.
                            Si None, se usa identidad.
        """
        super().__init__()
        self.models = models
        if preprocess_fns is None:
            print("Preprocess functions missing")
            preprocess_fns = [lambda x: x for _ in models]
        self.preprocess_fns = preprocess_fns

    def call(self, X, training=False):
        preds = []
        for m, fn in zip(self.models, self.preprocess_fns):
            # aplicar preprocesado compatible con tf.Tensor
            X_prep = fn(X)
            p = m(X_prep, training=training)  # salida (N,1)
            p = tf.cast(tf.reshape(p, (-1, 1)), tf.float32)
            preds.append(p)
        stacked = tf.stack(preds, axis=0)      # (num_models, N, 1)
        avg_preds = tf.reduce_mean(stacked, axis=0)  # (N,1)
        return avg_preds

In [7]:
datagen = ImageDataGenerator(
        rotation_range=60,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.12,
        brightness_range=[0.8, 1.2],
        shear_range=0.2,
        vertical_flip=True,
        horizontal_flip=True
    )

train_generator = datagen.flow_from_directory(
    "../data/processed/train",
    target_size=(224, 224),
    batch_size=20,
    class_mode='binary',
    shuffle=True
)

# Extraer 10 batches del generador
X_all, y_all = [], []
for _ in range(50):
    X_batch, y_batch = next(train_generator)
    X_all.append(X_batch)
    y_all.append(y_batch)

X_all = np.vstack(X_all)
y_all = np.concatenate(y_all)

print(f"Total imágenes usadas: {X_all.shape[0]}")

# Evaluar cada modelo con su preprocesamiento y sacar classification_report
for name, model, preprocess in zip(models_names, models, preprocesses):
    X_prep = preprocess(X_all)
    y_pred = model.predict(X_prep, verbose=0)
    y_pred_classes = (y_pred > 0.5).astype("int32")
    
    print(f"\nModelo {name} - Classification Report (10 batches):")
    print(classification_report(y_all, y_pred_classes, digits=4))

#Usar el VotingTeacher para predecir sobre tus datos
y_pred = VotingTeacher(models, preprocesses)(X_all, training=False).numpy()
y_pred_classes = (y_pred > 0.5).astype("int32")

print("\nVotingTeacher - Classification Report (10 batches):")
print(classification_report(y_all, y_pred_classes, digits=4))

Found 8517 images belonging to 2 classes.
Total imágenes usadas: 1000


2025-11-28 13:10:27.718170: I external/local_xla/xla/service/service.cc:163] XLA service 0x73a60800b3b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2025-11-28 13:10:27.718196: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): Host, Default Version
2025-11-28 13:10:27.736555: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1764331827.942284    7906 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.



Modelo scratch - Classification Report (10 batches):
              precision    recall  f1-score   support

         0.0     0.8471    0.9167    0.8805       804
         1.0     0.4846    0.3214    0.3865       196

    accuracy                         0.8000      1000
   macro avg     0.6659    0.6190    0.6335      1000
weighted avg     0.7761    0.8000    0.7837      1000


Modelo VGG19 - Classification Report (10 batches):
              precision    recall  f1-score   support

         0.0     0.9734    0.9092    0.9402       804
         1.0     0.7068    0.8980    0.7910       196

    accuracy                         0.9070      1000
   macro avg     0.8401    0.9036    0.8656      1000
weighted avg     0.9211    0.9070    0.9110      1000


Modelo ResNet - Classification Report (10 batches):
              precision    recall  f1-score   support

         0.0     0.8762    0.8893    0.8827       804
         1.0     0.5163    0.4847    0.5000       196

    accuracy           

2025-11-28 13:12:17.871746: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 6422528000 exceeds 10% of free system memory.
2025-11-28 13:12:18.601350: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 6422528000 exceeds 10% of free system memory.
2025-11-28 13:12:19.377014: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 6422528000 exceeds 10% of free system memory.
2025-11-28 13:12:20.117448: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 6422528000 exceeds 10% of free system memory.
2025-11-28 13:12:20.528301: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 6422528000 exceeds 10% of free system memory.


: 

### Knowledge Distillation

In [None]:
class Distiller(Model):
    def __init__(self, student, teacher):
        super().__init__()
        self.teacher = teacher
        self.student = student

    def compile(
        self,
        optimizer,
        metrics,
        student_loss_fn,
        distillation_loss_fn,
        alpha=0.1,
        temperature=3,
    ):
        """
        Configure the distiller for binary classification.

        Args:
            optimizer: Keras optimizer for the student weights
            metrics: Keras metrics for evaluation (e.g. BinaryAccuracy)
            student_loss_fn: Loss between student predictions and ground-truth
            distillation_loss_fn: Loss between soft student predictions and soft teacher predictions
            alpha: weight to student_loss_fn and 1-alpha to distillation_loss_fn
            temperature: Temperature for softening probability distributions
        """
        super().compile(optimizer=optimizer, metrics=metrics)
        self.student_loss_fn = student_loss_fn
        self.distillation_loss_fn = distillation_loss_fn
        self.alpha = alpha
        self.temperature = temperature

    def train_step(self, data):
        # Unpack data (puede traer sample_weight)
        if isinstance(data, tuple):
            if len(data) == 2:
                x, y = data
            elif len(data) == 3:
                x, y, _ = data
            else:
                raise ValueError(f"Formato inesperado en data: {len(data)} elementos")
        else:
            # Si data es dict, usa las claves
            x, y = data["x"], data["y"]

        # Forward pass del teacher
        teacher_predictions = self.teacher(x, training=False)

        with tf.GradientTape() as tape:
            student_predictions = self.student(x, training=True)

            student_loss = self.student_loss_fn(y, student_predictions)

            teacher_soft = tf.nn.sigmoid(teacher_predictions / self.temperature)
            student_soft = tf.nn.sigmoid(student_predictions / self.temperature)

            distillation_loss = (
                self.distillation_loss_fn(teacher_soft, student_soft)
                * (self.temperature ** 2)
            )

            loss = self.alpha * student_loss + (1 - self.alpha) * distillation_loss

        gradients = tape.gradient(loss, self.student.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.student.trainable_variables))

        for metric in self.metrics:
            metric.update_state(y, student_predictions)


        results = {m.name: m.result() for m in self.metrics}
        results.update({"student_loss": student_loss, "distillation_loss": distillation_loss})
        return results


    def test_step(self, data):
        # Unpack data
        x, y = data

        # Compute predictions
        y_prediction = self.student(x, training=False)

        # Calculate the loss
        student_loss = self.student_loss_fn(y, y_prediction)

        # Update metrics
        for metric in self.metrics:
            metric.update_state(y, y_prediction)

        # Return dict of performance
        results = {m.name: m.result() for m in self.metrics}
        results.update({"student_loss": student_loss})
        return results


#### Creamos el modelo Student

In [None]:
def clasificador_binario(input_shape=(224,224,3), lr=1e-3):
    entrada = Input(shape=input_shape, name='entrada_imagen')

    # Bloque 1
    x = Conv2D(32, (3,3), activation='relu', padding='same', kernel_regularizer=l2(1e-4))(entrada)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2,2))(x)

    # Bloque 2
    x = Conv2D(64, (3,3), activation='relu', padding='same', kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2,2))(x)

    # Bloque 3
    x = Conv2D(128, (3,3), activation='relu', padding='same', kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2,2))(x)

    # Bloque 4 (extra para más capacidad)
    x = Conv2D(256, (3,3), activation='relu', padding='same', kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2,2))(x)

    # Global pooling en lugar de Flatten (reduce parámetros)
    x = GlobalAveragePooling2D()(x)

    # Capa densa
    x = Dense(128, activation='relu', kernel_regularizer=l2(1e-4))(x)
    x = Dropout(0.5)(x)

    salida = Dense(1, activation='sigmoid', name='salida_binaria')(x)

    modelo = Model(inputs=entrada, outputs=salida, name='student')
    modelo.compile(optimizer=Adam(learning_rate=lr),
                   loss='binary_crossentropy',
                   metrics=['accuracy'])
    return modelo
# Igual que el clasificador desde cero
student = clasificador_binario()

#### Cargamos datos

In [None]:
data_dir = "../data/processed/train"

def get_generators(data_dir, preprocess_fn, target_size=(224, 224), batch_size=128, validation_split=0.15):
    datagen = ImageDataGenerator(
        preprocessing_function=preprocess_fn,
        rotation_range=60,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.12,
        brightness_range=[0.8, 1.2],
        shear_range=0.2,
        vertical_flip=True,
        horizontal_flip=True,
        validation_split=validation_split
    )

    train_generator = datagen.flow_from_directory(
        data_dir,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='binary',
        subset='training',
        shuffle=True
    )

    val_generator = datagen.flow_from_directory(
        data_dir,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='binary',
        subset='validation',
        shuffle=False
    )

    return train_generator, val_generator

#### Instanciamos código

In [None]:
train_generator, val_generator = get_generators(data_dir, lambda x: x)

labels = train_generator.classes

# Calculamos los pesos
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.unique(labels),
    y=labels
)

# Lo convertimos en diccionario para Keras
class_weights = dict(enumerate(class_weights))
print(class_weights)

teacher = VotingTeacher(models, preprocesses)

Found 7662 images belonging to 2 classes.
Found 1351 images belonging to 2 classes.
{0: 0.6213104119364256, 1: 2.560828877005348}


In [None]:
distiller = Distiller(student=student, teacher=teacher)
distiller.compile(
    optimizer=keras.optimizers.Adam(),
    student_loss_fn=keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=[keras.metrics.BinaryAccuracy(name="accuracy")],
    distillation_loss_fn=keras.losses.KLDivergence(),
    alpha=0.1,
    temperature=10,
)

# Ajustar nivel de logging de TensorFlow
logging.getLogger("tensorflow").setLevel(logging.ERROR)
# Distill teacher to student
distiller.fit(
    train_generator,
    validation_data=val_generator,
    epochs=20,
    class_weight=class_weights
)

# Evaluate student on test dataset
distiller.evaluate(val_generator)

Epoch 1/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15s/step - accuracy: 0.2201 - distillation_loss: -1.5436 - loss: 0.1962 - student_loss: 2.7795 

ValueError: not enough values to unpack (expected 3, got 2)