In [1]:
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

import logging

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import (Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, 
                                     BatchNormalization, GlobalAveragePooling2D)
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
import datetime


In [None]:
data_dir = "../data/full/train"
val_dir = "../data/full/val"

# Augmentación para entrenamiento
datagen = ImageDataGenerator(
    rescale=1./255, 
    rotation_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    brightness_range=[0.8,1.2],
    shear_range=0.2,
    horizontal_flip=True, # Contempla manchas simétricas
)

# Generador de entrenamiento
train_generator = datagen.flow_from_directory(
    data_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    shuffle=True
)

datagen_val = ImageDataGenerator(
    rescale=1./255
)

val_generator = datagen_val.flow_from_directory(
    val_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    shuffle=False
)

print(pd.Series(val_generator.classes).value_counts())
print(pd.Series(train_generator.classes).value_counts())

Found 8517 images belonging to 7 classes.
Found 496 images belonging to 7 classes.
5    332
4     55
2     54
1     26
0     16
6      7
3      6
Name: count, dtype: int64
5    5702
4     947
2     935
1     437
0     278
6     121
3      97
Name: count, dtype: int64


In [3]:
labels = train_generator.classes

# Calculamos los pesos
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.unique(labels),
    y=labels
)

# Lo convertimos en diccionario para Keras
class_weights = dict(enumerate(class_weights))
print(class_weights)


{0: np.float64(4.376670092497431), 1: np.float64(2.784243216737496), 2: np.float64(1.3012987012987014), 3: np.float64(12.543446244477172), 4: np.float64(1.2848091718207875), 5: np.float64(0.21338377511650047), 6: np.float64(10.055489964580874)}


In [8]:
next(val_generator)

(array([], shape=(0, 224, 224, 3), dtype=float32), array([], dtype=float32))

## Modelo

In [None]:
def clasificador_binario(input_shape=(224,224,3), lr=1e-3):
    entrada = Input(shape=input_shape, name='entrada_imagen')

    # Bloque 1
    x = Conv2D(32, (3,3), activation='relu', padding='same', kernel_regularizer=l2(1e-4))(entrada)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2,2))(x)

    # Bloque 2
    x = Conv2D(64, (3,3), activation='relu', padding='same', kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2,2))(x)

    # Bloque 3
    x = Conv2D(128, (3,3), activation='relu', padding='same', kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2,2))(x)

    # Bloque 4 (extra para más capacidad)
    x = Conv2D(256, (3,3), activation='relu', padding='same', kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2,2))(x)

    # Global pooling en lugar de Flatten (reduce parámetros)
    x = GlobalAveragePooling2D()(x)

    # Capa densa
    x = Dense(128, activation='relu', kernel_regularizer=l2(1e-4))(x)
    x = Dropout(0.2)(x)

    salida = Dense(7, activation='softmax', name='salida_clases')(x)

    modelo = Model(inputs=entrada, outputs=salida, name='cnn_mejorada')
    modelo.compile(optimizer=Adam(learning_rate=lr),
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])
    return modelo



In [5]:
import tensorflow as tf

# Desactiva todas las GPUs
tf.config.set_visible_devices([], 'GPU')


In [None]:
# Ajustar nivel de logging de TensorFlow
logging.getLogger("tensorflow").setLevel(logging.ERROR)

modelo = clasificador_binario()  

history = modelo.fit(
    train_generator,
    validation_data=val_generator,
    epochs=20,
    class_weight=class_weights
)


  self._warn_if_super_not_called()


Epoch 1/20
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m409s[0m 2s/step - accuracy: 0.0510 - loss: -976.7233 - val_accuracy: 0.0524 - val_loss: -3173.8054
Epoch 2/20
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m368s[0m 1s/step - accuracy: 0.0513 - loss: -13526.7090 - val_accuracy: 0.0524 - val_loss: -23161.7871
Epoch 3/20
[1m223/267[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m1:00[0m 1s/step - accuracy: 0.0491 - loss: -45791.3672

In [None]:
# Current timestamp
timestamp = datetime.datetime.now().strftime("%m_%d_h%H_%M")

# Carpeta donde guardar
save_dir = "../models/classifier"
os.makedirs(save_dir, exist_ok=True)

modelo.save(f"../models/classifier/new_model_{timestamp}.keras")

In [None]:
modelo.summary()