In [2]:
# ✅ 1. Librerías Necesarias
import os
import cv2 as cv
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import tensorflow as tf  # ✅ Importar TensorFlow correctamente

# ✅ 2. Configuración Global
IMG_SIZE = (128, 128)  # Nuevo tamaño para las imágenes
BATCH_SIZE = 32
AUTOTUNE = tf.data.AUTOTUNE

# ✅ 3. Rutas de Datos
ruta_train_csv = r'D:\Archivos de usuarios\Mikel Telo\OneDrive\Documentos\TB-DS-BIO-23.09.24\REPOSITORIOS\Mikel\clip_count\train.csv'
ruta_test_csv = r'D:\Archivos de usuarios\Mikel Telo\OneDrive\Documentos\TB-DS-BIO-23.09.24\REPOSITORIOS\Mikel\clip_count\test.csv'
ruta_train_img = r'D:\Archivos de usuarios\Mikel Telo\OneDrive\Documentos\TB-DS-BIO-23.09.24\REPOSITORIOS\Mikel\clip_count\train'
ruta_test_img = r'D:\Archivos de usuarios\Mikel Telo\OneDrive\Documentos\TB-DS-BIO-23.09.24\REPOSITORIOS\Mikel\clip_count\test'

# ✅ 4. Cargar Datos
df_train = pd.read_csv(ruta_train_csv)
df_test = pd.read_csv(ruta_test_csv)

# ✅ 5. Preprocesamiento de Imágenes
def cleaning_img(img):
    rgb = cv.cvtColor(img, cv.COLOR_BGR2RGB)
    b, _, _ = cv.split(img)
    blur = cv.GaussianBlur(b, (3,3), cv.BORDER_DEFAULT)
    _, thresh = cv.threshold(blur, 225, 255, 1, cv.THRESH_BINARY)
    thresh = 255 - thresh
    canny = cv.Canny(thresh, 0, 25)
    dilated = cv.dilate(canny, (15,15), iterations=2)
    eroded = cv.erode(dilated, (7,7), iterations=1)
    vertical_kernel = cv.getStructuringElement(cv.MORPH_RECT, (1, 100))
    detected_lines_vertical = cv.morphologyEx(eroded, cv.MORPH_OPEN, vertical_kernel, iterations=1)
    cnts_vertical, _ = cv.findContours(detected_lines_vertical, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    mask_vertical = np.zeros_like(eroded)
    for cnt_vertical in cnts_vertical:
        x, y, w, h = cv.boundingRect(cnt_vertical)
        cv.rectangle(mask_vertical, (x, y), (x + w, y + h), 255, -1)
    mask_inverted_vertical = cv.bitwise_not(mask_vertical)
    clean_img = cv.bitwise_and(eroded, eroded, mask=mask_inverted_vertical)
    return clean_img, len(cnts_vertical)

# ✅ 6. Procesar Carpeta de Imágenes
def procesar_imagenes_en_carpeta(ruta_carpeta):
    imagenes_procesadas = []
    etiquetas = []
    for _, row in tqdm(df_train.iterrows(), total=len(df_train)):
        img_path = os.path.join(ruta_carpeta, f'clips-{row["id"]}.png')
        img = cv.imread(img_path)
        if img is not None:
            img_masked, num_clips = cleaning_img(img)
            smallimage = cv.resize(img_masked, IMG_SIZE)
            smallimage = smallimage / 255.0  # Normalizar
            imagenes_procesadas.append(smallimage)
            etiquetas.append(row['clip_count'])
    return np.array(imagenes_procesadas)[..., np.newaxis], np.array(etiquetas, dtype=np.float32)

# ✅ 7. Crear Datos de Entrenamiento y Validación
X_train, y_train = procesar_imagenes_en_carpeta(ruta_train_img)
X_test, _ = procesar_imagenes_en_carpeta(ruta_test_img)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# ✅ 8. Definir el Modelo CNN
modelo = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 1)),
    MaxPooling2D((2, 2)),
    BatchNormalization(),
    
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    BatchNormalization(),
    
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    
    Dense(1)  # Salida para regresión
])

# ✅ 9. Compilar el Modelo
modelo.compile(
    optimizer=Adam(learning_rate=0.0005),
    loss='mean_squared_error',
    metrics=['mae']
)

# ✅ 10. Callbacks
callback_early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
callback_reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)
callback_checkpoint = ModelCheckpoint('modelo_mejor.keras', monitor='val_loss', save_best_only=True, mode='min')

# ✅ 11. Entrenar el Modelo
historial = modelo.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=30,
    batch_size=32,
    callbacks=[callback_early_stopping, callback_reduce_lr, callback_checkpoint]
)

# ✅ 12. Evaluar el Modelo
loss, mae = modelo.evaluate(X_val, y_val)
print(f'✅ Pérdida en Validación: {loss:.4f}, MAE: {mae:.4f}')

# ✅ 13. Generar Predicciones para el Test
y_pred = modelo.predict(X_test)

# ✅ 14. Guardar Resultados en CSV
df_test['clip_count'] = y_pred.flatten().astype(int)
df_test.to_csv('resultados_test.csv', index=False)
print("✅ Resultados guardados en 'resultados_test.csv'")

# ✅ 15. Visualizar el Historial de Entrenamiento
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(historial.history['loss'], label='Pérdida Entrenamiento')
plt.plot(historial.history['val_loss'], label='Pérdida Validación')
plt.legend()
plt.title('Evolución de la Pérdida')

plt.subplot(1, 2, 2)
plt.plot(historial.history['mae'], label='MAE Entrenamiento')
plt.plot(historial.history['val_mae'], label='MAE Validación')
plt.legend()
plt.title('Evolución del MAE')
plt.show()


100%|██████████| 15000/15000 [00:57<00:00, 260.03it/s]
100%|██████████| 15000/15000 [00:03<00:00, 4908.10it/s]
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 224ms/step - loss: 283.8856 - mae: 12.4918 - val_loss: 515.6740 - val_mae: 18.1189 - learning_rate: 5.0000e-04
Epoch 2/30
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 269ms/step - loss: 140.1312 - mae: 8.7825 - val_loss: 37.8073 - val_mae: 4.6098 - learning_rate: 5.0000e-04
Epoch 3/30
[1m163/375[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m52s[0m 247ms/step - loss: 106.6596 - mae: 7.5801

KeyboardInterrupt: 