In [1]:
# Preprocesamiento de Datos
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Procesamiento de Imágenes
import cv2 as cv
import os
from tqdm import tqdm  # Barra de progreso para visualizar el proceso

# TensorFlow y Keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# Visualización
import matplotlib.pyplot as plt

In [2]:
# Rutas de Datos
ruta_train_csv = r'D:\Archivos de usuarios\Mikel Telo\OneDrive\Documentos\TB-DS-BIO-23.09.24\REPOSITORIOS\Mikel\clip_count\train.csv'
ruta_test_csv = r'D:\Archivos de usuarios\Mikel Telo\OneDrive\Documentos\TB-DS-BIO-23.09.24\REPOSITORIOS\Mikel\clip_count\test.csv'
ruta_train_img = r'D:\Archivos de usuarios\Mikel Telo\OneDrive\Documentos\TB-DS-BIO-23.09.24\REPOSITORIOS\Mikel\clip_count\train'
ruta_test_img = r'D:\Archivos de usuarios\Mikel Telo\OneDrive\Documentos\TB-DS-BIO-23.09.24\REPOSITORIOS\Mikel\clip_count\test'

# Cargar Datos
df_train = pd.read_csv(ruta_train_csv)
df_test = pd.read_csv(ruta_test_csv)

print(df_train.head())
print(df_test.head())


      id  clip_count
0  30001          11
1  30002           2
2  30003          26
3  30004          41
4  30005          49
      id
0  45001
1  45002
2  45003
3  45004
4  45005


In [3]:
# ✅ Función para limpiar y preprocesar una imagen
def cleaning_img(img): 
    b, _, _ = cv.split(img)
    blur = cv.GaussianBlur(b, (3, 3), cv.BORDER_DEFAULT)
    _, thresh = cv.threshold(blur, 225, 255, 1, cv.THRESH_BINARY)
    thresh = 255 - thresh
    canny = cv.Canny(thresh, 0, 25)
    dilated = cv.dilate(canny, (15, 15), iterations=2)
    eroded = cv.erode(dilated, (7, 7), iterations=1)

    # Eliminar líneas verticales
    vertical_kernel = cv.getStructuringElement(cv.MORPH_RECT, (1, 100))
    detected_lines_vertical = cv.morphologyEx(eroded, cv.MORPH_OPEN, vertical_kernel, iterations=1)
    cnts_vertical, _ = cv.findContours(detected_lines_vertical, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)

    mask_vertical = np.zeros_like(eroded)
    for cnt_vertical in cnts_vertical:
        x, y, w, h = cv.boundingRect(cnt_vertical)
        cv.rectangle(mask_vertical, (x, y), (x + w, y + h), 255, -1)

    mask_inverted_vertical = cv.bitwise_not(mask_vertical)
    clean_img = cv.bitwise_and(eroded, eroded, mask=mask_inverted_vertical)
    
    return clean_img


In [4]:
# ✅ Función para leer y preprocesar imágenes en una carpeta
def read_data(df, ruta_img):
    X = []
    y = [] if 'clip_count' in df.columns else None
    
    for index, row in tqdm(df.iterrows(), total=len(df)):
        img_path = os.path.join(ruta_img, f"clips-{row['id']}.png")
        img = cv.imread(img_path)
        
        if img is not None:
            img_clean = cleaning_img(img)
            img_resized = cv.resize(img_clean, (96, 96))
            img_normalized = img_resized / 255.0  # Normalizar
            
            X.append(img_normalized)
            if y is not None:
                y.append(row['clip_count'])
    
    if y is not None:
        return np.array(X), np.array(y)
    return np.array(X)


In [5]:
# ✅ Cargar Datos de Entrenamiento y Prueba
X_train, y_train = read_data(df_train, ruta_train_img)
X_test = read_data(df_test, ruta_test_img)

# ✅ Ajustar Dimensiones
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

# ✅ Dividir en Conjunto de Validación
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

print(f"✅ X_train: {X_train.shape}, y_train: {y_train.shape}")
print(f"✅ X_val: {X_val.shape}, y_val: {y_val.shape}")
print(f"✅ X_test: {X_test.shape}")


100%|██████████| 15000/15000 [04:10<00:00, 59.98it/s]
100%|██████████| 5000/5000 [01:14<00:00, 66.90it/s]


✅ X_train: (12000, 96, 96, 1), y_train: (12000,)
✅ X_val: (3000, 96, 96, 1), y_val: (3000,)
✅ X_test: (5000, 96, 96, 1)


In [6]:
# ✅ Modelo CNN
modelo = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(96, 96, 1)),
    MaxPooling2D((2, 2)),
    BatchNormalization(),
    
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    BatchNormalization(),
    
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1)  # Salida numérica para regresión
])

modelo.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mae']
)

modelo.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
# ✅ Callbacks
callback_early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
callback_reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)
callback_checkpoint = ModelCheckpoint('modelo_mejor.keras', monitor='val_loss', save_best_only=True, mode='min')

# ✅ Entrenamiento
historial = modelo.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=30,
    batch_size=32,
    callbacks=[callback_early_stopping, callback_reduce_lr, callback_checkpoint]
)


Epoch 1/30
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 131ms/step - loss: 295.4103 - mae: 12.8267 - val_loss: 610.1249 - val_mae: 19.9682 - learning_rate: 0.0010
Epoch 2/30
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 133ms/step - loss: 104.0283 - mae: 7.6474 - val_loss: 122.9393 - val_mae: 8.4182 - learning_rate: 0.0010
Epoch 3/30
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 133ms/step - loss: 75.2340 - mae: 6.3945 - val_loss: 23.2677 - val_mae: 3.6385 - learning_rate: 0.0010
Epoch 4/30
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 135ms/step - loss: 60.5888 - mae: 5.6715 - val_loss: 83.7373 - val_mae: 7.5614 - learning_rate: 0.0010
Epoch 5/30
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 135ms/step - loss: 57.8381 - mae: 5.5656 - val_loss: 23.5520 - val_mae: 3.6047 - learning_rate: 0.0010
Epoch 6/30
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 115ms/step 

KeyboardInterrupt: 

In [None]:
# ✅ Evaluar en Conjunto de Validación
loss, mae = modelo.evaluate(X_val, y_val)
print(f"✅ Pérdida (MSE): {loss}, Error Absoluto Medio (MAE): {mae}")


In [None]:
# ✅ Predicciones en el Conjunto de Prueba
predicciones = modelo.predict(X_test)

# ✅ Crear Archivo de Salida
df_test['clip_count'] = predicciones.flatten().astype(int)
df_test.to_csv('predicciones_test.csv', index=False)
print("✅ Predicciones guardadas en 'predicciones_test.csv'")
