In [49]:
import numpy as np
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten,  Conv2D, MaxPooling2D, BatchNormalization, Input, Add, SeparableConv2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import precision_score, recall_score, f1_score
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Definir el número de clases para clasificación binaria
NUM_CLASES = 2

# Cargar CIFAR-10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Índice de la clase "barco" en CIFAR-10
indice_barco = 8  # "ship" es la clase con índice 8

# Etiquetas: 1 para "barco", 0 para "no barco"
y_train_bin = np.where(y_train.flatten() == indice_barco, 1, 0)
y_test_bin = np.where(y_test.flatten() == indice_barco, 1, 0)

# Normalizar los valores de los píxeles en el rango [0, 1]
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Convertir las etiquetas a one-hot encoding
y_train_cat = to_categorical(y_train_bin, NUM_CLASES)
y_test_cat = to_categorical(y_test_bin, NUM_CLASES)

# Función para ajustar una imagen a 32x32
def preprocess_frame(frame):
    resized_frame = cv2.resize(frame, (32, 32))
    normalized_frame = resized_frame.astype('float32') / 255.0
    return normalized_frame


# Función para construir CNN avanzada utilizando la API Funcional
def build_cnn(input_shape, num_classes):
    input_layer = Input(shape=input_shape)
    
    # Primera capa convolucional con Batch Normalization
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_layer)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)
    
    # Segunda capa convolucional separable con Batch Normalization
    x = SeparableConv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)
    
    # Residual block
    residual = Conv2D(64, (1, 1), padding='same')(x)
    res = BatchNormalization()(residual)
    
    x = Conv2D(64, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = Conv2D(64, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    
    x = Add()([x, res])  # Conexión residual
    x = BatchNormalization()(x)
    
    # Capa de Flatten y Densa
    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    x = BatchNormalization()(x)
    output_layer = Dense(num_classes, activation='softmax')(x)
    
    model = Model(inputs=input_layer, outputs=output_layer)
    return model

# Construir y compilar el modelo
model_barco = build_cnn((32, 32, 3), NUM_CLASES)
optimizer = Adam(learning_rate=0.001)
model_barco.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# (Opcional) Aumento de Datos para mejorar la generalización
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True
)

datagen.fit(x_train)

# Entrenamiento del modelo con aumento de datos
model_barco.fit(
    datagen.flow(x_train, y_train_cat, batch_size=64),
    epochs=10,
    validation_data=(x_test, y_test_cat),
    verbose=2
)

# Evaluación del modelo
y_pred = model_barco.predict(x_test)
y_pred_classes = y_pred.argmax(axis=1)  # Para 'softmax'

precision = precision_score(y_test_bin, y_pred_classes)
recall = recall_score(y_test_bin, y_pred_classes)
f1 = f1_score(y_test_bin, y_pred_classes)

print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1-Score: {f1}')

Epoch 1/10


  self._warn_if_super_not_called()


782/782 - 52s - 66ms/step - accuracy: 0.9006 - loss: 0.2529 - val_accuracy: 0.9337 - val_loss: 0.1820
Epoch 2/10
782/782 - 46s - 59ms/step - accuracy: 0.9357 - loss: 0.1698 - val_accuracy: 0.9457 - val_loss: 0.1459
Epoch 3/10
782/782 - 46s - 59ms/step - accuracy: 0.9429 - loss: 0.1490 - val_accuracy: 0.9001 - val_loss: 0.5708
Epoch 4/10
782/782 - 46s - 59ms/step - accuracy: 0.9484 - loss: 0.1363 - val_accuracy: 0.9594 - val_loss: 0.1133
Epoch 5/10
782/782 - 46s - 59ms/step - accuracy: 0.9530 - loss: 0.1254 - val_accuracy: 0.9584 - val_loss: 0.1119
Epoch 6/10
782/782 - 48s - 61ms/step - accuracy: 0.9565 - loss: 0.1157 - val_accuracy: 0.9587 - val_loss: 0.1159
Epoch 7/10
782/782 - 47s - 60ms/step - accuracy: 0.9595 - loss: 0.1076 - val_accuracy: 0.9663 - val_loss: 0.0928
Epoch 8/10
782/782 - 47s - 60ms/step - accuracy: 0.9622 - loss: 0.1021 - val_accuracy: 0.9685 - val_loss: 0.0906
Epoch 9/10
782/782 - 46s - 59ms/step - accuracy: 0.9639 - loss: 0.1006 - val_accuracy: 0.9676 - val_loss: 0

In [52]:
import cv2
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
import os

def sliding_window_detector_notebook(video_path, model, 
                                     window_size=(25, 25), step_size=2,
                                     save_dir='detected_frames'):
    # Crear directorio para guardar frames detectados si no existe
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    
    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print(f"No se pudo abrir el video: {video_path}")
        return

    frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    
    frame_count = 0
    detected_frame_count = 0

    while video.isOpened():
        ret, frame = video.read()
        if not ret:
            break
        
        frame_count += 1
        detections = []  # Guardar las coordenadas de las detecciones
        
        # Recorrer la imagen con ventana deslizante
        for y in range(0, frame_height - window_size[1] + 1, step_size):
            for x in range(0, frame_width - window_size[0] + 1, step_size):
                window = frame[y:y + window_size[1], x:x + window_size[0]]
                if window.shape[:2] != window_size:
                    continue
                
                # Preprocesar la ventana
                window2 = cv2.resize(window, (32, 32))
                processed_window = window2.astype('float32') / 255.0
                processed_window = np.expand_dims(processed_window, axis=0)  # Expandir las dimensiones
                
                # Predicción
                prediction = model.predict(processed_window)
                
                # Asumiendo que el modelo devuelve probabilidades con softmax
                predicted_class = prediction.argmax(axis=1)[0]
                
                if predicted_class == 1:
                    # Agregar coordenadas de la detección a la lista
                    detections.append((x, y, x + window_size[0], y + window_size[1]))
        
        # Dibujar las detecciones en el frame
        for (x1, y1, x2, y2) in detections:
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        
        # Si hay al menos una detección, guarda el frame
        if detections:
            detected_frame_count += 1
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            save_path = os.path.join(save_dir, f"frame_{frame_count}.jpg")
            cv2.imwrite(save_path, cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        
        # Convertir el frame a RGB para matplotlib
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        # Mostrar el frame con detecciones
        plt.figure(figsize=(10, 6))
        plt.imshow(frame_rgb)
        plt.title(f"Frame {frame_count} / {total_frames} - Detecciones: {len(detections)}")
        plt.axis('off')
        display(plt.gcf())
        plt.close()
        
        # Limpiar la salida para mostrar un frame a la vez en el notebook
        clear_output(wait=True)
    
    video.release()
    clear_output()
    print(f"Proceso completado. Total de frames procesados: {frame_count}")
    print(f"Total de frames con detecciones: {detected_frame_count}")
    print(f"Frames detectados guardados en la carpeta: '{save_dir}'")


In [51]:
# Ejecutar el detector en el video
sliding_window_detector_notebook('barco3.mp4', model_barco)

Proceso completado. Total de frames procesados: 24
Total de frames con detecciones: 3
Frames detectados guardados en la carpeta: 'detected_frames'
