Trabajo final 
Redes Neuronales
Flores Lara Alberto 6BV1

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPooling2D, BatchNormalization, Input, Add, SeparableConv2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import precision_score, recall_score, f1_score
import time
from tensorflow.keras.regularizers import l2
from tensorflow.keras.applications import ResNet50
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
import os

In [None]:
# Experiment configuration
NUM_CLASES = 10
EPOCHS = 30
BATCH_SIZE = 64

# 1. Load and preprocess CIFAR-10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Cast both image splits to float32 and divide by 255.
# NOTE: despite the "_resized" suffix, no spatial resizing happens here.
x_train_resized, x_test_resized = (
    images.astype('float32') / 255.0 for images in (x_train, x_test)
)

# One-hot encode the integer class labels of both splits
y_train_cat, y_test_cat = (
    to_categorical(labels, NUM_CLASES) for labels in (y_train, y_test)
)

In [2]:
# 2. Definir Funciones para Construir Modelos

# 2.1. Función para construir MLP
def build_mlp(input_shape):
    """Build an uncompiled fully connected classifier.

    Architecture: Flatten -> Dense(512, relu) -> Dense(256, relu)
    -> Dense(NUM_CLASES, softmax).

    Args:
        input_shape: Shape of one input sample, passed to the Flatten layer.

    Returns:
        A Keras ``Sequential`` model. The output width is read from the
        module-level ``NUM_CLASES`` at call time.
    """
    return Sequential([
        Flatten(input_shape=input_shape),
        Dense(512, activation='relu'),
        Dense(256, activation='relu'),
        Dense(NUM_CLASES, activation='softmax'),
    ])

# 2.2. Función para construir CNN básica
def build_cnn(input_shape):
    """Build an uncompiled baseline CNN.

    Two Conv2D(3x3, relu) + MaxPooling2D(2x2) stages (32 then 64 filters),
    followed by Flatten, Dense(128, relu) and a softmax output layer.

    Args:
        input_shape: Shape of one input sample, passed to the first Conv2D.

    Returns:
        A Keras ``Sequential`` model. The output width is read from the
        module-level ``NUM_CLASES`` at call time.
    """
    feature_extractor = [
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
    ]
    classifier_head = [
        Flatten(),
        Dense(128, activation='relu'),
        Dense(NUM_CLASES, activation='softmax'),
    ]
    return Sequential(feature_extractor + classifier_head)

# 2.3. Función para construir CNN con regularización
def build_cnn_regularized(input_shape):
    """Build an uncompiled CNN with L2 weight penalties and dropout.

    Same layout as the baseline CNN in this file, with an ``l2(0.001)``
    kernel regularizer on both convolutions and on the hidden Dense layer,
    Dropout(0.3) after each pooling stage and Dropout(0.5) before the output.

    Args:
        input_shape: Shape of one input sample, passed to the first Conv2D.

    Returns:
        A Keras ``Sequential`` model. The output width is read from the
        module-level ``NUM_CLASES`` at call time.
    """
    return Sequential([
        # Stage 1
        Conv2D(32, (3, 3), activation='relu', kernel_regularizer=l2(0.001), input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Dropout(0.3),
        # Stage 2
        Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
        MaxPooling2D((2, 2)),
        Dropout(0.3),
        # Classifier head
        Flatten(),
        Dense(128, activation='relu', kernel_regularizer=l2(0.001)),
        Dropout(0.5),
        Dense(NUM_CLASES, activation='softmax'),
    ])

# 2.4. Función para construir CNN avanzada
def build_cnn_advanced(input_shape):
    """Build an uncompiled functional-API CNN with a residual connection.

    Layout: Conv2D(32) + BatchNorm + MaxPool, SeparableConv2D(64) + BatchNorm
    + MaxPool, then a residual block whose shortcut is a 1x1 convolution,
    followed by Flatten, Dense(128, relu) and a softmax output layer.

    Args:
        input_shape: Shape of one input sample.

    Returns:
        A Keras ``Model``. The output width is read from the module-level
        ``NUM_CLASES`` at call time.
    """
    inputs = Input(shape=input_shape)

    # Stage 1: standard convolution with batch normalization
    features = Conv2D(32, (3, 3), activation='relu')(inputs)
    features = BatchNormalization()(features)
    features = MaxPooling2D((2, 2))(features)

    # Stage 2: depthwise-separable convolution
    features = SeparableConv2D(64, (3, 3), activation='relu')(features)
    features = BatchNormalization()(features)
    features = MaxPooling2D((2, 2))(features)

    # Residual block: the shortcut goes through a 1x1 convolution and is
    # added to the output of two 'same'-padded 3x3 convolutions.
    shortcut = Conv2D(64, (1, 1), padding='same')(features)
    main_path = Conv2D(64, (3, 3), padding='same', activation='relu')(features)
    main_path = BatchNormalization()(main_path)
    main_path = Conv2D(64, (3, 3), padding='same', activation='relu')(main_path)
    merged = Add()([main_path, shortcut])

    # Classifier head
    hidden = Dense(128, activation='relu')(Flatten()(merged))
    outputs = Dense(NUM_CLASES, activation='softmax')(hidden)

    return Model(inputs=inputs, outputs=outputs)

# 2.5. Función para construir Transfer Learning (ResNet50)
def build_cnn_transfer_learning(input_shape):
    """Build an uncompiled classifier on top of a frozen ImageNet ResNet50.

    Every layer of the ResNet50 backbone is marked non-trainable, so only the
    new Flatten -> Dense(256, relu) -> softmax head is trained.

    Args:
        input_shape: Shape of one input sample, forwarded to ``ResNet50``.

    Returns:
        A Keras ``Model``. The output width is read from the module-level
        ``NUM_CLASES`` at call time, like the other builders in this file
        (previously it was hard-coded to 10).
    """
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)

    # Freeze the whole pretrained backbone.
    for layer in base_model.layers:
        layer.trainable = False

    # NOTE(review): the callers in this file feed images scaled to [0, 1];
    # ImageNet ResNet50 weights are normally paired with
    # `resnet50.preprocess_input` -- confirm whether that is wanted here.
    x = base_model.output
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    output_layer = Dense(NUM_CLASES, activation='softmax')(x)

    model = Model(inputs=base_model.input, outputs=output_layer)
    return model

# 2.6. Función para construir Fine Tuning (ResNet50)
def build_cnn_fine_tuning(input_shape):
    """Build an uncompiled ResNet50 classifier with only its last layers trainable.

    All backbone layers except the last 10 are frozen; the last 10 backbone
    layers and the new Flatten -> Dense(256, relu) -> softmax head stay
    trainable.

    The previous version set ``trainable = True`` on ``layers[:-10]``, which is
    already the Keras default, so nothing was frozen and the whole network was
    trained.

    Args:
        input_shape: Shape of one input sample, forwarded to ``ResNet50``.

    Returns:
        A Keras ``Model``. The output width is read from the module-level
        ``NUM_CLASES`` at call time, like the other builders in this file
        (previously it was hard-coded to 10).
    """
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)

    # Freeze everything except the last 10 layers, which keep the default
    # trainable=True and are therefore the ones being fine-tuned.
    for layer in base_model.layers[:-10]:
        layer.trainable = False

    x = base_model.output
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    output_layer = Dense(NUM_CLASES, activation='softmax')(x)

    model = Model(inputs=base_model.input, outputs=output_layer)
    return model




In [None]:
# Training and evaluation helper shared by every model

# Accumulates one summary dict per evaluated model
results = []

def train_and_evaluate(model, model_name, x_train, y_train, x_test, y_test, epochs=EPOCHS, batch_size=BATCH_SIZE, fine_tune=False):
    """Compile, train and evaluate a model, recording its metrics in ``results``.

    The model is compiled with Adam (learning rate 0.001) and categorical
    cross-entropy, then fitted using ``(x_test, y_test)`` as validation data.
    Accuracy and macro-averaged precision, recall and F1 are computed on
    ``x_test`` and stored in the module-level ``results`` list.

    If ``results`` already holds an entry for ``model_name`` it is replaced in
    place, so re-running a training cell does not add duplicate rows to the
    summary table and charts.

    Args:
        model: Uncompiled Keras model.
        model_name: Label stored under the ``'Modelo'`` key.
        x_train, y_train: Training inputs and one-hot labels.
        x_test, y_test: Evaluation inputs and one-hot labels.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used by ``fit``.
        fine_tune: Accepted for API compatibility; currently has no effect.

    Returns:
        Tuple ``(history, training_time)``: the Keras ``History`` object and
        the wall-clock duration of ``fit`` in seconds.
    """
    optimizer = Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    start_time = time.time()
    history = model.fit(
        x_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(x_test, y_test),
        verbose=2
    )
    training_time = time.time() - start_time

    # Turn softmax outputs and one-hot labels into class indices
    y_pred = np.argmax(model.predict(x_test), axis=1)
    y_true = np.argmax(y_test, axis=1)

    # Metrics
    accuracy = np.mean(y_pred == y_true)
    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')

    summary = {
        'Modelo': model_name,
        'Exactitud': accuracy,
        'Precisión': precision,
        'Recall': recall,
        'F1-score': f1,
        'Tiempo de Entrenamiento (s)': training_time
    }

    # Overwrite a previous run of the same model instead of appending a duplicate
    for position, entry in enumerate(results):
        if entry['Modelo'] == model_name:
            results[position] = summary
            break
    else:
        results.append(summary)

    print(f"Modelo: {model_name}")
    print(f"Exactitud: {accuracy:.4f}, Precisión: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}")
    print(f"Tiempo de entrenamiento: {training_time:.2f} segundos\n")

    return history, training_time


In [None]:
# 4.1. Train and evaluate the MLP
print("Entrenando MLP...")
model_mlp = build_mlp(input_shape=(32, 32, 3))
history_mlp, time_mlp = train_and_evaluate(
    model=model_mlp,
    model_name='MLP',
    x_train=x_train_resized,
    y_train=y_train_cat,
    x_test=x_test_resized,
    y_test=y_test_cat,
)

In [None]:
# 4.2. Train and evaluate the baseline CNN
print("Entrenando CNN Básica...")
model_cnn = build_cnn(input_shape=(32, 32, 3))
history_cnn, time_cnn = train_and_evaluate(
    model=model_cnn,
    model_name='CNN Básica',
    x_train=x_train_resized,
    y_train=y_train_cat,
    x_test=x_test_resized,
    y_test=y_test_cat,
)


In [None]:
# 4.3. Train and evaluate the regularized CNN
print("Entrenando CNN Regularizada...")
model_cnn_reg = build_cnn_regularized(input_shape=(32, 32, 3))
history_cnn_reg, time_cnn_reg = train_and_evaluate(
    model=model_cnn_reg,
    model_name='CNN Regularizada',
    x_train=x_train_resized,
    y_train=y_train_cat,
    x_test=x_test_resized,
    y_test=y_test_cat,
)

In [None]:
# 4.4. Train and evaluate the advanced CNN
print("Entrenando CNN Avanzada...")
model_cnn_adv = build_cnn_advanced(input_shape=(32, 32, 3))
history_cnn_adv, time_cnn_adv = train_and_evaluate(
    model=model_cnn_adv,
    model_name='CNN Avanzada',
    x_train=x_train_resized,
    y_train=y_train_cat,
    x_test=x_test_resized,
    y_test=y_test_cat,
)

In [None]:
# 4.5. Train and evaluate the transfer-learning CNN (frozen ResNet50 backbone)
model_cnn_tl = build_cnn_transfer_learning(input_shape=(32, 32, 3))
history_resnet50, time_resnet50 = train_and_evaluate(
    model=model_cnn_tl,
    model_name='Transfer Learning ResNet50',
    x_train=x_train_resized,
    y_train=y_train_cat,
    x_test=x_test_resized,
    y_test=y_test_cat,
)

In [None]:
# 4.6. Train and evaluate the fine-tuned CNN (ResNet50)
model_cnn_ft = build_cnn_fine_tuning((32, 32, 3))
# Stored under names that match `model_cnn_ft`, following the
# history_<model>/time_<model> pattern of cells 4.1-4.4. This cell previously
# assigned to history_resnet50/time_resnet50, overwriting the
# transfer-learning results produced by cell 4.5.
history_cnn_ft, time_cnn_ft = train_and_evaluate(model_cnn_ft, 'Fine Tuning ResNet50', x_train_resized, y_train_cat, x_test_resized, y_test_cat)


In [None]:
# Collect the per-model summaries into a DataFrame
df_results = pd.DataFrame(results)
print(df_results)

# One bar chart per metric, laid out on a 3x2 grid
metrics = ['Exactitud', 'Precisión', 'Recall', 'F1-score', 'Tiempo de Entrenamiento (s)']
num_metrics = len(metrics)

plt.figure(figsize=(20, 15))

for i, metric in enumerate(metrics, 1):
    # Training time is measured in seconds, so its value labels are lifted
    # by 1 instead of the 0.005 used for the score metrics.
    label_offset = 1 if metric == 'Tiempo de Entrenamiento (s)' else 0.005

    plt.subplot(3, 2, i)
    plt.bar(df_results['Modelo'], df_results[metric], color='skyblue')
    plt.title(metric)
    plt.ylabel(metric)
    plt.xticks(rotation=45, ha='right')

    # Print each bar's value just above it
    for index, value in enumerate(df_results[metric]):
        plt.text(index, value + label_offset, f"{value:.2f}", ha='center', va='bottom')

plt.tight_layout()
plt.show()


In [None]:
# Number of classes for the binary "ship" / "not ship" task.
# NOTE: this rebinds NUM_CLASES (and, below, x_train, x_test, y_train_cat and
# y_test_cat), which the 10-class cells above also use. Re-run the
# configuration/data cell before re-training any 10-class model.
NUM_CLASES = 2

# Load CIFAR-10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# "ship" is class index 8 in CIFAR-10
indice_barco = 8

# Binary labels: 1 for "ship", 0 for every other class
y_train_bin = (y_train.flatten() == indice_barco).astype(int)
y_test_bin = (y_test.flatten() == indice_barco).astype(int)

# Cast both image splits to float32 and divide by 255
x_train, x_test = (
    images.astype('float32') / 255.0 for images in (x_train, x_test)
)

# One-hot encode the binary labels
y_train_cat, y_test_cat = (
    to_categorical(labels, NUM_CLASES) for labels in (y_train_bin, y_test_bin)
)

# Resize an image to 32x32 and rescale its values
def preprocess_frame(frame):
    """Return ``frame`` resized to 32x32, cast to float32 and divided by 255.

    Args:
        frame: Image array accepted by ``cv2.resize``.

    Returns:
        The resized float32 array.
    """
    return cv2.resize(frame, (32, 32)).astype('float32') / 255.0

# Build and compile the model.
# build_cnn_advanced sizes its output layer from the module-level NUM_CLASES,
# which must already be 2 when this runs.
model_barco = build_cnn_advanced((32, 32, 3))
optimizer = Adam(learning_rate=0.001)
model_barco.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Data augmentation to improve generalization
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True
)

# No `datagen.fit(x_train)` call: fitting is only needed when feature-wise
# centering/normalization or ZCA whitening is enabled. None of them is, so the
# call only copied the whole training set without changing the generator.

# Train on augmented batches
model_barco.fit(
    datagen.flow(x_train, y_train_cat, batch_size=64),
    epochs=10,
    validation_data=(x_test, y_test_cat),
    verbose=2
)

# Evaluate on the test split
y_pred = model_barco.predict(x_test)
y_pred_classes = y_pred.argmax(axis=1)  # index of the larger softmax output

precision = precision_score(y_test_bin, y_pred_classes)
recall = recall_score(y_test_bin, y_pred_classes)
f1 = f1_score(y_test_bin, y_pred_classes)

print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1-Score: {f1}')

In [None]:
def _find_window_detections(frame_bgr, model, window_size, step_size, batch_size=1024):
    """Classify every sliding window of one frame and return the positive boxes.

    Args:
        frame_bgr: Frame as returned by ``cv2.VideoCapture.read`` (BGR order).
        model: Classifier whose ``predict`` returns per-class scores; class
            index 1 is treated as a detection.
        window_size: ``(width, height)`` of the sliding window in pixels.
        step_size: Stride in pixels along both axes.
        batch_size: Number of windows classified per ``predict`` call.

    Returns:
        List of ``(x1, y1, x2, y2)`` boxes, ordered row by row.
    """
    win_w, win_h = window_size
    frame_h, frame_w = frame_bgr.shape[:2]

    # OpenCV decodes frames as BGR. The classifier built in this notebook is
    # trained on CIFAR-10 as loaded by Keras (RGB), so windows are classified
    # in RGB order to match the training data.
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)

    # Top-left corner of every window that fits entirely inside the frame.
    corners = [
        (x, y)
        for y in range(0, frame_h - win_h + 1, step_size)
        for x in range(0, frame_w - win_w + 1, step_size)
    ]

    detections = []
    for start in range(0, len(corners), batch_size):
        chunk = corners[start:start + batch_size]
        # Same preprocessing as before (resize to 32x32, float32, divide by
        # 255), applied to a whole batch so predict() runs once per chunk
        # instead of once per window.
        batch = np.stack([
            cv2.resize(frame_rgb[y:y + win_h, x:x + win_w], (32, 32))
            for x, y in chunk
        ]).astype('float32') / 255.0
        predicted_classes = model.predict(batch, verbose=0).argmax(axis=1)
        detections.extend(
            (x, y, x + win_w, y + win_h)
            for (x, y), predicted_class in zip(chunk, predicted_classes)
            if predicted_class == 1
        )
    return detections


def sliding_window_detector_notebook(video_path, model, 
                                     window_size=(25, 25), step_size=2,
                                     save_dir='detected_frames'):
    """Run a sliding-window classifier over a video and show/save the detections.

    Each frame is scanned with a ``window_size`` window moved ``step_size``
    pixels at a time. Windows the model assigns to class 1 are outlined in
    green. Frames with at least one detection are written to ``save_dir`` as
    ``frame_<n>.jpg``, and every frame is displayed inline in the notebook.

    Fixes relative to the previous version:
      * saved JPEGs are written in BGR, as ``cv2.imwrite`` expects (they were
        converted to RGB first, which swapped red and blue on disk);
      * windows are classified in RGB order, matching the training data;
      * non-square windows are no longer skipped (the old shape check compared
        ``(height, width)`` against ``(width, height)``);
      * windows are classified in batches instead of one ``predict`` call each;
      * the capture is released even if processing raises.

    Args:
        video_path: Path of the video file to open.
        model: Classifier with a ``predict`` method returning per-class scores.
        window_size: ``(width, height)`` of the sliding window in pixels.
        step_size: Stride of the window in pixels.
        save_dir: Directory for frames with detections; created if missing.

    Returns:
        None. Progress and a final summary are printed.
    """
    # Create the output directory if it does not exist yet
    os.makedirs(save_dir, exist_ok=True)

    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print(f"No se pudo abrir el video: {video_path}")
        return

    frame_count = 0
    detected_frame_count = 0

    try:
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

        while video.isOpened():
            ret, frame = video.read()
            if not ret:
                break

            frame_count += 1

            # Classify before drawing so rectangles never leak into a window
            detections = _find_window_detections(frame, model, window_size, step_size)

            # Outline the detections on the frame
            for (x1, y1, x2, y2) in detections:
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Save frames with at least one detection (imwrite expects BGR)
            if detections:
                detected_frame_count += 1
                save_path = os.path.join(save_dir, f"frame_{frame_count}.jpg")
                cv2.imwrite(save_path, frame)

            # matplotlib expects RGB
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Show the annotated frame
            plt.figure(figsize=(10, 6))
            plt.imshow(frame_rgb)
            plt.title(f"Frame {frame_count} / {total_frames} - Detecciones: {len(detections)}")
            plt.axis('off')
            display(plt.gcf())
            plt.close()

            # Clear the output lazily so only one frame is visible at a time
            clear_output(wait=True)
    finally:
        video.release()

    clear_output()
    print(f"Proceso completado. Total de frames procesados: {frame_count}")
    print(f"Total de frames con detecciones: {detected_frame_count}")
    print(f"Frames detectados guardados en la carpeta: '{save_dir}'")


In [None]:
# Run the sliding-window detector on the video with the binary ship model
sliding_window_detector_notebook(
    video_path='barco3.mp4',
    model=model_barco,
)