In [1]:
%pip install tensorflow
%pip install nltk

^C
Note: you may need to restart the kernel to use updated packages.


Import libraries and load the datasets

In [1]:
import ast

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models

# Read the train / validation / test splits; the CSV paths are relative to the
# current working directory.
df_train, df_val, df_test = (
    pd.read_csv(csv_path)
    for csv_path in ("train_vids.csv", "val_vids.csv", "test_vids.csv")
)

ModuleNotFoundError: No module named 'pandas'

Modify datasets

In [None]:
def parse_points(points_str):
    """Parse the text stored in a ``points`` cell into a NumPy array.

    The text is read with ``ast.literal_eval`` rather than ``eval``: only
    Python literals (numbers, lists, tuples, ...) are accepted, so a crafted
    CSV cell cannot execute code when the notebook loads it.

    Args:
        points_str: String containing a Python literal, e.g. ``"[[1, 2], [3, 4]]"``.

    Returns:
        ``np.ndarray`` built from the parsed literal.

    Raises:
        ValueError, SyntaxError: If ``points_str`` is not a valid Python literal.
    """
    return np.array(ast.literal_eval(points_str))

In [1]:
def pointsToCnnInputForm(frame):
    """Turn every flat key frame of a video into a zero-padded 48x48 grid.

    Each key frame is extended with 36 trailing zeros, reshaped to 46 rows of
    48 values (so it must hold 46 * 48 - 36 = 2172 values) and completed with
    two all-zero rows at the bottom.

    Args:
        frame: Iterable of 1-D key-frame vectors.

    Returns:
        List with one ``(48, 48)`` array per key frame, in the input order.
    """
    tail_padding = [0] * 36
    bottom_rows = np.zeros((2, 48))

    def to_grid(flat_key_frame):
        grid = np.concatenate([flat_key_frame, tail_padding]).reshape(46, 48)
        return np.vstack((grid, bottom_rows))

    return [to_grid(key_frame) for key_frame in frame]

Cambio el formato de points para que cada vídeo sea una lista de matrices de 48x48 (una por keyframe)

In [None]:
# Parse the stored text of every 'points' cell, then turn each key frame into a
# 48x48 grid. The same two steps are applied to all three splits.
for split_df in (df_train, df_val, df_test):
    split_df['points'] = split_df['points'].apply(parse_points).apply(pointsToCnnInputForm)

In [None]:
# Sort every split by its 'len_keyframes' column (i.e. by length).
df_train, df_test, df_val = (
    split_df.sort_values(by='len_keyframes')
    for split_df in (df_train, df_test, df_val)
)

Tokenizar texto de traducciones para que sea compatible con el modelo


In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer

# Create the tokenizer.
tokenizer = Tokenizer()

# Fit the vocabulary on the training translations only.
tokenizer.fit_on_texts(df_train['translation'])

# Encode the sentences of every split as lists of integer token ids.
for split_df in (df_train, df_val, df_test):
    split_df['translation_sequence'] = tokenizer.texts_to_sequences(split_df['translation'])


Creo datasets de Tensorflow

In [None]:
def _to_ragged(column, dtype):
    """Pack a column of variable-length sequences into one ``tf.RaggedTensor``.

    Row ``i`` of the result holds the items of the ``i``-th entry of ``column``.
    """
    rows = [np.asarray(row, dtype=dtype) for row in column]
    row_lengths = np.array([len(row) for row in rows], dtype=np.int64)
    # Empty rows contribute no values; they are skipped so rows with trailing
    # dimensions can still be concatenated.
    non_empty = [row for row in rows if len(row)]
    if non_empty:
        values = np.concatenate(non_empty, axis=0)
    else:
        values = np.zeros((0,), dtype=dtype)
    return tf.RaggedTensor.from_row_lengths(values, row_lengths)


def create_tf_dataset(df):
    """Build a ``tf.data.Dataset`` of ``(points, translation_sequence)`` pairs.

    ``np.stack`` only works when every row has the same length, so videos with
    different numbers of key frames (or sentences with different numbers of
    tokens) made it raise. Both columns are stored as ragged tensors instead;
    ``Dataset.batch`` can then group rows of different lengths and the batch
    can be padded afterwards.

    Args:
        df: DataFrame with a ``points`` column (one list of 48x48 arrays per
            row) and a ``translation_sequence`` column (one list of token ids
            per row).

    Returns:
        Dataset yielding one ``(points, sequence)`` pair per DataFrame row;
        points are float32 and token ids int64.
    """
    points = _to_ragged(df['points'], np.float32)
    sequence = _to_ragged(df['translation_sequence'], np.int64)
    return tf.data.Dataset.from_tensor_slices((points, sequence))


train_dataset = create_tf_dataset(df_train)
val_dataset = create_tf_dataset(df_val)
test_dataset = create_tf_dataset(df_test)

Hago el padding para que las secuencias dentro de los batches tengan la misma longitud

In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

def pad_sequences_fn(points, maxlen):
    """Post-pad a list of NumPy sequences with zeros up to ``maxlen`` (float32).

    Eager/NumPy helper kept for callers that pad outside ``tf.data``; the
    dataset pipeline below pads with TensorFlow ops instead.
    """
    return pad_sequences(points, maxlen=maxlen, dtype='float32', padding='post')


def get_max_len(points):
    """Return the longest sequence length (axis 1) found in a batch.

    Args:
        points: Batched ``tf.RaggedTensor`` or dense tensor of rank >= 2.

    Returns:
        Scalar int32 tensor.
    """
    if isinstance(points, tf.RaggedTensor):
        return tf.cast(tf.reduce_max(points.row_lengths()), tf.int32)
    # A dense batch is already rectangular: every row has the same length.
    return tf.shape(points)[1]


def _pad_time_axis(values, max_len):
    """Return ``values`` as a dense tensor whose axis 1 has exactly ``max_len`` items."""
    if isinstance(values, tf.RaggedTensor):
        values = values.to_tensor()  # zero-pads every row to the longest one
    values = values[:, :max_len]
    missing = max_len - tf.shape(values)[1]
    paddings = [[0, 0], [0, missing]] + [[0, 0]] * (values.shape.rank - 2)
    return tf.pad(values, paddings)


def dynamic_padding_fn(max_len, batch=None):
    """Build a function that pads one ``(points, labels)`` batch.

    Args:
        max_len: Number of time steps every point sequence is padded to.
        batch: Unused; accepted only so existing two-argument calls keep working.

    Returns:
        Function mapping a ``(points, labels)`` tuple to dense, zero-padded
        ``(points, labels)`` tensors.
    """
    def pad_batch(batch):
        points, labels = batch
        points_padded = _pad_time_axis(points, max_len)
        # Labels are padded to the longest sentence of the batch; 0 is the id
        # the Keras Tokenizer never assigns to a word.
        if isinstance(labels, tf.RaggedTensor):
            labels = labels.to_tensor()
        return points_padded, labels
    return pad_batch


def pad_batches(dataset):
    """Zero-pad every batch so all its sequences share one length.

    Args:
        dataset: Batched dataset of ``(points, sequence)`` pairs, ragged or dense.

    Returns:
        Dataset yielding dense ``(points, sequence)`` batches.
    """
    # Dataset.map unpacks tuple elements into separate positional arguments.
    def pad_batch_fn(points, sequence):
        max_len = get_max_len(points)  # longest sequence in this batch
        return dynamic_padding_fn(max_len)((points, sequence))
    return dataset.map(pad_batch_fn)


train_dataset = pad_batches(train_dataset.batch(32))
val_dataset = pad_batches(val_dataset.batch(32))
test_dataset = pad_batches(test_dataset.batch(32))

Crear modelo (**EJEMPLO**)

CNN

In [None]:
def create_cnn(input_shape=(48, 48, 1)):
    """Build the per-frame CNN feature extractor.

    Two Conv2D + MaxPooling2D stages followed by Flatten, so every frame is
    mapped to a flat feature vector.

    Args:
        input_shape: Shape of one frame as ``(height, width, channels)``.
            Defaults to the 48x48 single-channel grids used in this notebook.

    Returns:
        ``Sequential`` model mapping one frame to a 1-D feature vector.
    """
    return models.Sequential([
        # An explicit Input replaces the `input_shape=` layer argument, which
        # recent Keras versions discourage.
        layers.Input(shape=input_shape),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
    ])

LSTM

In [None]:
def create_lstm():
    """Build the recurrent part of the network.

    Three stacked LSTM layers (128, 128 and 64 units), each followed by
    Dropout. The first two return the full sequence; the last one returns only
    its final output, so the stack yields one vector per input sequence.

    Returns:
        Unbuilt ``Sequential`` model.
    """
    return models.Sequential([
        # First LSTM layer, return_sequences=True.
        layers.LSTM(128, return_sequences=True),
        layers.Dropout(0.3),
        # Second LSTM layer, return_sequences=True.
        layers.LSTM(128, return_sequences=True),
        layers.Dropout(0.3),
        # Third LSTM layer, return_sequences=False.
        layers.LSTM(64, return_sequences=False),
        layers.Dropout(0.2),
    ])

CNN+LSTM

In [None]:
def create_cnn_lstm_model(num_classes):
    """Assemble the CNN + LSTM video model.

    The CNN is applied to every frame through ``TimeDistributed``, the LSTM
    stack summarises the resulting feature sequence, and a softmax Dense layer
    produces ``num_classes`` probabilities.

    Args:
        num_classes: Size of the softmax output layer.

    Returns:
        Functional ``Model`` taking videos of shape ``(frames, 48, 48, 1)``
        (any number of frames) and returning one probability vector per video.
    """
    # NOTE(review): the output is a single distribution per video, not one per
    # target token -- confirm this matches the translation targets.
    frames_in = layers.Input(shape=(None, 48, 48, 1))

    # Run the CNN on every frame with TimeDistributed.
    frame_features = layers.TimeDistributed(create_cnn())(frames_in)

    sequence_summary = create_lstm()(frame_features)

    # Final output layer.
    class_probs = layers.Dense(num_classes, activation='softmax')(sequence_summary)

    return models.Model(inputs=frames_in, outputs=class_probs)

Implementación para el entrenamiento

Usar BLEU score

In [None]:
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def calculate_bleu(references, hypotheses):
    """Mean smoothed sentence-level BLEU over paired references and hypotheses.

    Args:
        references: Iterable of reference sentences, each a list of tokens or
            a whitespace-separated string.
        hypotheses: Iterable of predicted sentences in the same form and order.

    Returns:
        Mean BLEU score as a float; ``0.0`` when there are no sentence pairs.
    """
    def as_tokens(sentence):
        # sentence_bleu compares token sequences; a raw string would be scored
        # character by character.
        return sentence.split() if isinstance(sentence, str) else list(sentence)

    smoothing = SmoothingFunction().method4
    scores = [
        sentence_bleu([as_tokens(ref)], as_tokens(hyp), smoothing_function=smoothing)
        for ref, hyp in zip(references, hypotheses)
    ]
    # np.mean([]) would return nan and emit a RuntimeWarning.
    return float(np.mean(scores)) if scores else 0.0

In [None]:
# One class per vocabulary word plus index 0, which the Keras Tokenizer never
# assigns to a word.
num_classes = len(tokenizer.word_index) + 1
model = create_cnn_lstm_model(num_classes)

# Compile the model so that `model.optimizer` and `model.evaluate` are available
# to the cells below; the same optimizer object drives the custom training loop.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# NOTE(review): 'RUTA' is a placeholder path; recent Keras versions require the
# checkpoint path to end in `.keras` or `.h5` -- replace it before running.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='RUTA',           # Path where the model is saved
    save_best_only=True,       # Save only when the model is the best so far
    monitor='val_bleu',        # Metric to monitor
    mode='max'                 # Mode: maximize the monitored metric
)

# Early stopping watches the validation loss (the Keras default, spelled out here).
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)

In [None]:
# Train the model with a custom training loop.
epochs = 20  # Adjust the number of epochs as needed.

# NOTE(review): the model emits one class distribution per video, so the loss
# below presumably needs exactly one target token per video -- confirm against
# the translation data before relying on these metrics.


def _with_channel_axis(points_batch):
    """Add the trailing channel axis the model input declares, if it is missing."""
    if points_batch.shape.rank == 4:
        # (batch, frames, 48, 48) -> (batch, frames, 48, 48, 1)
        return points_batch[..., tf.newaxis]
    return points_batch


def _ids_to_tokens(token_ids):
    """Decode token ids into a list of words, dropping the padding id 0."""
    kept = [int(token_id) for token_id in np.ravel(token_ids) if int(token_id) != 0]
    return tokenizer.sequences_to_texts([kept])[0].split()


def _evaluate(dataset):
    """Return ``(mean loss, accuracy, BLEU)`` of the model over ``dataset``.

    Written by hand instead of ``model.evaluate`` so it works whether or not
    the model has been compiled, and so BLEU comes from the same forward pass.
    """
    total_loss, n_losses = 0.0, 0
    n_correct, n_targets = 0, 0
    references, hypotheses = [], []
    for points_batch, translation_batch in dataset:
        predictions = model(_with_channel_axis(points_batch), training=False)
        losses = tf.keras.losses.sparse_categorical_crossentropy(translation_batch, predictions)
        total_loss += float(tf.reduce_sum(losses))
        n_losses += int(tf.size(losses))

        true_ids = translation_batch.numpy()
        predicted_ids = np.argmax(predictions.numpy(), axis=-1)
        n_correct += int(np.sum(predicted_ids.reshape(-1) == true_ids.reshape(-1)))
        n_targets += true_ids.size

        # One reference / hypothesis token list per video in the batch.
        batch_size = len(true_ids)
        references.extend(_ids_to_tokens(row) for row in true_ids.reshape(batch_size, -1))
        hypotheses.extend(_ids_to_tokens(row) for row in predicted_ids.reshape(batch_size, -1))

    mean_loss = total_loss / max(n_losses, 1)
    accuracy = n_correct / max(n_targets, 1)
    bleu = calculate_bleu(references=references, hypotheses=hypotheses) if references else 0.0
    return mean_loss, accuracy, bleu


# Callbacks driven by hand need the model attached and their state initialised.
for callback in (checkpoint_callback, early_stopping):
    callback.set_model(model)
    callback.on_train_begin()
model.stop_training = False

# Iterate over the batches and update the parameters manually.
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")
    for step, (points_batch, translation_batch) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            predictions = model(_with_channel_axis(points_batch), training=True)
            loss = tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(translation_batch, predictions))

        # Compute and apply the gradients.
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        if step % 10 == 0:
            print(f"Step {step}: loss = {loss.numpy()}")

    # Evaluate on the validation set at the end of every epoch (loss, accuracy, BLEU).
    val_loss, val_acc, val_bleu = _evaluate(val_dataset)
    print(f"Validation loss: {val_loss}, Validation accuracy: {val_acc}")
    print(f"Validation BLEU: {val_bleu}")

    logs = {'val_loss': val_loss, 'val_accuracy': val_acc, 'val_bleu': val_bleu}
    checkpoint_callback.on_epoch_end(epoch, logs=logs)
    early_stopping.on_epoch_end(epoch, logs=logs)
    # EarlyStopping signals through model.stop_training; on_epoch_end returns None.
    if model.stop_training:
        print("Early stopping triggered")
        break

# Lets EarlyStopping restore the best weights on Keras versions that do so here.
for callback in (checkpoint_callback, early_stopping):
    callback.on_train_end()

# Evaluate the model on the test dataset.
test_loss, test_acc, _ = _evaluate(test_dataset)
print(f'Test accuracy: {test_acc}')