In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tokenization_module import TokenizerModule

from tensorflow.keras import layers, models, optimizers, callbacks, preprocessing

In [2]:
# Load the train / validation / test splits from the CSV files under dataset_clean/.
train, val, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "dataset_clean/train_clean.csv",
        "dataset_clean/validation_clean.csv",
        "dataset_clean/test_clean.csv",
    )
)

In [3]:
# Load the tokenizer: instantiate the project wrapper and have it load the
# vectorizer stored under the name "vectorizer".
tok = TokenizerModule()
tok.load_vectorizer("vectorizer")
# Keep a direct handle on the wrapped vectorizer (called directly on raw text later)
# and on its vocabulary (its length sizes the embedding layer).
encoder = tok.vectorizer
vocab = encoder.get_vocabulary()

In [4]:
# Vectorize the review text of every split (values are coerced to str first).
X_train, X_val, X_test = (
    tok.vectorize_texts(split['review_body'].astype(str))
    for split in (train, val, test)
)

# Pull the matching label columns, cast to int32.
Y_train, Y_val, Y_test = (
    split["label"].astype('int32')
    for split in (train, val, test)
)

In [21]:
# Build the model.
# Seed the Python, NumPy and TensorFlow RNGs before any weights are created so that
# weight initialisation, dropout and batch shuffling are repeatable across
# Restart & Run All (bit-exact GPU results may additionally require op determinism).
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# NOTE(review): per the Keras LSTM documentation a non-zero recurrent_dropout makes the
# layer ineligible for the cuDNN kernel, so GPU training falls back to the slower generic
# implementation -- confirm this speed/regularisation trade-off is intended.
model = models.Sequential([
    # Token ids -> 64-dimensional embeddings; mask_zero=True marks id 0 as padding
    # so that downstream layers skip those timesteps.
    layers.Embedding(
        input_dim=len(vocab),
        output_dim=64,
        mask_zero=True),
    # Sequence encoder: one LSTM per direction, 64 units each.
    layers.Bidirectional(layers.LSTM(64, dropout=0.3, recurrent_dropout=0.3)),
    # Hidden dense layer with L2 weight decay, followed by dropout.
    layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(1e-4)),
    layers.Dropout(0.6),
    # Output layer: softmax over 3 classes.
    layers.Dense(3, activation='softmax')
])

In [22]:
# Smoke test: push a single hand-written sentence through the vectorizer and the model.
sample_text = [
    "Este es un texto de prueba en español, me gusta mucho hola satisfecho hola hola"
]

# The model consumes integer token ids, so vectorize the raw text first.
sample_seq = encoder(sample_text)

# Run the forward pass and show the class probabilities for the only sample.
predictions = model.predict(sample_seq)
print(predictions[0])

1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 986ms/step
[0.33078393 0.33442578 0.3347902 ]


In [23]:
# Configure training: Adam with learning rate 1e-3, sparse categorical cross-entropy
# as the loss (labels are integer class ids), and accuracy as the reported metric.
model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-3),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [24]:
# Print the layer-by-layer summary of the model (output shapes and parameter counts).
model.summary()

In [27]:
# Hyperparameters for training: maximum number of epochs and mini-batch size.
epochs, batch_size = 10, 256

In [26]:
# Early stopping on validation loss: give up after 3 epochs without improvement
# and restore the weights from the best epoch.
callback = callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

In [None]:
# Train on the vectorized training split, evaluating on the validation split after
# every epoch; the early-stopping callback may end training before `epochs` is reached.
history = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, Y_val),
    callbacks=[callback],
)

Epoch 1/10
 70/782 ━━━━━━━━━━━━━━━━━━━━ 14:04 1s/step - accuracy: 0.5052 - loss: 1.0382