In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense, LSTM

# Print the TensorFlow version to confirm the environment is set up correctly
print("TensorFlow version:", tf.__version__)


TensorFlow version: 2.18.0


In [None]:
# Load the IMDB reviews, already encoded as integer word indices.
# num_words=10000 keeps only the 10,000 most frequent words to keep the vocabulary small.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

print("Número de muestras de entrenamiento:", len(x_train))
print("Número de muestras de prueba:", len(x_test))

# Reviews differ in length, so every one is truncated or padded to the same fixed length.
maxlen = 200
x_train, x_test = (
    pad_sequences(reviews, maxlen=maxlen) for reviews in (x_train, x_test)
)

print("Tamaño de la reseña de entrenamiento después del padding:", x_train.shape)


Número de muestras de entrenamiento: 25000
Número de muestras de prueba: 25000
Tamaño de la reseña de entrenamiento después del padding: (25000, 200)


In [None]:
# Sentiment classifier: embedding -> LSTM -> single sigmoid probability.
#
# The input shape is declared with an explicit Input layer rather than the
# Embedding `input_length` argument: Keras 3 (the default Keras since TF 2.16)
# deprecates that argument and ignores it, which leaves the model unbuilt when
# summary() is called. Declaring the shape up front builds the model
# immediately, so summary() can report output shapes and parameter counts.
model = Sequential()
model.add(tf.keras.Input(shape=(maxlen,), dtype='int32'))
# input_dim must equal the num_words value passed to imdb.load_data (10000).
model.add(Embedding(input_dim=10000, output_dim=32))
model.add(LSTM(32))
# One sigmoid unit: values above 0.5 are treated as a positive review downstream.
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()




In [None]:
# Guard against overfitting: in the recorded run, val_loss was lowest at epoch 3
# and rose afterwards while training accuracy kept climbing, so the weights left
# in the model after epoch 5 were not the best ones. EarlyStopping halts training
# once val_loss has not improved for `patience` epochs and restores the weights
# from the best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
)

# Train with a fraction of the training data (validation_split=0.2) held out
# for validation after each epoch.
history = model.fit(
    x_train, y_train,
    epochs=5,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stopping],
)


Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 114ms/step - accuracy: 0.6829 - loss: 0.5704 - val_accuracy: 0.8704 - val_loss: 0.3233
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 107ms/step - accuracy: 0.8979 - loss: 0.2715 - val_accuracy: 0.8696 - val_loss: 0.3146
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 105ms/step - accuracy: 0.9336 - loss: 0.1846 - val_accuracy: 0.8644 - val_loss: 0.3128
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 112ms/step - accuracy: 0.9466 - loss: 0.1498 - val_accuracy: 0.8702 - val_loss: 0.3554
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 105ms/step - accuracy: 0.9598 - loss: 0.1230 - val_accuracy: 0.8550 - val_loss: 0.3902


In [None]:
# Score the trained model on the test split and report accuracy as a percentage.
loss, accuracy = model.evaluate(x=x_test, y=y_test)
print(f"Precisión en test: {accuracy*100:.2f}%")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 23ms/step - accuracy: 0.8460 - loss: 0.4210
Precisión en test: 84.63%


In [None]:
# Preprocess new comments the same way as the training data: tokenize and pad.
# The IMDB word -> index mapping is needed to encode raw comment text.
# NOTE(review): per the Keras docs, imdb.load_data shifts these raw indices by
# index_from (default 3) and reserves 0/1/2 for padding/start/unknown — confirm
# that any encoder built on word_index applies the same offset.
word_index = imdb.get_word_index()
# An index -> word mapping can be built by inverting word_index if decoding is ever
# needed; it is not required for tokenization:
# reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])

def text_to_sequence(text, word_index, maxlen,
                     num_words=10000, index_from=3, start_char=1, oov_char=2):
    """Encode raw text using the same integer scheme as `imdb.load_data`.

    `imdb.get_word_index()` returns raw frequency ranks, while `imdb.load_data`
    (per the Keras documentation) prepends `start_char`, shifts every rank by
    `index_from`, and replaces ids outside the vocabulary with `oov_char`.
    Id 0 is reserved for padding. Using the raw ranks directly feeds the model
    ids that point at the wrong words, and ranks >= `num_words` would fall
    outside the embedding table.

    Args:
        text: Raw comment string.
        word_index: Mapping word -> frequency rank, as returned by
            `imdb.get_word_index()`.
        maxlen: Fixed output length; must match the padding length used for
            the training data.
        num_words: Vocabulary size passed to `imdb.load_data`; ids at or above
            it become `oov_char`. `None` disables the cap.
        index_from: Offset added to every rank (Keras default: 3).
        start_char: Id prepended to the sequence (Keras default: 1), or `None`
            to omit it.
        oov_char: Id used for unknown / out-of-vocabulary words
            (Keras default: 2).

    Returns:
        1-D integer array of length `maxlen`, padded/truncated by
        `pad_sequences` with its default settings.
    """
    import string

    # Replace punctuation with spaces so that e.g. "great!" matches "great".
    # Apostrophes are kept — assumes the vocabulary stores contractions such as
    # "don't" as single tokens; TODO confirm against word_index.
    strip_table = str.maketrans(
        {ch: ' ' for ch in string.punctuation if ch != "'"}
    )
    words = text.lower().translate(strip_table).split()

    sequence = [] if start_char is None else [start_char]
    for word in words:
        rank = word_index.get(word)
        if rank is None:
            # Word never seen in the IMDB corpus.
            sequence.append(oov_char)
            continue
        token_id = rank + index_from
        # Words rarer than the training vocabulary were mapped to oov_char
        # during training, and their ids would overflow the embedding table.
        if num_words is not None and token_id >= num_words:
            token_id = oov_char
        sequence.append(token_id)

    # Pad the sequence exactly as the training data was padded.
    return tf.keras.preprocessing.sequence.pad_sequences([sequence], maxlen=maxlen)[0]

# Example comments to classify. The original cell used `comentarios` without
# ever defining it, which raised NameError; replace these with your own reviews.
comentarios = [
    "This movie was fantastic! The acting was great and the story was really moving.",
    "Terrible film. The plot made no sense and it was a complete waste of time.",
    "It was okay, some good moments but overall pretty boring.",
]

# Encode every comment with the same `maxlen` used to pad the training data
# (reused from the data-loading cell rather than redefined, so the two cannot drift).
comentarios_procesados = np.array(
    [text_to_sequence(comentario, word_index, maxlen) for comentario in comentarios]
)

# Predict: one sigmoid output per comment.
predicciones = model.predict(comentarios_procesados)

# ravel() turns the (n, 1) prediction array into n scalars; formatting a
# 1-element ndarray with ':.4f' raises TypeError.
for comentario, pred in zip(comentarios, predicciones.ravel()):
    sentimiento = "Positivo" if pred > 0.5 else "Negativo"
    print(f"Comentario: {comentario}")
    print(f"Predicción: {pred:.4f} → {sentimiento}\n")

NameError: name 'comentarios' is not defined