In [None]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing.text import Tokenizer
import re

In [None]:
# Hyperparameters shared by the data pipeline and the model.
max_palabras = 10000     # vocabulary cap handed to imdb.load_data(num_words=...)
longitud_maxima = 100    # every review is padded/truncated to this many tokens
dim_embedding = 128      # width of each embedding vector in the model

# The IMDB reviews arrive already encoded as lists of integer word ids.
(X_entrenamiento, y_entrenamiento), (X_prueba, y_prueba) = imdb.load_data(
    num_words=max_palabras
)

# Bring both splits to a fixed length so they can be batched.
X_entrenamiento, X_prueba = (
    pad_sequences(particion, maxlen=longitud_maxima)
    for particion in (X_entrenamiento, X_prueba)
)

In [None]:
def limpiar_texto(texto):
    """Normalize raw review text before it is mapped to word ids.

    The text is lowercased and every character that is not an ASCII letter,
    an ASCII digit or whitespace is deleted. Deleted characters are not
    replaced by a space, so "well-made" becomes "wellmade".

    Args:
        texto: The raw string to clean.

    Returns:
        The cleaned, lowercased string.
    """
    return re.sub(r"[^a-zA-Z0-9\s]", '', texto.lower())

def texto_a_secuencia(texto, word_index, num_palabras=None):
    """Encode whitespace-separated text with the Keras IMDB id convention.

    Each word's rank in ``word_index`` is shifted by 3, because ids 0, 1 and 2
    are reserved for padding, start-of-sequence and out-of-vocabulary in the
    default ``imdb.load_data`` encoding. A word is kept only if its *shifted*
    id is below the vocabulary limit; checking the unshifted rank would let
    ids ``limit .. limit + 2`` through, which are outside the embedding table
    and never occur in the training data.

    Args:
        texto: Already-cleaned text; it is split on whitespace.
        word_index: Mapping from word to its 1-based frequency rank, as
            returned by ``imdb.get_word_index()``.
        num_palabras: Vocabulary size (exclusive upper bound for emitted ids).
            Defaults to the notebook-level ``max_palabras``.

    Returns:
        A list of integer ids, one per word; unknown or too-rare words are
        encoded as 2 (out-of-vocabulary).
    """
    limite = max_palabras if num_palabras is None else num_palabras
    desplazamiento = 3  # index_from used by imdb.load_data
    indice_oov = 2      # oov_char used by imdb.load_data

    secuencia = []
    for palabra in texto.split():
        rango = word_index.get(palabra)
        if rango is not None and rango + desplazamiento < limite:
            secuencia.append(rango + desplazamiento)
        else:
            secuencia.append(indice_oov)
    return secuencia

In [None]:
# --- Model definition -------------------------------------------------------
modelo = Sequential()
# `input_length` is intentionally not passed: it is deprecated in Keras 3 and
# the sequence length is inferred from the data on the first call.
modelo.add(Embedding(max_palabras, dim_embedding))
modelo.add(LSTM(64, return_sequences=False, kernel_regularizer=l2(0.01)))
modelo.add(Dropout(0.6))
modelo.add(Dense(1, activation='sigmoid'))  # single probability: P(positive)

modelo.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# --- Training ---------------------------------------------------------------
early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

# Validate on a slice held out from the *training* data. Early stopping picks
# the best weights using the validation loss, so validating on the test set
# would leak it into model selection and inflate the reported test accuracy.
modelo.fit(X_entrenamiento, y_entrenamiento, epochs=10, batch_size=64,
           validation_split=0.2, callbacks=[early_stopping])

# --- Evaluation on the untouched test set -----------------------------------
# evaluate() returns [loss, accuracy] in the order given to compile().
puntuacion, precision = modelo.evaluate(X_prueba, y_prueba)
print(f"Precisión en el conjunto de prueba: {precision:.2f}")

# --- Inference on new, raw-text reviews -------------------------------------
word_index = imdb.get_word_index()

nuevos_comentarios = [
    "The movie was fantastic, really enjoyed the storyline and the performances!",
    "This was the worst movie I have ever seen, completely terrible and boring.",
    "An excellent movie with great acting and a compelling plot.",
]

X_nuevos = []
for comentario in nuevos_comentarios:
    comentario_limpio = limpiar_texto(comentario)
    # imdb.load_data prepends the start token (id 1) to every review, so new
    # text gets the same prefix to match the encoding seen during training.
    secuencia = [1] + texto_a_secuencia(comentario_limpio, word_index)
    X_nuevos.append(secuencia)

X_nuevos = pad_sequences(X_nuevos, maxlen=longitud_maxima)

predicciones = modelo.predict(X_nuevos)

for i, comentario in enumerate(nuevos_comentarios):
    # Sigmoid output above 0.5 is read as a positive review.
    sentimiento = 'Positivo' if predicciones[i][0] > 0.5 else 'Negativo'
    print(f"Comentario: {comentario}")
    print(f"Predicción: {sentimiento} ({predicciones[i][0]:.2f})\n")

Epoch 1/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 192ms/step - accuracy: 0.6866 - loss: 1.0451 - val_accuracy: 0.8364 - val_loss: 0.3963
Epoch 2/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 194ms/step - accuracy: 0.8816 - loss: 0.3107 - val_accuracy: 0.8454 - val_loss: 0.3628
Epoch 3/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 182ms/step - accuracy: 0.9072 - loss: 0.2610 - val_accuracy: 0.8420 - val_loss: 0.3751
Epoch 4/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 196ms/step - accuracy: 0.9271 - loss: 0.2164 - val_accuracy: 0.8323 - val_loss: 0.4289
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 22ms/step - accuracy: 0.8464 - loss: 0.3661
Precisión en el conjunto de prueba: 0.85
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 227ms/step
Comentario: The movie was fantastic, really enjoyed the storyline and the performances!
Predicción: Positivo (0.80)

Comentario: This was the worst movie I have ever seen, completely terrible and boring.
Predicción: Negativo (0.02)

Comentario: An excellent movie with great acting and a compelling plot.
Predicción: Positivo (0.81)

