In [25]:
from keras.datasets import imdb
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Embedding, SimpleRNN, Dense


# Cargar los datos
max_features = 10000
maxlen = 500
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)


# Preparar los datos
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)


# Crear el modelo RNN
model = Sequential()
model.add(Embedding(max_features, 32))
model.add(SimpleRNN(32, activation = "tanh")) #añadimos funcion de activacion
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


# Entrenar el modelo
model.fit(X_train, y_train, epochs=5, batch_size=64, validation_split=0.2)


# Evaluar el modelo
score = model.evaluate(X_test, y_test)
print(f'Test accuracy: {score[1]}')

# Score inicial: 0.83
# Score final: 0.78

Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 63ms/step - accuracy: 0.5901 - loss: 0.6560 - val_accuracy: 0.6606 - val_loss: 0.6098
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 58ms/step - accuracy: 0.7814 - loss: 0.4813 - val_accuracy: 0.6866 - val_loss: 0.5833
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 56ms/step - accuracy: 0.8790 - loss: 0.3426 - val_accuracy: 0.7920 - val_loss: 0.4733
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 55ms/step - accuracy: 0.9692 - loss: 0.1150 - val_accuracy: 0.7834 - val_loss: 0.5397
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 58ms/step - accuracy: 0.9964 - loss: 0.0322 - val_accuracy: 0.7748 - val_loss: 0.6476
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 20ms/step - accuracy: 0.7790 - loss: 0.6457
Test accuracy: 0.7806400060653687


In [22]:
"""
This Python script builds and trains a Recurrent Neural Network (RNN) to classify the sentiment of
movie reviews using the IMDb dataset. The model uses an Embedding layer and a SimpleRNN layer.
It is compiled and trained using the TensorFlow and Keras libraries.
"""
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence


# Cargar y preprocesar los datos de IMDB
max_features = 1000
maxlen = 100


(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)


# Crear y entrenar el modelo RNN
model = Sequential()
model.add(Embedding(max_features, 32, input_length=maxlen))
model.add(SimpleRNN(64, activation='tanh'))
model.add(tf.keras.layers.Dropout(0.2)) # Añadimos dropout para evitar overfitting
model.add(tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis = -1))) # incrementamos dimensiones para poder agregar mas capas recurrentes
model.add(SimpleRNN(64, activation='tanh'))
#model.add(tf.keras.layers.Dropout(0.2)) # Añadimos dropout para evitar overfitting

model.add(Dense(1, activation='sigmoid'))


model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=5, batch_size=64, validation_split=0.2)


# Evaluar el modelo
accuracy = model.evaluate(X_test, y_test)
print(f'Test accuracy: {accuracy[1]}')

# Score inicial: 0.79
# Score final: 0.82

Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 33ms/step - accuracy: 0.5170 - loss: 0.6959 - val_accuracy: 0.5608 - val_loss: 0.6751
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 23ms/step - accuracy: 0.6467 - loss: 0.6066 - val_accuracy: 0.6722 - val_loss: 0.6098
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 22ms/step - accuracy: 0.7024 - loss: 0.5712 - val_accuracy: 0.7764 - val_loss: 0.4688
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 25ms/step - accuracy: 0.8058 - loss: 0.4359 - val_accuracy: 0.8064 - val_loss: 0.4295
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 21ms/step - accuracy: 0.8140 - loss: 0.4264 - val_accuracy: 0.7960 - val_loss: 0.4595
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.8018 - loss: 0.4469
Test accuracy: 0.8021199703216553


In [23]:
"""
This Python script builds and trains a Recurrent Neural Network (RNN) to generate text based on an input sequence.
The model uses an Embedding layer and a SimpleRNN layer to predict the next word in a sequence.
"""
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
import numpy as np


# Datos de entrada (Cambiamos para que sea el inicio del quijote)
text = "en un lugar de la Mancha de cuyo nombre no quiero acordarme, no ha mucho tiempo que vivía un hidalgo' de los de lanza en astillero', adarga antigua, rocín flaco y galgo corredor. U na olla de algo más vaca que carnero, salpicón' las más noches, duelos y quebrantos' los sábados, lintejas los viernes, algún palomino de añadidura los domingos, consumían las tres partes' de su hacienda."
# Tokenizar el texto
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
encoded = tokenizer.texts_to_sequences([text])[0]


# Preparar datos
vocab_size = len(tokenizer.word_index) + 1
sequences = []
for i in range(1, len(encoded)):
    sequence = encoded[:i+1]
    sequences.append(sequence)
sequences = pad_sequences(sequences, maxlen=max(len(seq) for seq in sequences), padding='pre')
X, y = sequences[:,:-1], sequences[:,-1]
y = to_categorical(y, num_classes=vocab_size)


# Crear el modelo RNN

model = Sequential()
model.add(Embedding(vocab_size, 16, input_length=X.shape[1]))
model.add(SimpleRNN(64, activation='relu'))
model.add(tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis = -1))) # incrementamos dimensiones para poder agregar mas capas recurrentes
model.add(SimpleRNN(64, activation='relu'))
model.add(Dense(vocab_size, activation='softmax'))


model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


# Entrenar el modelo
model.fit(X, y, epochs=500, verbose=0)


# Generar texto
def generate_text(model, tokenizer, seed_text, n_words):
    result = seed_text
    for _ in range(n_words):
        encoded = tokenizer.texts_to_sequences([seed_text])[0]
        encoded = pad_sequences([encoded], maxlen=X.shape[1], padding='pre')
        y_pred = np.argmax(model.predict(encoded), axis=-1)
        word = tokenizer.index_word[y_pred[0]]
        seed_text += ' ' + word
        result += ' ' + word
    return result


# Texto generado
print(generate_text(model, tokenizer, 'esta es', 1000))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 612ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2