# Original Code

In [1]:
"""
This Python script builds and trains a Recurrent Neural Network (RNN) to generate text based on an input sequence.
The model uses an Embedding layer and a SimpleRNN layer to predict the next word in a sequence.
"""
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
import numpy as np

# Datos de entrada
text = "esta es una demostración de cómo una RNN puede generar texto basado en un texto de entrada."

# Tokenizar el texto
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
encoded = tokenizer.texts_to_sequences([text])[0]

# Preparar datos
vocab_size = len(tokenizer.word_index) + 1
sequences = []
for i in range(1, len(encoded)):
    sequence = encoded[:i+1]
    sequences.append(sequence)
sequences = pad_sequences(sequences, maxlen=max(len(seq) for seq in sequences), padding='pre')
X, y = sequences[:,:-1], sequences[:,-1]
y = to_categorical(y, num_classes=vocab_size)

# Crear el modelo RNN
model = Sequential()
model.add(Embedding(vocab_size, 10, input_length=X.shape[1]))
model.add(SimpleRNN(50, activation='relu'))
model.add(Dense(vocab_size, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Entrenar el modelo
model.fit(X, y, epochs=500, verbose=0)

# Generar texto
def generate_text(model, tokenizer, seed_text, n_words):
    result = seed_text
    for _ in range(n_words):
        encoded = tokenizer.texts_to_sequences([seed_text])[0]
        encoded = pad_sequences([encoded], maxlen=X.shape[1], padding='pre')
        y_pred = np.argmax(model.predict(encoded), axis=-1)
        word = tokenizer.index_word[y_pred[0]]
        seed_text += ' ' + word
        result += ' ' + word
    return result

# Texto generado
print(generate_text(model, tokenizer, 'esta es', 10))



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 337ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
esta es una demostración de cómo una rnn puede generar texto basado


# Modified Code

In [2]:
from tensorflow.keras.layers import LSTM, Dropout

# Input data
text = "esta es una demostración de cómo una RNN puede generar texto basado en un texto de entrada."

# Tokenize the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
encoded = tokenizer.texts_to_sequences([text])[0]

# Prepare data
vocab_size = len(tokenizer.word_index) + 1
sequences = []
for i in range(1, len(encoded)):
    sequence = encoded[:i+1]
    sequences.append(sequence)
max_sequence_length = max(len(seq) for seq in sequences)
sequences = pad_sequences(sequences, maxlen=max_sequence_length, padding='pre')
X, y = sequences[:, :-1], sequences[:, -1]
y = to_categorical(y, num_classes=vocab_size)

# Create the modified RNN model
model = Sequential()
model.add(Embedding(vocab_size, 50, input_length=X.shape[1]))  # Increased embedding size
model.add(Dropout(0.2))  # Dropout after embedding layer
model.add(LSTM(50, activation='tanh'))  # Replaced SimpleRNN with LSTM
model.add(Dropout(0.2))  # Dropout after LSTM layer
model.add(Dense(vocab_size, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=500, verbose=0)

# Generate text
def generate_text(model, tokenizer, seed_text, n_words):
    result = seed_text
    for _ in range(n_words):
        encoded = tokenizer.texts_to_sequences([seed_text])[0]
        encoded = pad_sequences([encoded], maxlen=X.shape[1], padding='pre')
        y_pred = np.argmax(model.predict(encoded), axis=-1)
        word = tokenizer.index_word.get(y_pred[0], '')
        seed_text += ' ' + word
        result += ' ' + word
    return result

# Generated text
print(generate_text(model, tokenizer, 'esta es', 10))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 201ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
esta es una demostración de cómo una rnn puede generar texto basado


**Explanation of Changes:**

* LSTM Layer Impact: Switching to LSTM should improve the model's ability to produce contextually relevant text, as LSTM units are better at managing dependencies over longer sequences than SimpleRNN.
* Increased Embedding Dimension: This should improve the richness of word embeddings, allowing the model to better understand relationships between words.
* Dropout Layers: Dropout regularizes the model, reducing the chance of overfitting, especially with small datasets.

**Analysis of Changes and Observations:**

The results for both the original and modified codes are nearly identical, indicating that while the model has learned the sequence well, it may be overfitting to the small amount of input text provided. This behavior is expected when training a language model on limited data, as it can quickly memorize the input rather than generalizing patterns for diverse text generation.