<a href="https://colab.research.google.com/github/Naaman-Juma/Recurrent-Neural-Network-RNN-/blob/main/Recurrent_Neural_Network_(RNN)_to_generate_text_similar_to_Shakespeare%E2%80%99s_Hamlet_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

GROUP MEMBERS
1. NAAMAN JUMA - IN13/00102/21
2. GRANDEUR CERRULLO - IN13/00016/21

In [1]:
!pip install nltk tensorflow



In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Embedding
from tensorflow.keras.utils import to_categorical
import nltk
from nltk.corpus import gutenberg


In [3]:
# Download the Gutenberg corpus so the Hamlet text can be read below.
nltk.download('gutenberg')

# Read the whole play as one string, then normalise it to lowercase.
text = gutenberg.raw('shakespeare-hamlet.txt')
text = text.lower()

# Preview the first 500 characters of the corpus.
print("Sample text:", text[:500])


[nltk_data] Downloading package gutenberg to /root/nltk_data...
[nltk_data]   Unzipping corpora/gutenberg.zip.


Sample text: [the tragedie of hamlet by william shakespeare 1599]


actus primus. scoena prima.

enter barnardo and francisco two centinels.

  barnardo. who's there?
  fran. nay answer me: stand & vnfold
your selfe

   bar. long liue the king

   fran. barnardo?
  bar. he

   fran. you come most carefully vpon your houre

   bar. 'tis now strook twelue, get thee to bed francisco

   fran. for this releefe much thankes: 'tis bitter cold,
and i am sicke at heart

   barn. haue you had quiet guard?
  fran. not


In [4]:
def prepare_data(text, seq_length=40):
    """Turn raw text into (input window, next character) training pairs.

    Every run of ``seq_length`` consecutive characters becomes one input row,
    and the character immediately following that run becomes the row's target.

    Args:
        text: The corpus as a single string.
        seq_length: Number of characters in each input window (default 40).

    Returns:
        A tuple ``(X, y, char_to_index, index_to_char)`` where

        * ``X`` is an integer array of shape ``(num_windows, seq_length)``
          holding character indices, with
          ``num_windows = max(len(text) - seq_length, 0)``;
        * ``y`` is the one-hot encoding (via ``to_categorical``) of the
          character that follows each window, with one column per distinct
          character in ``text``;
        * ``char_to_index`` maps each distinct character to its index in the
          sorted vocabulary, and ``index_to_char`` is the inverse mapping.

    Raises:
        ValueError: If ``seq_length`` is less than 1 or ``text`` is empty.
    """
    if seq_length < 1:
        raise ValueError("seq_length must be at least 1")
    if not text:
        raise ValueError("text must not be empty")

    chars = sorted(set(text))  # Sorted so the index assignment is deterministic
    char_to_index = {char: idx for idx, char in enumerate(chars)}
    index_to_char = dict(enumerate(chars))

    # Encode the corpus a single time instead of re-encoding each of the
    # heavily overlapping windows character by character.
    encoded = np.array([char_to_index[char] for char in text], dtype=int)
    num_windows = max(len(encoded) - seq_length, 0)

    # Row i of the position grid is [i, i + 1, ..., i + seq_length - 1].
    # Indexing with it gathers every window at once and still yields a
    # (0, seq_length) array when the text is too short for any window.
    window_positions = np.arange(num_windows)[:, None] + np.arange(seq_length)
    X = encoded[window_positions]

    # The target for window i is the character at position i + seq_length.
    next_chars = encoded[seq_length:]
    y = to_categorical(next_chars, num_classes=len(chars))

    return X, y, char_to_index, index_to_char

# Build the training arrays and both vocabulary lookups from the corpus,
# using the default window length.
X, y, char_to_index, index_to_char = prepare_data(text=text)
print(
    "Data Prepared: X shape =",
    X.shape,
    ", y shape =",
    y.shape,
)


Data Prepared: X shape = (162841, 40) , y shape = (162841, 44)


In [5]:
def build_rnn_model(input_shape, vocab_size, embedding_dim=50, rnn_units=128):
    """Build and compile a two-layer SimpleRNN next-character classifier.

    Args:
        input_shape: Shape of the training inputs; only ``input_shape[1]``
            (the sequence length) is used.
        vocab_size: Number of distinct characters. Sets both the embedding
            table size and the width of the softmax output layer.
        embedding_dim: Size of each character embedding vector (default 50).
        rnn_units: Number of units in each SimpleRNN layer (default 128).

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        the Adam optimizer and an accuracy metric.
    """
    model = Sequential([
        # Declare the input explicitly instead of passing `input_length` to
        # Embedding, which Keras 3 deprecates. Declaring it up front also
        # builds the model right away, so `model.summary()` can report shapes.
        tf.keras.Input(shape=(input_shape[1],)),
        Embedding(input_dim=vocab_size, output_dim=embedding_dim),
        SimpleRNN(rnn_units, return_sequences=True),  # First RNN layer: one output per time step
        SimpleRNN(rnn_units),  # Second RNN layer: keeps only the final output
        Dense(vocab_size, activation='softmax')  # Output layer: one probability per character
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Size the network from the prepared data: X supplies the sequence length and
# the character mapping supplies the vocabulary size.
model = build_rnn_model(input_shape=X.shape, vocab_size=len(char_to_index))
model.summary()




In [6]:
# Train on every (window, next character) pair for two passes over the data.
model.fit(x=X, y=y, batch_size=64, epochs=2)


Epoch 1/2
[1m2545/2545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 49ms/step - accuracy: 0.2916 - loss: 2.5055
Epoch 2/2
[1m2545/2545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 49ms/step - accuracy: 0.4444 - loss: 1.8666


<keras.src.callbacks.history.History at 0x7e10557b85d0>

In [7]:
def generate_text(model, start_text, char_to_index, index_to_char, length=200,
                  seq_length=40, temperature=None, seed=None):
    """Extend ``start_text`` one predicted character at a time.

    At each step the last ``seq_length`` characters are fed to the model and
    the chosen next character is appended to the running text.

    Args:
        model: Object with a ``predict(batch, verbose=0)`` method returning one
            row of per-character probabilities for a batch of one sequence.
        start_text: Seed phrase to continue.
        char_to_index: Mapping from character to vocabulary index.
        index_to_char: Mapping from vocabulary index to character.
        length: Number of characters to generate (default 200).
        seq_length: Number of trailing characters used as context
            (default 40). NOTE(review): this should presumably equal the
            window length the model was trained with - confirm with the caller.
        temperature: ``None`` (default) always picks the most likely
            character. A positive number samples from the predicted
            distribution instead; lower values stay closer to the greedy
            choice, higher values give more varied text.
        seed: Optional seed for the sampling random generator; only used when
            ``temperature`` is set.

    Returns:
        ``start_text`` followed by the ``length`` generated characters.

    Raises:
        ValueError: If ``seq_length`` is less than 1 or ``temperature`` is
            given but not positive.
        KeyError: If the context taken from ``start_text`` contains a
            character that is not in ``char_to_index``.
    """
    if seq_length < 1:
        raise ValueError("seq_length must be at least 1")
    if temperature is not None and temperature <= 0:
        raise ValueError("temperature must be positive, or None for greedy decoding")
    if length <= 0:
        return start_text

    context = start_text[-seq_length:]
    unknown = sorted(set(context) - set(char_to_index))
    if unknown:
        # Same exception type as a failed dict lookup, but names the culprits.
        raise KeyError(f"start_text contains characters missing from the vocabulary: {unknown}")

    # Keep the context as indices so it is encoded once, not on every step.
    window = [char_to_index[char] for char in context]
    rng = np.random.default_rng(seed) if temperature is not None else None

    generated = []
    for _ in range(length):
        input_seq = np.array([window])  # Batch containing a single sequence
        probabilities = np.asarray(model.predict(input_seq, verbose=0))[0]

        if temperature is None:
            next_index = int(np.argmax(probabilities))
        else:
            # Rescale in log space; the epsilon guards against log(0) and the
            # max subtraction keeps exp() from overflowing.
            logits = np.log(probabilities.astype("float64") + 1e-12) / temperature
            weights = np.exp(logits - logits.max())
            next_index = int(rng.choice(len(weights), p=weights / weights.sum()))

        generated.append(index_to_char[next_index])
        # Slide the context forward, never letting it exceed seq_length.
        window = (window + [next_index])[-seq_length:]

    return start_text + "".join(generated)

# Continue a seed phrase with the trained model and show the result.
start_text = "to be or not to be, that is the question: "
generated_text = generate_text(
    model=model,
    start_text=start_text,
    char_to_index=char_to_index,
    index_to_char=index_to_char,
)
print("Generated Text:\n", generated_text)


Generated Text:
 to be or not to be, that is the question: i will the senfer the senfer the senfer the senfer the senfer the senfer the senfer the senfer the senfer the senfer the senfer the senfer the senfer the senfer the senfer the senfer the senfer the se
