<a href="https://colab.research.google.com/github/Naaman-Juma/RNN-to-generate-text-similar-to-Shakespeare-s-Hamlet./blob/main/Recurrent_Neural_Network_(RNN)_to_generate_text_similar_to_Shakespeare%E2%80%99s_Hamlet_!.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

GROUP MEMBERS
1. NAAMAN JUMA - IN13/00102/21
2. GRANDEUR CERRULLO - IN13/00016/21

In [2]:
# Install required libraries
!pip install nltk tensorflow

# Import required libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Embedding, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
import nltk
from nltk.corpus import gutenberg

# Download necessary datasets
nltk.download('gutenberg')

# Load Shakespeare's Hamlet
text = gutenberg.raw('shakespeare-hamlet.txt').lower()  # Convert to lowercase
print("Sample text:", text[:500])  # Show the first 500 characters




[nltk_data] Downloading package gutenberg to /root/nltk_data...
[nltk_data]   Unzipping corpora/gutenberg.zip.


Sample text: [the tragedie of hamlet by william shakespeare 1599]


actus primus. scoena prima.

enter barnardo and francisco two centinels.

  barnardo. who's there?
  fran. nay answer me: stand & vnfold
your selfe

   bar. long liue the king

   fran. barnardo?
  bar. he

   fran. you come most carefully vpon your houre

   bar. 'tis now strook twelue, get thee to bed francisco

   fran. for this releefe much thankes: 'tis bitter cold,
and i am sicke at heart

   barn. haue you had quiet guard?
  fran. not


In [4]:
def prepare_data(text, seq_length=40):
    chars = sorted(list(set(text)))  # Get all unique characters
    char_to_index = {char: idx for idx, char in enumerate(chars)}  # Mapping
    index_to_char = {idx: char for idx, char in enumerate(chars)}

    sequences = []
    next_chars = []

    for i in range(0, len(text) - seq_length - 1):  # Fix index range
        sequences.append([char_to_index[char] for char in text[i:i+seq_length]])
        next_chars.append(char_to_index[text[i+seq_length]])  # Next char prediction

    X = np.array(sequences)
    y = to_categorical(next_chars, num_classes=len(chars))

    return X, y, char_to_index, index_to_char

X, y, char_to_index, index_to_char = prepare_data(text)
print("Data Prepared: X shape =", X.shape, ", y shape =", y.shape)


Data Prepared: X shape = (162840, 40) , y shape = (162840, 44)


In [5]:
def build_rnn_model(input_shape, vocab_size):
    model = Sequential([
        Input(shape=(input_shape[1],)),  # Explicit Input Layer
        Embedding(input_dim=vocab_size, output_dim=50, input_length=input_shape[1]),
        SimpleRNN(128, return_sequences=True),  # First RNN layer
        SimpleRNN(128),  # Second RNN layer
        Dense(vocab_size, activation='softmax')  # Output layer
    ])
    model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
    return model

# Build the model
model = build_rnn_model(X.shape, len(char_to_index))

# Ensure model is built before calling summary
model.build((None, X.shape[1]))

# Print model summary (table output)
model.summary()




In [9]:
model.fit(X, y, epochs=10, batch_size=64)


Epoch 1/10
[1m2545/2545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 45ms/step - accuracy: 0.4695 - loss: 1.7688
Epoch 2/10
[1m2545/2545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m152s[0m 49ms/step - accuracy: 0.4903 - loss: 1.6830
Epoch 3/10
[1m2545/2545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 48ms/step - accuracy: 0.5079 - loss: 1.6177
Epoch 4/10
[1m2545/2545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 45ms/step - accuracy: 0.5157 - loss: 1.5809
Epoch 5/10
[1m2545/2545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 45ms/step - accuracy: 0.5266 - loss: 1.5476
Epoch 6/10
[1m2545/2545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 46ms/step - accuracy: 0.5338 - loss: 1.5263
Epoch 7/10
[1m2545/2545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 45ms/step - accuracy: 0.5371 - loss: 1.5001
Epoch 8/10
[1m2545/2545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 45ms/step - accuracy: 0.5434 - loss: 1.4821


<keras.src.callbacks.history.History at 0x7943e539a250>

In [17]:
import random
def generate_text(model, start_text, char_to_index, index_to_char, length=8):
    for _ in range(length):
        input_seq = np.array([[char_to_index[char] for char in start_text[-6:]]])  # Last 40 characters
        prediction = model.predict(input_seq, verbose=0)
        next_char = index_to_char[np.argmax(prediction)]
        start_text += next_char  # Append predicted character
    return start_text

# Generate new text based on a seed phrase
start_text = "to be or not to be, that is the question: "
generated_text = generate_text(model, start_text, char_to_index, index_to_char)
print("Generated Text:\n", generated_text)


Generated Text:
 to be or not to be, that is the question: so must 
