In [4]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import string

# File Reading with try and except 
try:
    with open("F:\M.Tech_CollgeMaterials\DPL\Lb10\data\shakesphere.txt", "r", encoding="utf-8") as f:
        text = f.read()  # text variable stores the data from the shakespeare.txt file 
except FileNotFoundError:
    print("Error: shakespeare.txt not found. Please download the dataset and place it in the same directory.")
    exit()


text = text.lower()   # the text is converted to lowercase with .lower function and the state of the variable is updated 
text = text.translate(str.maketrans('', '', string.punctuation)) # 
vocab = sorted(list(set(text)))
char_to_index = {u:i for i, u in enumerate(vocab)}
index_to_char = np.array(vocab)

seq_length = 50
data = [char_to_index[c] for c in text]
n = len(data) - seq_length
train_data = data[:int(n * 0.8)]
val_data = data[int(n * 0.8):int(n * 0.9)]
test_data = data[int(n * 0.9):]

def create_sequences(data):
    xs = []
    ys = []
    for i in range(0, len(data) - seq_length, 1):
        x = data[i:i + seq_length]
        y = data[i + seq_length]
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

train_sequences, train_labels = create_sequences(train_data)
val_sequences, val_labels = create_sequences(val_data)
test_sequences, test_labels = create_sequences(test_data)


# 2. Model Building (modified to use SimpleRNN)
model = keras.Sequential([
    keras.layers.Embedding(len(vocab), 50, input_length=seq_length),
    keras.layers.SimpleRNN(128, return_sequences=True),
    keras.layers.SimpleRNN(128),
    keras.layers.Dense(len(vocab), activation='softmax')
])

# 3. Model Compilation (same as before)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# 4. Training (Reduced epochs for faster runtime - adjust as needed)
model.fit(train_sequences, train_labels, epochs=30, batch_size=128, validation_data=(val_sequences, val_labels))


Epoch 1/30
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 31ms/step - accuracy: 0.2191 - loss: 2.9925 - val_accuracy: 0.2426 - val_loss: 2.7665
Epoch 2/30
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 29ms/step - accuracy: 0.2861 - loss: 2.5906 - val_accuracy: 0.3372 - val_loss: 2.3556
Epoch 3/30
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 29ms/step - accuracy: 0.3657 - loss: 2.2314 - val_accuracy: 0.3676 - val_loss: 2.2078
Epoch 4/30
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 30ms/step - accuracy: 0.3917 - loss: 2.0635 - val_accuracy: 0.3739 - val_loss: 2.1331
Epoch 5/30
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 29ms/step - accuracy: 0.4143 - loss: 1.9602 - val_accuracy: 0.3960 - val_loss: 2.0714
Epoch 6/30
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 28ms/step - accuracy: 0.4318 - loss: 1.8877 - val_accuracy: 0.3960 - val_loss: 2.0597
Epoch 7/30
[1m122/122

<keras.src.callbacks.history.History at 0x1c5f38be1d0>

In [5]:
def generate_text(model, start_index, length):
    # Initialize with a sequence of seq_length random characters.
    generated_text = [np.random.randint(0, len(vocab)) for _ in range(seq_length)]  
    generated_text.append(start_index) # Add the actual starting index

    for _ in range(10):
        input_seq = np.array([generated_text[-seq_length:]]) # correct slicing
        prediction = model.predict(input_seq)
        next_index = np.argmax(prediction[0])
        generated_text.append(next_index)
    return "".join([index_to_char[i] for i in generated_text]) # corrected index


start_index = np.random.randint(0, len(vocab))
generated = generate_text(model, start_index, 200)
print("Generated Text:\n", generated)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 229ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 217ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
Generated Text:
 qnsovjwlmeway9fndqb5f45i
rimlv1t7opc4jglqzzlcex4xuing
  when 
