<a href="https://colab.research.google.com/github/19PA1AO5C4/Home-Assignment-3/blob/main/Q3%20Implementing%20an%20RNN%20for%20Text%20Generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import tensorflow as tf
import numpy as np
import random
import sys

# 1. Load a text dataset
# Fetch the Shakespeare corpus via Keras' download helper, which returns the
# local path of the downloaded file.
path_to_file = tf.keras.utils.get_file(
    'shakespeare.txt',
    'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)
# Read inside a context manager so the file handle is closed deterministically
# instead of being left to the garbage collector.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
text = text.lower()  # Lowercase for simplicity (shrinks the character vocabulary)

# 2. Convert text into sequences
# Character vocabulary plus lookup tables in both directions.
chars = sorted(set(text))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

maxlen = 40  # Sequence length
step = 3  # Stride between the starts of consecutive training windows

# Each training example is a `maxlen`-character window; its target is the
# single character that immediately follows the window.
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])

# One-hot encode inputs as (num_windows, maxlen, vocab) and targets as
# (num_windows, vocab).
# `np.bool_` is used instead of the `np.bool` alias, which was removed in
# NumPy 1.24 and only reintroduced in NumPy 2.0; `np.bool_` works everywhere.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool_)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool_)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

# 3. Define the RNN model
# A single LSTM layer reads the one-hot encoded window and a softmax Dense
# layer outputs one probability per vocabulary character.
model = tf.keras.models.Sequential([
    # Declare the input shape with an explicit Input object rather than the
    # `input_shape` layer argument, which newer Keras versions warn about.
    tf.keras.Input(shape=(maxlen, len(chars))),
    tf.keras.layers.LSTM(128),
    tf.keras.layers.Dense(len(chars), activation='softmax')
])

# 4. Train the model and generate text
# The targets are one-hot vectors, hence categorical cross-entropy.
model.compile(optimizer='adam', loss='categorical_crossentropy')

def sample(preds, temperature=1.0):
    """Draw one class index from `preds` after temperature scaling.

    The log-probabilities are divided by `temperature`, re-normalised with a
    softmax, and a single index is drawn from the resulting distribution.

    Args:
        preds: 1-D array-like of non-negative class probabilities.
        temperature: Scaling factor for the log-probabilities. Values below 1
            sharpen the distribution, values above 1 flatten it, and exactly
            0 selects the most probable class deterministically.

    Returns:
        The selected class index as a NumPy integer.
    """
    preds = np.asarray(preds).astype('float64')

    if temperature == 0:
        # Greedy decoding is the limit of the scaled softmax as the
        # temperature goes to zero; handling it here avoids a division by
        # zero that would otherwise produce NaN probabilities.
        return np.argmax(preds)

    with np.errstate(divide='ignore'):
        # log(0) == -inf is intentional: those classes end up with exactly
        # zero probability, so the divide warning is only noise.
        logits = np.log(preds) / temperature

    # Shift by the maximum before exponentiating so that at very low
    # temperatures exp() cannot underflow to all zeros (which would yield
    # 0/0 = NaN). The shift cancels out in the normalisation below.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)

    # One multinomial trial yields a one-hot row; argmax recovers its index.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

def generate_text(length, temperature):
    """Print `length` characters sampled from the model at `temperature`.

    A random `maxlen`-character slice of `text` is used as the seed and is
    printed first. Each step one-hot encodes the current window, asks the
    model for the next-character distribution, samples from it with
    `sample`, writes the character to stdout, and slides the window forward
    by one character. Nothing is returned.

    Args:
        length: Number of characters to generate.
        temperature: Sampling temperature forwarded to `sample`.
    """
    seed_start = random.randint(0, len(text) - maxlen - 1)
    window = text[seed_start: seed_start + maxlen]
    print(f'--- Generating with seed: "{window}"')

    vocab_size = len(chars)
    for _ in range(length):
        # One-hot encode the current window as a batch of size one.
        encoded = np.zeros((1, maxlen, vocab_size))
        for position, symbol in enumerate(window):
            encoded[0, position, char_indices[symbol]] = 1.

        distribution = model.predict(encoded, verbose=0)[0]
        sampled_char = indices_char[sample(distribution, temperature)]

        # Append the new character and drop the oldest one.
        window = (window + sampled_char)[1:]

        # Emit each character immediately so the text streams as it is made.
        sys.stdout.write(sampled_char)
        sys.stdout.flush()
    print()

# Fit the network; training for more epochs gives better results.
model.fit(x, y, epochs=20, batch_size=128)

# Generate 400 characters at each temperature, from most conservative to
# most random, with a header line before each sample.
for temperature in (0.2, 1.0, 1.2):
    print(f"---Generating Text with Temperature {temperature}---")
    generate_text(400, temperature)

# 5. Temperature scaling
# Temperature scaling controls the randomness of text generation: the
# log-probabilities are divided by the temperature before sampling.
# A lower temperature (e.g., 0.2) makes the model more confident in its
# predictions, resulting in more predictable and less surprising text.
# A temperature of 1.0 samples from the model's predictions unchanged, while
# a higher temperature (e.g., 1.2) increases the randomness,
# leading to more diverse and potentially more creative text, but also more errors.

# As the temperature approaches zero, the model always chooses the most likely character.

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt
[1m1115394/1115394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


  super().__init__(**kwargs)


Epoch 1/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m285s[0m 97ms/step - loss: 2.5874
Epoch 2/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m280s[0m 96ms/step - loss: 2.0164
Epoch 3/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m315s[0m 94ms/step - loss: 1.8470
Epoch 4/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m276s[0m 95ms/step - loss: 1.7427
Epoch 5/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m320s[0m 95ms/step - loss: 1.6723
Epoch 6/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m319s[0m 94ms/step - loss: 1.6242
Epoch 7/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m322s[0m 94ms/step - loss: 1.5819
Epoch 8/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m328s[0m 96ms/step - loss: 1.5462
Epoch 9/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m320s[0m 95ms/step - loss: 1.5195
Epoch 10/20
[1m2905/2905[0m [32m━━━━━━━━━━━