<a href="https://colab.research.google.com/github/Sivagadiraju/Home_assignment3/blob/main/part%203.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import tensorflow as tf
import numpy as np
import os
import requests

# Step 1: Load the text data (Shakespeare Sonnets)
url = "https://www.gutenberg.org/cache/epub/1041/pg1041.txt"
text_path = tf.keras.utils.get_file("shakespeare.txt", url)
with open(text_path, 'r', encoding='utf-8') as f:
    text = f.read()

# Optional cleanup
text = text[text.find("THE SONNETS")+len("THE SONNETS"):text.find("A LOVER'S COMPLAINT")].strip()

# Step 2: Preprocess text - character-level tokenization
chars = sorted(set(text))
char2idx = {ch: i for i, ch in enumerate(chars)}
idx2char = np.array(chars)

text_as_int = np.array([char2idx[c] for c in text])

# Sequence generation
seq_length = 100
examples_per_epoch = len(text) // seq_length

char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)

def split_input_target(seq):
    return seq[:-1], seq[1:]

dataset = sequences.map(split_input_target)

# Step 3: Prepare dataset for training
BATCH_SIZE = 64
BUFFER_SIZE = 10000
vocab_size = len(chars)

dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

# Step 4: Build the model
embedding_dim = 256
rnn_units = 512

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim), # Removed batch_input_shape
    tf.keras.layers.LSTM(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
    tf.keras.layers.Dense(vocab_size)
])

model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

# Step 5: Train the model
EPOCHS = 10
model.fit(dataset, epochs=EPOCHS)

# Step 6: Generate text using temperature
def generate_text(model, start_string, temperature=1.0, gen_length=400):
    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    generated = []
    model.reset_states()

    for _ in range(gen_length):
        predictions = model(input_eval)
        predictions = predictions[:, -1, :] / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        input_eval = tf.expand_dims([predicted_id], 0)
        generated.append(idx2char[predicted_id])

    return start_string + ''.join(generated)

# Rebuild model for text generation with batch_size = 1
gen_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[1, None]),
    tf.keras.layers.LSTM(rnn_units, return_sequences=True, stateful=True),
    tf.keras.layers.Dense(vocab_size)
])
gen_model.set_weights(model.get_weights())

# Step 7: Generate and display sample text
print("\nGenerated Text with Temperature 0.5:")
print(generate_text(gen_model, start_string="Shall I compare thee to a summer's day?\n", temperature=0.5))

print("\nGenerated Text with Temperature 1.0:")
print(generate_text(gen_model, start_string="Shall I compare thee to a summer's day?\n", temperature=1.0))


Epoch 1/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 2s/step - loss: 3.7694
Epoch 2/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 2s/step - loss: 3.0412
Epoch 3/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 2s/step - loss: 2.7448
Epoch 4/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 2s/step - loss: 2.5277
Epoch 5/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 2s/step - loss: 2.4027
Epoch 6/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 2s/step - loss: 2.3024
Epoch 7/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 2s/step - loss: 2.2149
Epoch 8/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 2s/step - loss: 2.1543
Epoch 9/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 2s/step - loss: 2.0964
Epoch 10/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 2s/step - loss: 2.0432


ValueError: Unrecognized keyword arguments passed to Embedding: {'batch_input_shape': [1, None]}

In [6]:
# Rebuild model for text generation with batch_size = 1
gen_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim), # Removed batch_input_shape=[1, None]
    tf.keras.layers.LSTM(rnn_units, return_sequences=True, stateful=True),
    tf.keras.layers.Dense(vocab_size)
])
gen_model.set_weights(model.get_weights())

ValueError: You called `set_weights(weights)` on layer 'sequential_2' with a weight list of length 6, but the layer was expecting 0 weights.

In [7]:
# Rebuild model for text generation with batch_size = 1
gen_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[1, None]), # Added batch_input_shape=[1, None]
    tf.keras.layers.LSTM(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'), # Added recurrent_initializer='glorot_uniform'
    tf.keras.layers.Dense(vocab_size)
])
gen_model.set_weights(model.get_weights()) # This should now work without error

ValueError: Unrecognized keyword arguments passed to Embedding: {'batch_input_shape': [1, None]}