In [None]:
# ✅ Step 2: Install Dependencies
!pip install tensorflow matplotlib


In [2]:
!wget https://www.gutenberg.org/files/100/100-0.txt -O data/shakespeare.txt




data/shakespeare.txt: No such file or directory


In [7]:
import tensorflow as tf
import numpy as np
import os
from tensorflow.python.client import device_lib

# Check available accelerators
print("Available devices:")
print(device_lib.list_local_devices())

# Set up multi-GPU if available
strategy = tf.distribute.MirroredStrategy() if len(tf.config.list_physical_devices('GPU')) > 1 else tf.distribute.get_strategy()
print(f"Using strategy: {strategy.__class__.__name__}")

# Download Shakespeare dataset
!mkdir -p data
!wget https://www.gutenberg.org/files/100/100-0.txt -O data/shakespeare.txt
def clean_text(text):
    """More careful cleaning that maintains consistency"""
    text = text.replace('\r', ' ')  # Replace with space instead of removing
    text = text.replace('\n', ' ')  # Replace with space instead of removing
    text = ' '.join(text.split())  # Normalize spaces/
    return text

# 1. Load and clean text
with open('data/shakespeare.txt', 'r', encoding='utf-8') as f:
    text = clean_text(f.read())
print(f"Loaded text length: {len(text)} characters")

# 2. Create vocabulary from CLEANED text
vocab = sorted(set(text))
char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)
vocab_size = len(vocab)

print(f"Vocabulary size: {len(vocab)}")
print("Sample mappings:")
for char in list(vocab)[:5]:
    print(f"'{char}': {char2idx[char]}")

# 3. Convert text to integers
text_as_int = np.array([char2idx[c] for c in text])
print(f"Text as integers shape: {text_as_int.shape}")
print("Sample conversion:", text_as_int[:10])

Available devices:
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 17527652512067048116
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 14619377664
locality {
  bus_id: 1
  links {
    link {
      device_id: 1
      type: "StreamExecutor"
      strength: 1
    }
  }
}
incarnation: 7710084416329267826
physical_device_desc: "device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5"
xla_global_id: 416903419
, name: "/device:GPU:1"
device_type: "GPU"
memory_limit: 14619377664
locality {
  bus_id: 1
  links {
    link {
      type: "StreamExecutor"
      strength: 1
    }
  }
}
incarnation: 184280172459430724
physical_device_desc: "device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5"
xla_global_id: 2144165316
]
Using strategy: MirroredStrategy


I0000 00:00:1754153941.208598      36 gpu_device.cc:2022] Created device /device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1754153941.208820      36 gpu_device.cc:2022] Created device /device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


--2025-08-02 16:59:01--  https://www.gutenberg.org/files/100/100-0.txt
Resolving www.gutenberg.org (www.gutenberg.org)... 152.19.134.47, 2610:28:3090:3000:0:bad:cafe:47
Connecting to www.gutenberg.org (www.gutenberg.org)|152.19.134.47|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5618733 (5.4M) [text/plain]
Saving to: ‘data/shakespeare.txt’


2025-08-02 16:59:01 (20.8 MB/s) - ‘data/shakespeare.txt’ saved [5618733/5618733]

Loaded text length: 5297571 characters
Vocabulary size: 98
Sample mappings:
' ': 0
'!': 1
'&': 2
''': 3
'(': 4
Text as integers shape: (5297571,)
Sample conversion: [ 6  6  6  0 41 42 23 40 42  0]


In [8]:
# Create dataset
SEQ_LENGTH = 100
BATCH_SIZE = 128
BUFFER_SIZE = 10000

def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

# Create and split dataset
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
sequences = char_dataset.batch(SEQ_LENGTH+1, drop_remainder=True)
dataset = sequences.map(split_input_target)

# Shuffle and batch
full_dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

# Train/validation split
dataset_size = len(list(full_dataset))
train_size = int(0.8 * dataset_size)
val_size = dataset_size - train_size

train_dataset = full_dataset.take(train_size)
val_dataset = full_dataset.skip(train_size)

# In your dataset pipeline, add:
train_dataset = train_dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
val_dataset = val_dataset.prefetch(buffer_size=tf.data.AUTOTUNE)

# Expected effect:
# - Batch time drops from 475ms → ~350ms
# - Full training time: ~4.4 hours (instead of 6.3)

print(f"Training batches: {len(list(train_dataset))}, Validation batches: {len(list(val_dataset))}")

Training batches: 327, Validation batches: 82


In [9]:
def build_improved_model(vocab_size, batch_size=None):
    model = tf.keras.Sequential()
    
    # Input layer with proper batch handling
    model.add(tf.keras.layers.InputLayer(
        input_shape=(None,),
        batch_size=batch_size))
    
    # Enhanced embedding layer
    model.add(tf.keras.layers.Embedding(
        input_dim=vocab_size,
        output_dim=384))  # Increased from 256 to 384
    
    # Batch normalization for stability
    model.add(tf.keras.layers.BatchNormalization())
    
    # First GRU layer with increased capacity
    model.add(tf.keras.layers.GRU(
        1536,  # Increased from 1024
        return_sequences=True,
        stateful=(batch_size is not None),
        dropout=0.3,  # Slightly higher dropout
        recurrent_dropout=0.3,
        recurrent_initializer='glorot_uniform'))
    
    # Second GRU layer
    model.add(tf.keras.layers.GRU(
        768,  # Increased from 512
        return_sequences=True,
        stateful=(batch_size is not None),
        dropout=0.2))
    
    # Output layer with softmax for better probability distribution
    model.add(tf.keras.layers.Dense(
        vocab_size,
        activation='softmax'))  # Changed to softmax
    
    return model


In [6]:
import os
import tensorflow as tf
from tensorflow.keras import optimizers
from tensorflow.keras import losses

# 1. Locate the last checkpoint in Kaggle's working directory
checkpoint_dir = '/kaggle/working/training_checkpoints/'
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)

# 2. Rebuild the model architecture (important!)
model = build_improved_model(vocab_size, BATCH_SIZE)

# 3. Load the weights
if latest_checkpoint:
    print(f"Resuming from {latest_checkpoint}")
    model.load_weights(latest_checkpoint)
    
    # Extract last completed epoch number
    initial_epoch = int(latest_checkpoint.split('_')[-1].split('.')[0])
else:
    initial_epoch = 0
    print("No checkpoint found, starting from scratch")

# 4. Recompile with same settings
model.compile(
    optimizer=optimizers.Adam(
        learning_rate=0.0005,
        clipnorm=1.0
    ),
    loss=losses.SparseCategoricalCrossentropy(from_logits=True),  # Common for text generation
    metrics=['accuracy']
)

# 5. Update callbacks (modified to avoid overwriting)
os.makedirs(checkpoint_dir, exist_ok=True)

# Corrected callback with proper file extension
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=os.path.join(checkpoint_dir, 'ckpt_{epoch:03d}.weights.h5'),  # Added .weights
    monitor='val_loss',
    save_weights_only=True,
    save_best_only=False,
    mode='min',
    verbose=1
)

# Best model saver (also corrected)
best_model_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=os.path.join(checkpoint_dir, 'best_model.weights.h5'),  # Added .weights
    monitor='val_loss',
    save_weights_only=True,
    save_best_only=True,
    mode='min',
    verbose=1
)

# 6. Continue training
history = model.fit(
    train_dataset.prefetch(tf.data.AUTOTUNE),
    initial_epoch=initial_epoch,  # Critical for correct logging
    epochs=50,  # Total epochs (original target)
    validation_data=val_dataset.prefetch(tf.data.AUTOTUNE),
    callbacks=[
        checkpoint_callback,
        best_model_callback,
        tf.keras.callbacks.EarlyStopping(patience=10),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=3)
    ]
)



No checkpoint found, starting from scratch
Epoch 1/50


  output, from_logits = _get_logits(
I0000 00:00:1754153933.050751     101 cuda_dnn.cc:529] Loaded cuDNN version 90300


KeyboardInterrupt: 

In [2]:
import os
!ls -la /kaggle/working/training_checkpoints/  # Verify files

ls: cannot access '/kaggle/working/training_checkpoints/': No such file or directory


In [None]:
# Save the entire model (HDF5 format)
model.save('/kaggle/working/full_model.h5') 

# OR save as SavedModel format (better for deployment)
model.save('/kaggle/working/text_generator_model')  # Creates a directory

In [None]:
# To make files persist beyond session end:
from IPython.display import FileLink
FileLink('shakespeare_model.zip')  # Click to download

# Or save to Kaggle datasets:
!mkdir -p /kaggle/working/model_assets
!cp -r /kaggle/working/saved_model /kaggle/working/model_assets/
!kaggle datasets init -p /kaggle/working/model_assets

In [None]:
# 1. Manually stop the training cell (click stop button)

# 2. Immediately save the model
model.save_weights('/content/drive/MyDrive/interrupted_model_epoch40.weights.h5')

# 3. Save training history
import pickle
with open('/content/drive/MyDrive/training_history.pkl', 'wb') as f:
    pickle.dump(history.history, f)

In [None]:
# 1. Rebuild the same architecture
new_model = build_improved_model(vocab_size, batch_size=BATCH_SIZE)

# 2. Load weights
new_model.load_weights('/content/drive/MyDrive/interrupted_model_epoch40.weights.h5')

# 3. Continue training
history = new_model.fit(
    train_dataset,
    initial_epoch=40,  # Critical: Start from epoch 41
    epochs=60,         # New total epochs
    validation_data=val_dataset,
    callbacks=[...]    # Same callbacks
)

In [None]:
# Rebuild model for generation (batch_size=1)
gen_model = build_improved_model(vocab_size, batch_size=1)
gen_model.load_weights('/content/char-textgen/training_checkpoints/ckpt_14.weights.h5')

# Generation function
def generate_text(model, start_string, temperature=0.7, num_generate=500):
    input_indices = [char2idx[s] for s in start_string]
    input_indices = tf.expand_dims(input_indices, 0)

    text_generated = []
    model.reset_states()

    for _ in range(num_generate):
        predictions = model(input_indices)
        predictions = tf.squeeze(predictions, 0)
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

        input_indices = tf.expand_dims([predicted_id], 0)
        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

# Example generation
print(generate_text(gen_model, "ROMEO:", temperature=0.6))

In [None]:


# Visualization
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Train')
plt.plot(history.history['val_loss'], label='Validation')
plt.title('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='Train')
plt.plot(history.history['val_accuracy'], label='Validation')
plt.title('Accuracy')
plt.legend()

plt.show()

In [None]:
# Plot training and validation loss
import matplotlib.pyplot as plt

plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.legend()
plt.title('Loss Curve')
plt.show()


In [None]:
# Save the final trained model weights
model_save_path = '/content/drive/MyDrive/shakespeare_gru_model.weights.h5'
model.save_weights(model_save_path)

# Save vocabulary mappings (essential for generation)
import pickle
with open('/content/drive/MyDrive/shakespeare_vocab.pkl', 'wb') as f:
    pickle.dump({'char2idx': char2idx, 'idx2char': idx2char}, f)

print(f"Model saved to: {model_save_path}")
print("Vocabulary mappings saved")

In [None]:
def generate_text(model, start_string, temperature=0.7, num_generate=500):
    # Vectorize the start string
    input_indices = [char2idx[s] for s in start_string]
    input_indices = tf.expand_dims(input_indices, 0)

    text_generated = []
    model.reset_states()

    for _ in range(num_generate):
        predictions = model(input_indices)
        # Remove batch dimension
        predictions = tf.squeeze(predictions, 0)

        # Apply temperature scaling
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

        # Append predicted character and update input
        text_generated.append(idx2char[predicted_id])
        input_indices = tf.expand_dims([predicted_id], 0)

    return start_string + ''.join(text_generated)

In [None]:
import pickle

with open('/content/drive/MyDrive/shakespeare_vocab.pkl', 'rb') as f:
    vocab_data = pickle.load(f)

char2idx = vocab_data['char2idx']
idx2char = vocab_data['idx2char']
vocab_size = len(idx2char)


In [None]:
embedding_dim = 256
rnn_units = 1024

model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)
model.load_weights('/content/drive/MyDrive/shakespeare_gru_model.weights.h5')
model.build(tf.TensorShape([1, None]))


In [None]:
def generate_text(model, start_string, char2idx, idx2char, temperature=1.0, num_generate=300):
    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    text_generated = []

    # 🔁 Reset RNN states
    for layer in model.layers:
        if hasattr(layer, 'reset_states'):
            layer.reset_states()

    for i in range(num_generate):
        predictions = model(input_eval)
        predictions = tf.squeeze(predictions, 0)

        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        input_eval = tf.expand_dims([predicted_id], 0)
        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)


In [None]:
print(generate_text(
    model,
    start_string="once upon a time",
    char2idx=char2idx,
    idx2char=idx2char,
    temperature=1.5
))





In [None]:
# Example 1: Romantic opening
print(generate_text(model, "ROMEO:", temperature=0.7))

# Example 2: Philosophical
print(generate_text(model, "To be or not to be", temperature=0.5))

# Example 3: Comedy
print(generate_text(model, "Fool:", temperature=1.0))

# Example 4: Custom input
your_text = input("Enter starting text: ")
print(generate_text(model, your_text, temperature=0.8))

In [None]:
print(generate_text(model, start_string="she was waiting me", char2idx=char2idx, idx2char=idx2char))
