In [None]:
!pip install pytorch_lightning
import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping,ModelCheckpoint

import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import string
import numpy as np
import requests

In [2]:
# url = "https://www.gutenberg.org/files/100/100-0.txt"
# response = requests.get(url)

# with open("shakespeare.txt", "w", encoding='utf-8') as file:
#     file.write(response.text)


In [3]:
import tensorflow as tf
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [4]:
# Reading, then decoding for py2 compat.
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
# length of text is the number of characters in it
print(f'Length of text: {len(text)} characters')

Length of text: 1115394 characters


In [5]:
# Taking a look at the first 250 characters in text
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [6]:
# The unique characters in the file
vocab = sorted(set(text))
print(f'{len(vocab)} unique characters')

65 unique characters


In [7]:
# Converting text to lowercase
text = text.lower()

In [8]:
# Removing punctuation (optional)
text = text.translate(str.maketrans('', '', string.punctuation))

In [9]:
chars = tuple(set(text))
int2char = dict(enumerate(chars))
char2int = {ch: ii for ii, ch in int2char.items()}

# Encoding the text
encoded = np.array([char2int[ch] for ch in text])

In [10]:
class TextDataset(Dataset):
    def __init__(self, data, seq_length):
        self.data = data
        self.seq_length = seq_length

    def __len__(self):
        return len(self.data) - self.seq_length

    def __getitem__(self, idx):
        x = self.data[idx:idx+self.seq_length]
        y = self.data[idx+1:idx+self.seq_length+1]
        return torch.tensor(x, dtype=torch.long), torch.tensor(y, dtype=torch.long)

In [11]:
seq_length = 100
batch_size = 64

dataset = TextDataset(encoded, seq_length)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [12]:
class ShakespeareLSTM(pl.LightningModule):
    def __init__(self, num_chars, seq_length, hidden_units, num_layers, lr):
        super(ShakespeareLSTM, self).__init__()
        self.save_hyperparameters()

        self.num_chars = num_chars
        self.seq_length = seq_length
        self.hidden_units = hidden_units
        self.num_layers = num_layers
        self.lr = lr

        self.embedding = nn.Embedding(num_chars, hidden_units)
        self.lstm = nn.LSTM(hidden_units, hidden_units, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_units, num_chars)

    def forward(self, x, hidden):
        x = self.embedding(x)
        x, hidden = self.lstm(x, hidden)
        x = self.fc(x)
        return x, hidden

    def training_step(self, batch, batch_idx):
        x, y = batch
        hidden = self.init_hidden(x.size(0))
        y_hat, _ = self(x, hidden)
        loss = nn.CrossEntropyLoss()(y_hat.view(-1, self.num_chars), y.view(-1))
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        hidden = self.init_hidden(x.size(0))
        y_hat, _ = self(x, hidden)
        loss = nn.CrossEntropyLoss()(y_hat.view(-1, self.num_chars), y.view(-1))
        self.log('val_loss', loss)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        return optimizer

    def init_hidden(self, batch_size):
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_units).to(self.device)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_units).to(self.device)
        return (h0, c0)


In [13]:
# Defining the model
model = ShakespeareLSTM(num_chars=len(chars), seq_length=seq_length, hidden_units=512, num_layers=2, lr=0.001)

# Defining the early stopping callback
early_stopping = EarlyStopping(
    monitor='val_loss',  # Metric to monitor
    patience=10,         # Number of epochs with no improvement after which training will be stopped
    verbose=True,        # Display messages when stopping early
    mode='min'           # Minimize the monitored metric
)
# Defining the model checkpoint callback
checkpoint_callback = ModelCheckpoint(
    monitor='val_loss',   # Metric to monitor
    filename='shakespeare-{epoch:02d}-{val_loss:.2f}',  # Filename format
    save_top_k=1,         # Save only the best model
    mode='min'            # Minimize the monitored metric
)

# Training the model with early stopping
trainer = pl.Trainer(max_epochs=30, callbacks=[early_stopping,checkpoint_callback])
trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.utilities.rank_zero:You are using a CUDA device ('NVIDIA A100-SXM4-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type      | Params
----------------------------------------
0 | embedding | Embedding | 14.8 K
1 | lstm      | LSTM      | 4

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved. New best score: 0.361


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.054 >= min_delta = 0.0. New best score: 0.307


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.024 >= min_delta = 0.0. New best score: 0.283


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 0.277


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 0.271


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 0.267


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.265


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.261


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.260


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.258


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.256


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 0.253


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.253


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric val_loss did not improve in the last 10 records. Best score: 0.253. Signaling Trainer to stop.


In [17]:
def preprocess_text(text):
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    return text

def generate_text(model, start_str, char2int, int2char, num_chars=1000):
    model.eval()

    # Preprocess the input string
    input_str = preprocess_text(start_str)

    # Convert input string to integers
    input_data = [char2int[c] for c in input_str]

    generated_text = start_str

    hidden = model.init_hidden(1)

    for _ in range(num_chars):
        input_tensor = torch.tensor(input_data).unsqueeze(0).to(model.device)
        output, hidden = model(input_tensor, hidden)

        next_char_idx = torch.argmax(output[0, -1]).item()
        next_char = int2char[next_char_idx]

        generated_text += next_char
        input_data = input_data[1:] + [next_char_idx]

    return generated_text

# Generate text
start_string = "Shall I compare thee to a summer's day?"
print(generate_text(model, start_string, char2int, int2char))


Shall I compare thee to a summer's day?
that ever ever i did yet behold
o day o day o day o hateful day
never was seem but stand i will not hurt your hearth

third servingman
what are you

coriolanus
a gentleman

third servingman
a marvellous poor one

clarence
by heaven i will not do it to the death

catesby
god keep your lordship in that gracious mind

hastings
but i shall laugh at this a twelvemonth hence
that they who brought me in my masters hate
i live to look upon their tragedy
i tell thee catesby

catesby
what my lord

hastings
ere a fortitue seemed by the freshy of the feast and she lays it
on she hath made me four and then the rest
counting myself but bad till i be best
ill throw thy body in another room
and triumph henry in thy day of life
each part deprived of supple government
shall i at learn the tide will wash you off
or else you gentle my lord even so
the general is my lover i have been
the book of his report of him another
nothing so certain as i know the sun
do as the