In [None]:
# Mount Google Drive at /content/drive (Colab only). Later cells in this
# notebook write and read model checkpoints under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import torch
import torch.nn as nn

In [None]:
# Path to the raw training corpus (plain text).
filename = '/content/shakespeare_data.txt'

# Read the whole corpus into memory. The encoding is passed explicitly so the
# decoded text does not depend on the runtime's default locale encoding.
with open(filename, encoding='utf-8') as files:
  text = files.read()

In [None]:
# Sanity check: total number of characters read from the corpus.
print("length of dataset in characters" , len(text))

length of dataset in characters 5283879


In [None]:
# Peek at the first 1000 characters of the corpus.
print(text[:1000])

  	A LOVER'S COMPLAINT



FROM off a hill whose concave womb reworded
A plaintful story from a sistering vale,
My spirits to attend this double voice accorded,
And down I laid to list the sad-tuned tale;
Ere long espied a fickle maid full pale,
Tearing of papers, breaking rings a-twain,
Storming her world with sorrow's wind and rain.

Upon her head a platted hive of straw,
Which fortified her visage from the sun,
Whereon the thought might think sometime it saw
The carcass of beauty spent and done:
Time had not scythed all that youth begun,
Nor youth all quit; but, spite of heaven's fell rage,
Some beauty peep'd through lattice of sear'd age.

Oft did she heave her napkin to her eyne,
Which on it had conceited characters,
Laundering the silken figures in the brine
That season'd woe had pelleted in tears,
And often reading what contents it bears;
As often shrieking undistinguish'd woe,
In clamours of all size, both high and low.

Sometimes her levell'd eyes their carriage ride,
As they d

In [None]:
# Character-level vocabulary: every distinct character of the corpus, sorted
# so that the id assigned to each character is deterministic across runs.
vocab = sorted(set(text))
print(''.join(vocab))

# Lookup tables in both directions: id -> character and character -> id.
mapinttochar = dict(enumerate(vocab))
mapchartoint = {ch: i for i, ch in mapinttochar.items()}
print(mapchartoint)
print(mapinttochar)

	
 !$&'(),-.0123456789:;?ABCDEFGHIJKLMNOPQRSTUVWXYZ[]abcdefghijklmnopqrstuvwxyz|
{'\t': 0, '\n': 1, ' ': 2, '!': 3, '$': 4, '&': 5, "'": 6, '(': 7, ')': 8, ',': 9, '-': 10, '.': 11, '0': 12, '1': 13, '2': 14, '3': 15, '4': 16, '5': 17, '6': 18, '7': 19, '8': 20, '9': 21, ':': 22, ';': 23, '?': 24, 'A': 25, 'B': 26, 'C': 27, 'D': 28, 'E': 29, 'F': 30, 'G': 31, 'H': 32, 'I': 33, 'J': 34, 'K': 35, 'L': 36, 'M': 37, 'N': 38, 'O': 39, 'P': 40, 'Q': 41, 'R': 42, 'S': 43, 'T': 44, 'U': 45, 'V': 46, 'W': 47, 'X': 48, 'Y': 49, 'Z': 50, '[': 51, ']': 52, 'a': 53, 'b': 54, 'c': 55, 'd': 56, 'e': 57, 'f': 58, 'g': 59, 'h': 60, 'i': 61, 'j': 62, 'k': 63, 'l': 64, 'm': 65, 'n': 66, 'o': 67, 'p': 68, 'q': 69, 'r': 70, 's': 71, 't': 72, 'u': 73, 'v': 74, 'w': 75, 'x': 76, 'y': 77, 'z': 78, '|': 79}
{0: '\t', 1: '\n', 2: ' ', 3: '!', 4: '$', 5: '&', 6: "'", 7: '(', 8: ')', 9: ',', 10: '-', 11: '.', 12: '0', 13: '1', 14: '2', 15: '3', 16: '4', 17: '5', 18: '6', 19: '7', 20: '8', 21: '9', 22: ':', 23: ';

In [None]:
def encode(s):
    """Map a string to the list of integer ids of its characters.

    Raises KeyError if a character is not present in `mapchartoint`.
    """
    return [mapchartoint[c] for c in s]


def decode(l):
    """Map an iterable of integer ids back to the string they spell.

    Raises KeyError if an id is not present in `mapinttochar`.
    """
    return ''.join(mapinttochar[i] for i in l)

In [None]:
# Round-trip check: encoding then decoding should reproduce the input string.
print(encode("Hello"))
print(decode(encode("Hello")))

[32, 57, 64, 64, 67]
Hello


In [None]:
# Vocabulary size: number of distinct characters in the lookup table.
len(mapchartoint)

80

In [None]:
# Encode the full corpus as a 1-D tensor of int64 character ids.
data = torch.tensor(encode(text),dtype=torch.long)

In [None]:
# Hold out the last 10% of the encoded corpus for validation; the first 90%
# is used for training. The split is positional (no shuffling).
n = int(0.9*len(data))
train_text = data[:n]
eval_text = data[n:]

# `data` holds one id per character, so these are character counts, not lines.
print(f"Number of training characters: {len(train_text)}")
print(f"Number of validation characters: {len(eval_text)}")

Number of training lines: 4755491
Number of validation lines: 528388


In [None]:
# Inspect the first 100 character ids of the validation split.
eval_text[:100]

tensor([60, 67, 73,  2, 75, 60, 57, 72,  6, 71, 72,  2, 53,  2, 63, 66, 61, 58,
        57,  2, 72, 67,  2, 63, 61, 64, 64,  2, 72, 60, 77, 71, 57, 64, 58, 11,
         1,  0, 44, 60, 57,  2, 72, 61, 65, 57,  2, 75, 61, 64, 64,  2, 55, 67,
        65, 57,  2, 75, 60, 57, 66,  2, 72, 60, 67, 73,  2, 71, 60, 53, 64, 72,
         2, 75, 61, 71, 60,  2, 58, 67, 70,  2, 65, 57,  1,  0, 44, 67,  2, 60,
        57, 64, 68,  2, 72, 60, 57, 57,  2, 55])

In [None]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    """Non-overlapping fixed-length windows for next-token prediction.

    The sequence `data` is cut into consecutive windows of `block_size`
    elements. Each sample is a pair `(source, label)` where `label` is the
    same window shifted one position to the right, i.e. `label[k]` is the
    element that follows `source[k]` in `data`.

    Only windows whose source AND label are both exactly `block_size` long
    are kept, so every sample has the same shape and can be batched.

    Args:
        data: sliceable sequence supporting `len()` (e.g. a 1-D tensor).
        block_size: number of elements per window; must be positive.

    Raises:
        ValueError: if `block_size` is not positive.
    """

    def __init__(self, data, block_size):
        if block_size <= 0:
            raise ValueError("block_size must be a positive integer")

        self.data = data
        self.block_size = block_size
        self.source, self.labels = self.create_data(self.data, self.block_size)

    def __len__(self):
        # Report exactly what was built, so the length can never disagree
        # with __getitem__ (and is 0, not negative, for very short inputs).
        return len(self.source)

    def create_data(self, data, block_size):
        """Return `(sources, labels)`: two equally long lists of windows."""
        source_lines = []
        labels_lines = []
        # A window starting at i needs data[i : i + block_size + 1] to exist
        # (block_size inputs plus one extra element for the shifted label),
        # hence the stop value of len(data) - block_size.
        for i in range(0, len(data) - block_size, block_size):
            source_lines.append(data[i:i + block_size])
            labels_lines.append(data[i + 1:i + block_size + 1])
        return source_lines, labels_lines

    def __getitem__(self, idx):
        return self.source[idx], self.labels[idx]







In [None]:
# Build the train/validation datasets. block_size=8 is the number of
# character ids in each input window (and in its shifted label window).
train_dataset = CustomDataset(train_text,block_size=8)
val_dataset = CustomDataset(eval_text,block_size=8)

In [None]:
import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    """Sequence model: embedding -> single-layer LSTM -> linear -> log-softmax.

    Args:
        vocab_size: number of distinct token ids; used both as the number of
            embedding rows and as the size of the output distribution.
        embedding_dim: size of each token embedding fed to the LSTM.
        n_units: size of the LSTM hidden state.
    """

    def __init__(self, vocab_size, embedding_dim, n_units):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)
        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=n_units, batch_first=True)
        self.fc = nn.Linear(in_features=n_units, out_features=vocab_size)
        self.log_softmax = nn.LogSoftmax(dim=-1)

    def forward(self, x):
        """Return log-probabilities over the vocabulary for every position.

        `x` holds integer token ids; because the LSTM is built with
        `batch_first=True`, the leading axis of a batched input is the batch.
        The output has one extra trailing axis of size `vocab_size`.
        """
        embedded = self.embedding(x)
        # Only the per-step outputs are used; the final (h, c) state is dropped.
        hidden_states, _ = self.lstm(embedded)
        logits = self.fc(hidden_states)
        return self.log_softmax(logits)



In [None]:
def count_parameters(model):
    """Return the total element count of the model's trainable parameters.

    Parameters with `requires_grad == False` (frozen) are not counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total


In [None]:
import os
def save_checkpoint(model, optimizer, epoch, path):
    """Write a training checkpoint to `path`, creating parent folders as needed.

    The checkpoint is a dict with the keys 'epoch', 'model_state_dict' and
    'optimizer_state_dict', serialized with `torch.save`.

    Args:
        model: object exposing `state_dict()` (e.g. an `nn.Module`).
        optimizer: object exposing `state_dict()` (e.g. a torch optimizer).
        epoch: epoch number stored in the checkpoint and echoed in the log line.
        path: destination file; may be a bare filename or include directories.
    """
    directory = os.path.dirname(path)
    # dirname() is '' for a bare filename and os.makedirs('') raises, so only
    # create directories when there is one. exist_ok avoids the check-then-create
    # race of testing os.path.exists first.
    if directory:
        os.makedirs(directory, exist_ok=True)

    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }

    torch.save(checkpoint, path)

    print(f"Checkpoint saved at epoch {epoch}")

In [None]:
from tqdm import tqdm
import torch.optim as optim


# Model hyperparameters. The vocabulary size is derived from the vocabulary
# built from the corpus rather than hard-coded, so the embedding and output
# layers always match the data.
VOCAB_SIZE = len(vocab)
EMBEDDING_DIM = 200
N_UNITS = 256

# Epoch (1-based) at which a checkpoint is written to Drive.
# NOTE: nothing is saved when num_epochs below is smaller than this value.
CHECKPOINT_EPOCH = 30

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LSTMModel(VOCAB_SIZE, EMBEDDING_DIM, N_UNITS).to(device)

train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# Validation only measures the loss, so it is not shuffled; this keeps the
# reported number deterministic for a given set of weights.
val_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=False)

# The model already returns log-probabilities. CrossEntropyLoss applies
# log-softmax once more, which leaves normalized log-probabilities unchanged,
# so the loss value is the usual negative log-likelihood.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr= 0.00125)

num_epochs = 2

for epoch in range(num_epochs):

    # Running sums of per-batch mean losses for this epoch.
    epoch_loss = 0
    val_loss = 0

    model.train()

    for inputs, targets in tqdm(train_dataloader):

        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)

        # Flatten (batch, time, vocab) -> (batch*time, vocab) and
        # (batch, time) -> (batch*time,) as expected by the loss.
        outputs = outputs.view(-1, model.fc.out_features)
        targets = targets.view(-1)
        loss = loss_fn(outputs, targets)

        loss.backward()

        optimizer.step()

        epoch_loss += loss.item()

    print(f"Epoch {epoch + 1}/{num_epochs}, Training Loss: {epoch_loss / len(train_dataloader)}")

    model.eval()
    with torch.no_grad():
        for inputs, targets in val_dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            outputs = outputs.view(-1, model.fc.out_features)
            targets = targets.view(-1)
            loss = loss_fn(outputs, targets)

            val_loss += loss.item()

    # Average the VALIDATION sum (not the training sum) over validation batches.
    print(f"Epoch {epoch + 1}/{num_epochs},  Validation Loss: {val_loss / len(val_dataloader)}")
    if epoch + 1 == CHECKPOINT_EPOCH:
        save_checkpoint(model, optimizer, epoch + 1, f"/content/drive/MyDrive/checkpoint/CharacterLevelLanguageModeling_t_checkpoint_epoch_{epoch + 1}.pth")



100%|██████████| 9289/9289 [00:31<00:00, 295.98it/s]


Epoch 1/2, Training Loss: 1.826241798492766
Epoch 1/2,  Validation Loss: 16.43794580058072


100%|██████████| 9289/9289 [00:31<00:00, 297.76it/s]


Epoch 2/2, Training Loss: 1.7060079968056467
Epoch 2/2,  Validation Loss: 15.355725079774857


In [None]:
# Load the saved training state. map_location remaps the stored tensors onto
# the device selected for this session, so a checkpoint written on a GPU
# runtime still loads on a CPU-only one.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
checkpoint = torch.load("/content/drive/MyDrive/checkpoint/CharacterLevelLanguageModeling_t_checkpoint_epoch_30.pth", map_location=device)

  checkpoint = torch.load("/content/drive/MyDrive/checkpoint/CharacterLevelLanguageModeling_t_checkpoint_epoch_30.pth")


In [None]:
# Restore the model weights stored under 'model_state_dict' in the checkpoint.
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [None]:
import torch

def generate_text(model, start_sequence, char_to_idx, idx_to_char, length=100, temperature=1.0):
    """Sample `length` characters from `model`, continuing `start_sequence`.

    Args:
        model: module called as `model(ids)` with a `(1, T)` tensor of token
            ids and returning per-position scores of shape `(1, T, vocab)`.
            A softmax is applied here, so logits and log-probabilities both work.
        start_sequence: non-empty prompt; every character must be a key of
            `char_to_idx`.
        char_to_idx: mapping character -> token id, used to encode the prompt.
        idx_to_char: mapping token id -> character, used to decode samples.
        length: number of characters to generate after the prompt.
        temperature: positive scale; scores are divided by it before the
            softmax, so values below 1 make sampling more peaked.

    Returns:
        The prompt followed by the `length` sampled characters.

    Raises:
        ValueError: if `start_sequence` is empty or `temperature` is not positive.
        KeyError: if the prompt contains a character missing from `char_to_idx`.
    """
    if not start_sequence:
        raise ValueError("start_sequence must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    model.eval()  # Set the model to evaluation mode

    # Run on whatever device the model lives on instead of relying on a global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    # Encode with the mapping passed in, not with a module-level helper.
    token_ids = [char_to_idx[ch] for ch in start_sequence]
    input_seq = torch.tensor(token_ids, dtype=torch.long, device=target_device).unsqueeze(0)

    # Collect pieces and join once at the end instead of growing a string.
    pieces = [start_sequence]

    with torch.no_grad():
        for _ in range(length):
            # The whole sequence so far is re-fed each step; no recurrent
            # state is carried over between iterations.
            output = model(input_seq)[:, -1, :]  # Scores of the last time step
            probabilities = torch.nn.functional.softmax(output / temperature, dim=-1).squeeze(0)
            predicted_idx = torch.multinomial(probabilities, 1).item()
            pieces.append(idx_to_char[predicted_idx])

            next_token = torch.tensor([[predicted_idx]], dtype=torch.long, device=target_device)
            input_seq = torch.cat([input_seq, next_token], dim=1)

    return ''.join(pieces)

# Example usage: sample 1000 characters continuing the prompt below.
# The temperature divides the model scores before the softmax inside
# generate_text, so 0.8 makes the sampling distribution more peaked than 1.0.
start_sequence = "i like y"

generated_text = generate_text(model, start_sequence, mapchartoint, mapinttochar, length=1000, temperature=0.8)
print(generated_text)


i like your weakness, and free and our Dromio, whose bed her sight
	Is doth leave to me of man. Master Brook, sir,
	Winter, that it damn'd to your highness, and punish Aumer Slender; the last lady's sharp subject
	Deeds he was a gallant delights disposition, burntly thus that hath not the more in nature,
	So minded; I'll and we all after herself, that you have not mockering.

	[Exeunt TRINCULO]

	The gauntrous man more sorry, she is? Forest to my sleeve
That man's witch you find the love and men's new-deep and do in death be coming.

TIMON	What, John Cupid help, in a most things and the field
	And leave a month prince in the trim that mangled the former offence holds your sword can see the love,
	So sure I am no more than another's heart and straight he was the cause with the fair scarcely too round
	And never let him well and peace, that have I seen your hour, even to crowns,
	Of our esteems that I have peevisous pleasant kingdom, come to whom of Sicilia.

VIOLA	By my thoughts and tom