In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import lightning as L

from torch.utils.data import DataLoader, TensorDataset
device = "cuda" if torch.cuda.is_available() else "cpu"

In [20]:
with open('input.txt', 'r') as f:
    text = f.read()

In [21]:
string_to_integers = {ch:i for i, ch in enumerate(sorted(set(text)))}
integers_to_string = {i:ch for i, ch in enumerate(sorted(set(text)))}

encode = lambda x: [string_to_integers[ch] for ch in x]
decode = lambda x: ''.join([integers_to_string[i] for i in x])

In [22]:
data = torch.tensor(encode(text))
data.to(device)

split = int(0.9 * len(data))
train_data, val_data = data[:split], data[split:]

In [23]:
batch_size = 8
block_size = 16
def get_batch(split):
    data = train_data if split == 'train' else val_data
    idx = torch.randint(len(data) - block_size, (batch_size,))
    x = torch.stack([data[i:i+block_size] for i in idx])
    y = torch.stack([data[i+1:i+1+block_size] for i in idx])
    return x, y

xt, yt = get_batch('train')
xt, yt = xt.to(device), yt.to(device)

In [14]:
class BigramLightningModule(L.LightningModule):
    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)
    
    def forward(self, x):
        x = self.embedding(x)
        x, _ = self.rnn(x)
        x = self.fc(x)
        return x
    
    def loss(self, x, y):
        x = self(x)
        return F.cross_entropy(x.view(-1, x.size(-1)), y.view(-1))
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        loss = self.loss(x, y)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        loss = self.loss(x, y)
        return loss
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.001)  


In [None]:
model = BigramLightningModule(vocab_size=len(string_to_integers), embedding_dim=16, hidden_dim=256).to(device)
trainer = L.Trainer(max_epochs=10)

trainer.fit(model, DataLoader(TensorDataset(train_data, train_data), batch_size=batch_size, shuffle=True), DataLoader(TensorDataset(val_data, val_data), batch_size=batch_size))


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name      | Type      | Params | Mode 
------------------------------------------------
0 | embedding | Embedding | 1.0 K  | train
1 | rnn       | RNN       | 70.1 K | train
2 | fc        | Linear    | 16.7 K | train
------------------------------------------------
87.9 K    Trainable params
0         Non-trainable params
87.9 K    Total params
0.352     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


                                                                            

c:\Users\11ukn\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
c:\Users\11ukn\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Epoch 0:   4%|▍         | 4750/125482 [00:38<16:07, 124.80it/s, v_num=1]


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined