In [62]:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import TensorDataset, DataLoader

In [63]:
# Tiny training corpus: three sentences glued together into one string.
# The first two pieces carry their own trailing space, so an empty joiner is used.
text = "".join((
    "Once upon a time, in a faraway kingdom, there was a kind and beautiful princess named Snow White. ",
    "She had skin as white as snow, lips as red as roses, and hair as black as coal. ",
    "But she lived with her stepmother, the Queen, who was beautiful on the outside but jealous and cruel on the inside.",
))


In [64]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
# Position in `chars` -> character, and the inverse lookup derived from it.
index_to_char = dict(enumerate(chars))
char_to_index = {symbol: position for position, symbol in index_to_char.items()}

In [65]:
# Number of characters the model sees before predicting the next one.
seq_length = 3
# Containers for the encoded input windows and their next-character targets
# (two distinct, initially empty lists).
sequences, labels = [], []

In [66]:
# Slide a seq_length-character window over the corpus: each window (encoded as
# vocabulary indices) is one input, and the character right after it is its label.
# The lists are rebuilt from scratch here so that re-running this cell does not
# append a second copy of every training pair.
sequences, labels = [], []
for i in range(len(text) - seq_length):
  seq = text[i:i + seq_length]
  label = text[i + seq_length]
  sequences.append([char_to_index[char] for char in seq])
  labels.append(char_to_index[label])

In [67]:
# Pack the index windows and their labels into NumPy arrays. int64 is requested
# explicitly because torch's one_hot and CrossEntropyLoss need 64-bit integer
# indices, while NumPy's default integer type is platform dependent.
x = np.array(sequences, dtype=np.int64)
y = np.array(labels, dtype=np.int64)

In [68]:
# Wrap the NumPy index arrays as torch tensors (inputs first, then targets).
x_tensor, y_tensor = torch.from_numpy(x), torch.from_numpy(y)

In [69]:
# Expand every character index into a one-hot vector over the vocabulary and
# cast to float32 so it can be fed to the LSTM.
x_one_hot = nn.functional.one_hot(x_tensor, num_classes=len(chars)).to(torch.float32)

In [70]:
# Pair each one-hot window with its next-character label and serve the pairs
# in shuffled mini-batches.
dataset = TensorDataset(x_one_hot, y_tensor)
# Named `batch_size` to match the keyword below; the previous name (`batch`)
# left `batch_size` undefined on a fresh kernel.
batch_size = 32
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [71]:
class CharLSTM(nn.Module):
  """Stacked LSTM followed by a linear layer that scores the next character.

  Args:
    input_size: width of each input vector (last dimension of the input).
    hidden_size: number of hidden units in every LSTM layer.
    num_layers: number of stacked LSTM layers.
    num_classes: number of output logits produced per sequence.
  """

  def __init__(self, input_size, hidden_size, num_layers, num_classes):
    super().__init__()
    # batch_first=True: inputs are laid out as (batch, time, features).
    self.lstm = nn.LSTM(
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        batch_first=True,
    )
    self.fc = nn.Linear(in_features=hidden_size, out_features=num_classes)

  def forward(self, x):
    """Return one row of logits per sequence in the batch ``x``."""
    hidden_states, _ = self.lstm(x)
    # Only the top layer's output at the final time step feeds the classifier.
    final_step = hidden_states[:, -1, :]
    return self.fc(final_step)


In [72]:
# Model dimensions: the network reads one-hot characters and emits one logit
# per vocabulary entry, so both ends are as wide as the vocabulary.
input_size = num_classes = len(chars)
hidden_size, num_layers = 160, 6
# Optimisation schedule.
num_epochs, learning_rate = 300, 0.001

In [73]:
# Seed torch's global RNG before the model is built so that weight
# initialisation (and later draws from the same generator) are repeatable when
# the notebook is re-run from this cell; 42 is an arbitrary fixed value.
torch.manual_seed(42)
model = CharLSTM(input_size, hidden_size, num_layers, num_classes)
# Adam updates every model parameter; cross-entropy compares the logits with
# the integer index of the true next character.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [74]:
# Mini-batch training: one optimiser step per batch, then report the mean of
# the per-batch losses for the epoch.
# generate_text() switches the model to eval mode, so put it back into training
# mode here; otherwise re-running this cell after generating text would train
# a model that is still in eval mode.
model.train()
for epoch in range(num_epochs):
  total_loss = 0
  for batch_x, batch_y in dataloader:
    optimizer.zero_grad()  # clear gradients left over from the previous step
    outputs = model(batch_x)
    loss = criterion(outputs, batch_y)
    loss.backward()
    optimizer.step()
    total_loss += loss.item()
  avg_loss = total_loss/len(dataloader)  # len(dataloader) is the number of batches
  print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss: .4f}")

Epoch 1/300, Loss:  3.3970
Epoch 2/300, Loss:  3.1285
Epoch 3/300, Loss:  2.9752
Epoch 4/300, Loss:  2.9870
Epoch 5/300, Loss:  2.8940
Epoch 6/300, Loss:  3.0288
Epoch 7/300, Loss:  2.8870
Epoch 8/300, Loss:  2.8605
Epoch 9/300, Loss:  2.8354
Epoch 10/300, Loss:  2.9352
Epoch 11/300, Loss:  2.8447
Epoch 12/300, Loss:  2.8204
Epoch 13/300, Loss:  2.9989
Epoch 14/300, Loss:  2.9361
Epoch 15/300, Loss:  2.8231
Epoch 16/300, Loss:  2.9554
Epoch 17/300, Loss:  2.9067
Epoch 18/300, Loss:  2.9143
Epoch 19/300, Loss:  2.9502
Epoch 20/300, Loss:  3.0056
Epoch 21/300, Loss:  2.9799
Epoch 22/300, Loss:  2.9381
Epoch 23/300, Loss:  2.9061
Epoch 24/300, Loss:  2.8641
Epoch 25/300, Loss:  2.8541
Epoch 26/300, Loss:  2.9122
Epoch 27/300, Loss:  2.8390
Epoch 28/300, Loss:  2.9352
Epoch 29/300, Loss:  2.8783
Epoch 30/300, Loss:  2.7518
Epoch 31/300, Loss:  2.9853
Epoch 32/300, Loss:  2.7974
Epoch 33/300, Loss:  2.7225
Epoch 34/300, Loss:  2.8557
Epoch 35/300, Loss:  2.7779
Epoch 36/300, Loss:  2.8516
E

In [75]:
def generate_text(model, start_seq, length, char_to_index, index_to_char, context_len=None):
  """Greedily extend ``start_seq`` by ``length`` characters predicted by ``model``.

  At every step the most recent characters are one-hot encoded, passed through
  the model, and the highest-scoring character is appended to the output.

  Args:
    model: module mapping a (1, steps, vocab) float tensor to (1, vocab) logits.
    start_seq: non-empty prompt; every character must be a key of ``char_to_index``.
    length: number of characters to generate.
    char_to_index: mapping from character to vocabulary index.
    index_to_char: mapping from vocabulary index back to character.
    context_len: how many trailing characters to feed the model at each step.
      Defaults to the notebook-level ``seq_length`` when omitted.

  Returns:
    ``start_seq`` followed by the ``length`` generated characters.

  Raises:
    ValueError: if ``start_seq`` is empty or the context length is below 1.
    KeyError: if ``start_seq`` contains a character missing from ``char_to_index``.
  """
  if not start_seq:
    raise ValueError("start_seq must contain at least one character")
  # Fall back to the training window defined at notebook level.
  window = seq_length if context_len is None else context_len
  if window < 1:
    raise ValueError("context length must be at least 1")
  # Size the one-hot encoding from the mapping that was passed in rather than
  # from a notebook global, so the function works with any vocabulary.
  vocab_size = len(char_to_index)

  model.eval()
  seq = [char_to_index[c] for c in start_seq]
  pieces = [start_seq]
  with torch.no_grad():
    for _ in range(length):
      # Batch of one: only the last `window` indices are shown to the model.
      x = torch.tensor([seq[-window:]])
      x_onehot = torch.nn.functional.one_hot(x, num_classes=vocab_size).float()
      pred = model(x_onehot).argmax(dim=1).item()
      pieces.append(index_to_char[pred])
      seq.append(pred)
  return "".join(pieces)

In [76]:
# Print a header, then a 100-character greedy continuation of the prompt "Once".
print("Generated Text: ")
print(
    generate_text(
        model,
        start_seq="Once",
        length=100,
        char_to_index=char_to_index,
        index_to_char=index_to_char,
    )
)

Generated Text: 
Once upon the had sioeesa, khndaw ahd with  eiu .iu .iu .iu .iu .iu .iu .iu .iu .iu .iu .iu .iu .iu .iu 
