<a href="https://colab.research.google.com/github/FurqanBhat/ML-Colab-Notebooks/blob/main/LSTM_%26_Text_Gen.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

In [7]:
class LSTM(nn.Module):
    """Single-step LSTM cell built from four independent linear gate layers.

    Every gate is an ``nn.Linear`` that maps the concatenation of the current
    input and the previous hidden state (``input_size + hidden_size``
    features) to ``hidden_size`` features.
    """

    def __init__(self, input_size, hidden_size):
        """Create the forget, input, cell-candidate and output gate layers.

        Args:
            input_size: Number of features in each step's input vector.
            hidden_size: Number of features in the hidden and cell states.
        """
        super().__init__()
        self.hidden_size = hidden_size

        gate_in_features = input_size + hidden_size
        # Layers are instantiated in the order forget, input, cell, output.
        self.forget_gate = nn.Linear(gate_in_features, hidden_size)
        self.input_gate = nn.Linear(gate_in_features, hidden_size)
        self.cell_gate = nn.Linear(gate_in_features, hidden_size)
        self.output_gate = nn.Linear(gate_in_features, hidden_size)

    def forward(self, input, hidden, cell):
        """Advance the cell by one time step.

        Args:
            input: Current step's input; it is concatenated with ``hidden``
                along dimension 1 (presumably ``(batch, input_size)``).
            hidden: Previous hidden state (presumably ``(batch, hidden_size)``).
            cell: Previous cell state, combined element-wise with the gates.

        Returns:
            A ``(hidden, cell)`` tuple holding the updated states.
        """
        joined = torch.cat([input, hidden], dim=1)

        keep = torch.sigmoid(self.forget_gate(joined))      # how much old cell state survives
        write = torch.sigmoid(self.input_gate(joined))      # how much of the candidate is stored
        candidate = torch.tanh(self.cell_gate(joined))      # proposed new cell content
        expose = torch.sigmoid(self.output_gate(joined))    # how much of the cell is revealed

        next_cell = keep * cell + write * candidate
        next_hidden = expose * torch.tanh(next_cell)
        return next_hidden, next_cell

In [None]:
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
# NOTE: `is_available` has to be *called*; the bare function object is always
# truthy and would select 'cuda' even on machines without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
# Data loading: read the corpus and build the character-level vocabulary.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default.
with open("shakespear_1000.txt", "r", encoding="utf-8") as f:
    text = f.read()

# Sorted unique characters; a character's position in `chars` is its token id.
chars = sorted(set(text))
vocab_size = len(chars)

char2idx = {ch: i for i, ch in enumerate(chars)}  # character -> token id
idx2char = dict(enumerate(chars))                 # token id -> character
encoded_text = [char2idx[c] for c in text]        # whole corpus as token ids



In [11]:
# Sequence preparation: slide a fixed-length window over the encoded corpus.
seq_length = 100

# One window per start offset; each target is its input shifted right by one
# character, so position t of the target is the character following position
# t of the input.
window_starts = range(len(encoded_text) - seq_length)
inputs = [encoded_text[start:start + seq_length] for start in window_starts]
targets = [encoded_text[start + 1:start + seq_length + 1] for start in window_starts]

input_tensor = torch.tensor(inputs, dtype=torch.long)
target_tensor = torch.tensor(targets, dtype=torch.long)

# Pair each input window with its target and serve them in shuffled batches of 64.
dataset = TensorDataset(input_tensor, target_tensor)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)


[[29, 43, 36, 55, 4, 1, 51, 50, 50, 53, 1, 38, 50, 49, 55, 40, 48, 51, 55, 4, 1, 50, 53, 1, 38, 47, 36, 44, 48, 3, 39, 1, 55, 43, 50, 56, 1, 54, 47, 40, 51, 55, 1, 54, 50, 1, 41, 36, 44, 55, 43, 41, 56, 47, 4, 0, 18, 1, 48, 36, 60, 1, 38, 50, 49, 55, 53, 44, 57, 40, 1, 50, 56, 53, 1, 41, 36, 55, 43, 40, 53, 8, 1, 36, 49, 39, 4, 1, 44, 49, 1, 55, 43, 40, 44, 53, 1, 39, 40, 41], [43, 36, 55, 4, 1, 51, 50, 50, 53, 1, 38, 50, 49, 55, 40, 48, 51, 55, 4, 1, 50, 53, 1, 38, 47, 36, 44, 48, 3, 39, 1, 55, 43, 50, 56, 1, 54, 47, 40, 51, 55, 1, 54, 50, 1, 41, 36, 44, 55, 43, 41, 56, 47, 4, 0, 18, 1, 48, 36, 60, 1, 38, 50, 49, 55, 53, 44, 57, 40, 1, 50, 56, 53, 1, 41, 36, 55, 43, 40, 53, 8, 1, 36, 49, 39, 4, 1, 44, 49, 1, 55, 43, 40, 44, 53, 1, 39, 40, 41, 40], [36, 55, 4, 1, 51, 50, 50, 53, 1, 38, 50, 49, 55, 40, 48, 51, 55, 4, 1, 50, 53, 1, 38, 47, 36, 44, 48, 3, 39, 1, 55, 43, 50, 56, 1, 54, 47, 40, 51, 55, 1, 54, 50, 1, 41, 36, 44, 55, 43, 41, 56, 47, 4, 0, 18, 1, 48, 36, 60, 1, 38, 50, 49, 55,

In [None]:
class ShakespeareGenerator(nn.Module):
    """Character-level language model: embedding -> LSTM cell -> linear head."""

    def __init__(self, vocab_size, input_size, hidden_size):
        """Build the three sub-modules.

        Args:
            vocab_size: Number of distinct tokens; sizes the embedding table
                and the output layer.
            input_size: Embedding dimension fed into the LSTM cell.
            hidden_size: Size of the LSTM hidden state fed into the output layer.
        """
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, input_size)
        self.lstm = LSTM(input_size, hidden_size)  # single-step cell, unrolled in forward()
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, h, c):
        """Run the cell over every position along dimension 1 of ``x``.

        Args:
            x: 2-D tensor of token ids; column ``t`` is the input at step ``t``.
            h: Initial hidden state passed to the LSTM cell.
            c: Initial cell state passed to the LSTM cell.

        Returns:
            A tuple ``(logits, h, c)``: the per-step outputs of ``fc`` stacked
            along dimension 1, followed by the final hidden and cell states.
        """
        _, num_steps = x.shape
        step_logits = []

        for step in range(num_steps):
            token_vectors = self.embedding(x[:, step])
            h, c = self.lstm(token_vectors, h, c)
            step_logits.append(self.fc(h))

        stacked = torch.stack(step_logits, dim=1)
        return stacked, h, c

In [None]:
# === Hyperparameters & Setup ===
# Run on the GPU when CUDA is usable, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Embedding width and LSTM state width.
input_size, hidden_size = 128, 256

model = ShakespeareGenerator(vocab_size, input_size, hidden_size)
model = model.to(device)

# Cross-entropy over the vocabulary logits, optimised with Adam.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.002)


In [None]:

# === Training Loop ===
# Five passes over the data; the mean per-batch loss is printed after each pass.
for epoch in range(5):
    model.train()
    total_loss = 0
    for x_batch, y_batch in dataloader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)

        # Every batch starts from an all-zero hidden and cell state, sized to
        # the actual number of rows in this batch.
        h = torch.zeros(x_batch.size(0), hidden_size, device=device)
        c = torch.zeros_like(h)

        optimizer.zero_grad()
        output = model(x_batch, h, c)[0]

        # Flatten the leading dimensions so each position is one classification example.
        flat_logits = output.view(-1, vocab_size)
        flat_targets = y_batch.view(-1)
        loss = loss_fn(flat_logits, flat_targets)

        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss / len(dataloader):.4f}")


Epoch 1, Loss: 0.0802
Epoch 2, Loss: 0.0795
Epoch 3, Loss: 0.0788
Epoch 4, Loss: 0.0775
Epoch 5, Loss: 0.0771


In [None]:
# torch.save(model.state_dict(), './model.pth') # Save the model's state_dict to a file named 'model.pth' in the current directory.

In [None]:
def generate(model, start_text, char2idx, idx2char, length=100, device=None):
    """
    Generates text by sampling one character at a time from the model.

    The whole of ``start_text`` is fed through the model first; afterwards each
    sampled character is fed back in as the next single-step input while the
    hidden and cell states are carried forward.

    Args:
        model: The trained model. It is called as ``model(tokens, h, c)`` and
            must return ``(logits, h, c)``; ``model.lstm.hidden_size`` sizes
            the initial states.
        start_text: The initial text to start generation from (non-empty).
        char2idx: A dictionary mapping characters to their indices.
        idx2char: A dictionary mapping indices to their characters.
        length: The number of characters to sample after ``start_text``.
        device: The device to run on. When omitted, the device of the model's
            first parameter is used (CPU if the model has no parameters), so
            the inputs always live where the model does.

    Returns:
        ``start_text`` followed by ``length`` sampled characters.

    Raises:
        ValueError: If ``start_text`` is empty.
        KeyError: If ``start_text`` contains a character missing from ``char2idx``.

    Note:
        The model is switched to evaluation mode and left in that mode.
    """
    if not start_text:
        raise ValueError("start_text must contain at least one character")

    if device is None:
        # Follow the model instead of assuming CUDA, so CPU-only hosts work.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()  # Set the model to evaluation mode

    tokens = torch.tensor(
        [[char2idx[ch] for ch in start_text]], dtype=torch.long, device=device
    )
    h = torch.zeros(1, model.lstm.hidden_size, device=device)
    c = torch.zeros(1, model.lstm.hidden_size, device=device)

    sampled = []
    # No gradients are needed for sampling; without this the autograd graph
    # would keep growing through h and c across every generated character.
    with torch.no_grad():
        for _ in range(length):
            out, h, c = model(tokens, h, c)
            # Only the logits of the last position predict the next character.
            probs = torch.softmax(out[0, -1], dim=0)
            next_char_idx = torch.multinomial(probs, 1).item()
            sampled.append(idx2char[next_char_idx])
            # The sampled character becomes the next single-step input.
            tokens = torch.tensor([[next_char_idx]], dtype=torch.long, device=device)

    return start_text + "".join(sampled)

In [13]:
# print(generate(model, "End", char2idx, idx2char, length=500))