<a href="https://colab.research.google.com/github/Nedu21/Pytorch-deep-learning-projects-/blob/main/LSTM_CharacterLevel_Text_Generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# "The Ghost in the Shell" (Character-Level Text Generator)

In [1]:
import torch
import torch.nn as nn
import math
import random

## Data Creation & Pre-processing

In [2]:
# 1. The Raw Data
# We'll use a tiny snippet of Shakespeare for this experiment.
text = """
To be, or not to be, that is the question:
Whether 'tis nobler in the mind to suffer
The slings and arrows of outrageous fortune,
Or to take arms against a sea of troubles
And by opposing end them. To die—to sleep,
No more; and by a sleep to say we end
The heart-ache and the thousand natural shocks
That flesh is heir to: 'tis a consummation
Devoutly to be wish'd. To die, to sleep;
To sleep, perchance to dream—ay, there's the rub:
"""

In [12]:
# 2. Build the vocabulary
# 'set(text)' finds unique chars, 'sorted' puts them in order
chars = sorted(list(set(text)))
vocab_size = len(chars)

print(f'Total Characters in text: {len(text)}')
print(f'Unique Characters (Vocab Size): {vocab_size}')
print(f'Vocabulary: {''.join(chars)}')

Total Characters in text: 435
Unique Characters (Vocab Size): 38
Vocabulary: 
 ',-.:;ADNOTWabcdefghiklmnopqrstuvwy—


In [13]:
# 3. Create Mappings
# char_to_ix: Converts 'a' -> 14
# ix_to_char: Converst 14 -> 'a'
char_to_ix = { ch:i for i, ch in enumerate(chars) }
ix_to_char = { i:ch for i, ch in enumerate(chars) }

In [14]:
# 4. Helper Function: String to Tensor
def str_to_tensor(s):
  """
    Converts a string like "Hello" into a LongTensor like [12, 5, 20, 20, 31]
    unsqueeze(0) adds the batch dimension: Shape (1, Sequence_Length)
  """
  idxs = [char_to_ix[c] for c in s]
  return torch.tensor(idxs, dtype=torch.long).unsqueeze(0)

In [15]:
# 5. Prepare Training data
# Input: Everything EXCEPT the last char
# Target: Everything EXCEPT the first char
input_seq = str_to_tensor(text[:-1])
target_seq = str_to_tensor(text[1:])

print(f"\nInput Shape: {input_seq.shape}")
print(f"Target Shape: {target_seq.shape}")


Input Shape: torch.Size([1, 434])
Target Shape: torch.Size([1, 434])


In [16]:
# Sanity Check
print(f"First 5 Input Chars:  {[ix_to_char[ix.item()] for ix in input_seq[0, :5]]}")
print(f"First 5 Target Chars: {[ix_to_char[ix.item()] for ix in target_seq[0, :5]]}")

First 5 Input Chars:  ['\n', 'T', 'o', ' ', 'b']
First 5 Target Chars: ['T', 'o', ' ', 'b', 'e']


## Model Creation

In [27]:
class CustomLSTMCell(nn.Module):
  def __init__(self, input_sz, hidden_sz):
    super().__init__()
    self.input_sz = input_sz
    self.hidden_sz = hidden_sz
    self.W = nn.Parameter(torch.Tensor(input_sz + hidden_sz, hidden_sz * 4))
    self.b = nn.Parameter(torch.Tensor(hidden_sz * 4))
    self.init_weights()

  def init_weights(self):
    stdv = 1.0 / math.sqrt(self.hidden_sz)
    for weight in self.parameters():
      weight.data.uniform_(-stdv, stdv)

  def forward(self, x, init_states=None):
    h_prev, C_prev = init_states
    combined = torch.cat((x, h_prev), 1)
    gates = combined @ self.W + self.b
    slices = gates.chunk(4, dim=1)

    f_t = torch.sigmoid(slices[0])
    i_t = torch.sigmoid(slices[1])
    C_tilde = torch.tanh(slices[2])
    o_t = torch.sigmoid(slices[3])

    C_t = (f_t * C_prev) + (i_t * C_tilde)
    h_t = o_t * torch.tanh(C_t)
    return h_t, C_t

In [28]:
class CustomLSTM(nn.Module):
  def __init__(self, input_sz, hidden_sz):
    super().__init__()
    self.input_sz = input_sz
    self.hidden_sz = hidden_sz
    self.cell = CustomLSTMCell(input_sz, hidden_sz)

  def forward(self, x):
    # x shape: (batch_size, seq_len, input_sz)
    batch_size, seq_len, _ = x.size()

    # Initialize Memory (Zeros)
    h_t = torch.zeros(batch_size, self.hidden_sz)
    C_t = torch.zeros(batch_size, self.hidden_sz)

    hidden_state = []

    for t in range(seq_len):
      x_t = x[:, t, :]
      h_t, C_t = self.cell(x_t, (h_t, C_t))
      hidden_state.append(h_t)

    return torch.stack(hidden_state, dim=1)

In [29]:
# THE MODEL WRAPPER
class TextGenerator(nn.Module):
  def __init__(self, vocab_size, embedding_dim, hidden_dim):
    super().__init__()
    # 1. Embedding: Map index -> vector
    self.embedding = nn.Embedding(vocab_size, embedding_dim)

    # 2. Your Engine: vector -> hidden memory
    self.lstm = CustomLSTM(embedding_dim, hidden_dim)

    # 3. Decoder: hidden memory -> vocab_size
    self.fc = nn.Linear(hidden_dim, vocab_size)

  def forward(self, x):
    # x shape: (batch, seq_len)

    # Embed: (batch, seq_len, embed_dim)
    embeds = self.embedding(x)

    # Run LSTM: (batch, seq_len, hidden_dim)
    lstm_out = self.lstm(embeds)

    # Decode: (batch, seq_len, vocab_size)
    logits = self.fc(lstm_out)
    return logits

In [30]:
# Hyperparameters
EMBED_DIM = 32
HIDDEN_DIM = 64

# Instantiate
model = TextGenerator(vocab_size, EMBED_DIM, HIDDEN_DIM)

# Dummy Pass (Use the input_seq from Step 1)
dummy_output = model(input_seq)

print(f"Input Shape:  {input_seq.shape}")
print(f"Output Shape: {dummy_output.shape}")
print(f"Expected:     (1, {input_seq.shape[1]}, {vocab_size})")

if dummy_output.shape == (1, input_seq.shape[1], vocab_size):
    print("SUCCESS: Dimensions match!")
else:
    print("FAILURE: Dimension mismatch.")

Input Shape:  torch.Size([1, 434])
Output Shape: torch.Size([1, 434, 38])
Expected:     (1, 434, 38)
SUCCESS: Dimensions match!


## Training

In [32]:
# Hyperparameters
LR = 0.01
EPOCHS = 500

# Setup tools
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

print(f'Starting training on {len(text)} characters...')

# The loop
loss_history = []

for epoch in range(EPOCHS):
  logits = model(input_seq)
  # Calculate Loss
  # We must FLATTEN the output and target for CrossEntropy
  # View(-1, VOCAB_SIZE) -> Stacks all time steps on top of each other
  # View(-1) -> Flattens the target into one long list of indices
  loss = criterion(logits.view(-1, vocab_size), target_seq.view(-1))
  loss.backward()
  optimizer.step()
  optimizer.zero_grad()

  loss_history.append(loss.item())

  if epoch % 50 == 0:
    print(f'Epoch {epoch} | Loss: {loss.item():.4f}')

print(f'Final Loss: {loss.item():.4f}')

Starting training on 435 characters...
Epoch 0 | Loss: 3.6247
Epoch 50 | Loss: 0.5412
Epoch 100 | Loss: 0.0319
Epoch 150 | Loss: 0.0120
Epoch 200 | Loss: 0.0069
Epoch 250 | Loss: 0.0046
Epoch 300 | Loss: 0.0746
Epoch 350 | Loss: 0.0128
Epoch 400 | Loss: 0.0073
Epoch 450 | Loss: 0.0050
Final Loss: 0.0038


## Model evaluation

In [33]:
import torch.nn.functional as F

def generate_text(model, start_str="To be", predict_len=200, temperature=0.5):
    """
    temperature:
       < 1.0 (Conservative, rigid, sticks to training data)
       > 1.0 (Creative, chaotic, makes mistakes)
    """
    model.eval()

    # 1. Prepare Initial Input
    current_input = str_to_tensor(start_str)
    generated_text = start_str

    print(f"--- GENERATING (Temp: {temperature}) ---")

    for _ in range(predict_len):
        with torch.no_grad():
            # A. Get logits
            output = model(current_input)

            # B. Look at ONLY the last character's prediction
            last_char_logits = output[0, -1, :]

            # C. Apply Temperature & Softmax
            # Dividing by temp flattens or sharpens the curve
            probs = F.softmax(last_char_logits / temperature, dim=0)

            # D. Sample from the distribution
            predicted_idx = torch.multinomial(probs, 1).item()

            # E. Decode and Append
            generated_char = ix_to_char[predicted_idx]
            generated_text += generated_char

            # F. Update Input for next step
            # We append the new index to the sequence
            next_idx_tensor = torch.tensor([[predicted_idx]], dtype=torch.long)
            current_input = torch.cat([current_input, next_idx_tensor], dim=1)

    return generated_text

# --- RUN IT ---
# Since your loss is near 0, a low temp (0.5) should recite Shakespeare perfectly.
print(generate_text(model, start_str="To be", predict_len=300, temperature=0.5))

--- GENERATING (Temp: 0.5) ---
To be, or not to be, that is the question:
Whether 'tis nobler in the mind to suffer
The slings and arrows of outrageous fortune,
Or to take arms against a sea of troubles
And by opposing end them. To die—to sleep,
No more; and by a sleep to say we end
The heart-ache and the thousand natural shocks
That 
