# Basic LLM Workflow


## Step 1: Text Tokenization
We define a small vocabulary and tokenize a sentence into numbers (token IDs).

In [221]:
import torch
import random

# Seed every RNG this notebook imports so that re-running it from a fresh
# kernel produces the same random weight initialisation in the later cells.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# Define a toy vocabulary (word -> token ID) and its inverse (token ID -> word)
vocab = {"every": 0, "effort": 1, "moves": 2, "you": 3, "END": 4, "like": 5, "chocolate": 6, "I": 7, "really": 8}
inv_vocab = {v: k for k, v in vocab.items()}


def tokenize(text):
    """Convert a whitespace-separated sentence into a list of token IDs.

    Args:
        text: Sentence whose words must all be keys of ``vocab``.

    Returns:
        A list with one integer token ID per word, in order.

    Raises:
        KeyError: If any word is missing from ``vocab``.
    """
    return [vocab[word] for word in text.split()]


# Training data: inputs and targets (targets are the inputs shifted by one token).
# Deriving the IDs from `vocab` keeps them in sync with the vocabulary instead
# of relying on hand-typed numbers.
train_sentences = [tokenize("I really like"), tokenize("every effort moves")]          # [[7, 8, 5], [0, 1, 2]]
train_targets = [tokenize("really like chocolate"), tokenize("effort moves you")]      # [[8, 5, 6], [1, 2, 3]]

# Convert to tensors (each of shape [2, 3]: 2 sentences x 3 tokens)
x_train = torch.tensor(train_sentences)
y_train = torch.tensor(train_targets)

print("Input:", x_train)
print("Target:", y_train)

Input: tensor([[7, 8, 5],
        [0, 1, 2]])
Target: tensor([[8, 5, 6],
        [1, 2, 3]])


## Step 2: Loss Function (Cross Entropy)
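We build a tiny model (an embedding layer followed by a linear layer) and use cross-entropy, the standard loss for next-token prediction, to measure how different the model's output is from the target.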

In [223]:
# Tiny next-token model: token ID -> embedding vector -> one score per vocab word
vocab_size, embedding_dim = len(vocab), 10

embedding = torch.nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)
linear = torch.nn.Linear(in_features=embedding_dim, out_features=vocab_size)

# Forward pass on the whole training batch
embedded = embedding(x_train)   # [2, 3, 10]
logits = linear(embedded)       # [2, 3, vocab_size]

# cross_entropy expects one row of logits per target token, so collapse the
# batch and sequence dimensions into a single dimension.
logits_flat = logits.reshape(-1, vocab_size)   # [6, vocab_size]
targets_flat = y_train.reshape(-1)             # [6]

# Loss of the untrained model
loss = torch.nn.functional.cross_entropy(input=logits_flat, target=targets_flat)
print("Initial loss:", loss.item())

Initial loss: 2.4884560108184814


## Step 3: Training Loop
Train the model for a few epochs and update weights to reduce loss.

In [225]:
# AdamW updates the weights of both layers
optimizer = torch.optim.AdamW([*embedding.parameters(), *linear.parameters()], lr=0.05)

# The targets never change, so flatten them once up front
targets_flat = y_train.view(-1)

# Simple training loop: 10 full-batch gradient steps
for epoch in range(10):
    # Clear gradients left over from the previous step
    optimizer.zero_grad()

    # Forward pass
    embedded = embedding(x_train)
    logits = linear(embedded)
    logits_flat = logits.view(-1, vocab_size)
    loss = torch.nn.functional.cross_entropy(logits_flat, targets_flat)

    # Backward pass and weight update
    loss.backward()
    optimizer.step()

    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 1, Loss: 2.4885
Epoch 2, Loss: 1.9622
Epoch 3, Loss: 1.5239
Epoch 4, Loss: 1.1625
Epoch 5, Loss: 0.8660
Epoch 6, Loss: 0.6258
Epoch 7, Loss: 0.4361
Epoch 8, Loss: 0.2924
Epoch 9, Loss: 0.1897
Epoch 10, Loss: 0.1205


## Step 4: Validation Set
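We evaluate the trained model on a validation example. Note that in this toy notebook the validation sentence ("I really like") is identical to one of the two training sentences, so the low loss only shows that the model has fit its training data; a real validation set must contain examples the model has not seen during training.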

In [227]:
# Validation sample (a batch of one sentence).
# NOTE: this pair is identical to the first training pair, so the loss below
# measures fit to the training data rather than generalization.
val_input = torch.tensor([[7, 8, 5]])   # "I really like"
val_target = torch.tensor([[8, 5, 6]])  # "really like chocolate"

# Evaluation only: no gradients are needed
with torch.no_grad():
    val_emb = embedding(val_input)
    val_logits = linear(val_emb)
    val_loss = torch.nn.functional.cross_entropy(
        val_logits.reshape(-1, vocab_size),
        val_target.reshape(-1),
    )

print("Validation Loss:", val_loss.item())

Validation Loss: 0.09278151392936707


## Step 5: Model Evaluation (Prediction)
We predict the next word for a given prompt and decode it.

In [229]:
# Greedy prediction of the word that follows "every effort moves"
prompt = torch.tensor([[0, 1, 2]])  # "every effort moves"

with torch.no_grad():
    emb = embedding(prompt)
    logits = linear(emb)
    # Scores produced at the final prompt position of the single batch entry
    next_token_logits = logits[0][-1]
    # The highest-scoring vocabulary entry is the prediction
    predicted_id = next_token_logits.argmax().item()

print("Predicted next word:", inv_vocab[predicted_id])

Predicted next word: you


## Step 6: Sample Generation
Generate a short sentence word-by-word from a prompt.

In [231]:
# Seed the sentence with the token for "I"
sentence = [vocab["I"]]  # [7]

# Greedily append up to 5 more tokens, stopping early if END is produced
for _ in range(5):
    input_tensor = torch.tensor([sentence])
    with torch.no_grad():
        emb = embedding(input_tensor)
        logits = linear(emb)

    # Use only the scores at the last position to pick the next token
    next_token_logits = logits[0, -1]
    next_token = next_token_logits.argmax().item()
    sentence.append(next_token)

    if next_token == vocab['END']:
        break

# Map token IDs back to words and print the result
decoded = [inv_vocab[token_id] for token_id in sentence]
print("Generated sentence:", " ".join(decoded))

Generated sentence: I really like chocolate like chocolate
