In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
# Toy training corpus: eight short, lowercase sentences whose words overlap,
# so the same prefix can be followed by different next words.
corpus = [
    # "i love to ..." family
    "i love to eat pizza",
    "i love to eat pasta",
    "i love to write code",
    # sentences about RNNs
    "i learn rnn",
    "rnn is simple",
    "rnn is cool",
    # sentences about programming
    "i like programming",
    "programming is fun",
]

In [None]:
print("Preprocessing data...")
# Gather every distinct lowercase, whitespace-delimited token in the corpus.
words = {token for sentence in corpus for token in sentence.lower().split()}

# Sorting gives the vocabulary a deterministic order (set iteration order is not),
# so a word's position in `vocab` is the same on every run.
vocab = sorted(words)
vocab_size = len(vocab)

Preprocessing data...


In [None]:
# Lookup tables in both directions between a word and its position in `vocab`.
idx_to_word = dict(enumerate(vocab))
word_to_idx = {word: idx for idx, word in idx_to_word.items()}

In [None]:
# Build (context, target) training pairs: each non-empty proper prefix of a
# sentence is a context, and the word right after that prefix is its target.
tokenized_corpus = [sentence.lower().split() for sentence in corpus]
sequences = [
    (tokens[:cut], tokens[cut])
    for tokens in tokenized_corpus
    for cut in range(1, len(tokens))
]

In [None]:
# Report the dataset statistics, one printed line per figure.
for summary_line in (
    f"Vocabulary size: {vocab_size}",
    f"Number of training sequences: {len(sequences)}",
):
    print(summary_line)

Vocabulary size: 16
Number of training sequences: 22


In [None]:
class WordRNN(nn.Module):
    """Next-word predictor: word embedding -> nn.RNN -> linear layer over the vocabulary."""

    def __init__(self, vocab_size, embed_size, hidden_size):
        """Create the three layers.

        Args:
            vocab_size: number of distinct word indices; also the width of the
                output logits.
            embed_size: length of the dense vector each word index is mapped to.
            hidden_size: width of the RNN hidden state.
        """
        super().__init__()
        self.hidden_size = hidden_size

        # Word index -> dense vector.
        self.embedding = nn.Embedding(vocab_size, embed_size)

        # Recurrent layer over the embedded sequence. batch_first=True means the
        # input is laid out as (batch, seq_len, features).
        self.rnn = nn.RNN(embed_size, hidden_size, batch_first=True)

        # Hidden state -> one score per vocabulary word.
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        """Return next-word logits for a batch of index sequences.

        Args:
            x: integer word indices laid out as (batch_size, seq_len).

        Returns:
            Logits of shape (batch_size, vocab_size).
        """
        # (batch_size, seq_len) -> (batch_size, seq_len, embed_size)
        embedded = self.embedding(x)

        # No initial hidden state is passed, so nn.RNN starts from zeros.
        # step_outputs: (batch_size, seq_len, hidden_size)
        step_outputs, _ = self.rnn(embedded)

        # Only the final time step is used for the prediction:
        # (batch_size, hidden_size)
        last_step = step_outputs[:, -1, :]

        # (batch_size, hidden_size) -> (batch_size, vocab_size)
        return self.fc(last_step)


In [None]:
def forward(self, x):
    """Compute next-word logits from `self.embedding`, `self.rnn` and `self.fc`.

    NOTE(review): this module-level function repeats the body of
    WordRNN.forward and is neither called nor attached to the class in the
    visible cells -- it looks like a leftover cell; confirm before removing.

    Args:
        self: object exposing `embedding`, `rnn` and `fc` callables.
        x: integer word indices laid out as (batch_size, seq_len).

    Returns:
        Logits of shape (batch_size, vocab_size).
    """
    # (batch_size, seq_len) -> (batch_size, seq_len, embed_size)
    embedded = self.embedding(x)

    # No initial hidden state is passed, so nn.RNN starts from zeros.
    # step_outputs: (batch_size, seq_len, hidden_size)
    step_outputs, _ = self.rnn(embedded)

    # Keep only the final time step: (batch_size, hidden_size)
    last_step = step_outputs[:, -1, :]

    # (batch_size, hidden_size) -> (batch_size, vocab_size)
    return self.fc(last_step)

In [None]:
# ----------------------------------------
# Training
# ----------------------------------------

# Hyperparameters
EMBED_SIZE = 10
HIDDEN_SIZE = 32
LEARNING_RATE = 0.01
EPOCHS = 200
SEED = 42

# Seed PyTorch's RNG before the model is built so the random weight
# initialisation -- and with it the printed losses and predictions -- is the
# same each time this cell is run.
torch.manual_seed(SEED)

# Instantiate model, loss, and optimizer
model = WordRNN(vocab_size, EMBED_SIZE, HIDDEN_SIZE)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Encode every (context, target) pair once up front: the index tensors are the
# same in every epoch, so there is no need to rebuild them inside the loop.
training_pairs = [
    (
        # context word indices with a leading batch axis: (1, seq_len)
        torch.tensor([word_to_idx[w] for w in context], dtype=torch.long).unsqueeze(0),
        # target word index: (1,)
        torch.tensor([word_to_idx[target]], dtype=torch.long),
    )
    for context, target in sequences
]

print("Starting training...")

for epoch in range(EPOCHS):
    total_loss = 0.0

    # We train one sequence at a time (batch size = 1) for simplicity
    for context_tensor, target_tensor in training_pairs:
        # --- Forward Pass ---
        optimizer.zero_grad()
        output = model(context_tensor)  # (1, vocab_size)

        # --- Calculate Loss & Backpropagate ---
        loss = criterion(output, target_tensor)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the mean per-sequence loss every 20 epochs
    if (epoch + 1) % 20 == 0:
        print(f"Epoch [{epoch+1}/{EPOCHS}], Loss: {total_loss/len(sequences):.4f}")

print("Training finished.")

# ----------------------------------------
# 5. Inference (Autocomplete Function)
# ----------------------------------------

def autocomplete(model, seed_text, n_words=3):
    """
    Generates n_words following the seed_text.

    Decoding is greedy: at each step the whole sequence so far is fed to the
    model and the highest-scoring word is appended.

    Relies on the module-level `word_to_idx` and `idx_to_word` lookup tables.

    Args:
        model: callable that maps a (1, seq_len) LongTensor of word indices to
            (1, vocab_size) scores and provides an `eval()` method.
        seed_text: whitespace-separated starting words; lower-cased before lookup.
        n_words: how many words to append. Zero or a negative value appends nothing.

    Returns:
        The lower-cased seed words followed by the generated words, joined by
        single spaces. If the seed has no words, or contains a word missing
        from `word_to_idx`, an error is printed and the seed words are
        returned with nothing appended.
    """
    model.eval()  # Set model to evaluation mode

    generated_words = seed_text.lower().split()

    # Nothing to generate: hand the normalised seed back without validating it.
    if n_words <= 0:
        return ' '.join(generated_words)

    # An empty seed would become a (1, 0) tensor; WordRNN.forward reads the last
    # time step, which does not exist for a zero-length sequence, so report the
    # problem the same way as an unknown word instead of crashing in the model.
    if not generated_words:
        print("Error: seed_text contains no words.")
        return ''

    # Only the seed can contain unknown words (every generated word comes from
    # idx_to_word), so encode it once here and extend the index list as we go.
    try:
        context_idxs = [word_to_idx[w] for w in generated_words]
    except KeyError as e:
        print(f"Error: Word '{e.args[0]}' not in vocabulary.")
        return ' '.join(generated_words)

    # Use torch.no_grad() to disable gradient calculations
    with torch.no_grad():
        for _ in range(n_words):
            # Full sequence so far, with a leading batch axis: (1, seq_len)
            context_tensor = torch.tensor(context_idxs, dtype=torch.long).unsqueeze(0)

            # Get the model's prediction
            output = model(context_tensor)  # (1, vocab_size)

            # Get the index of the most likely next word
            pred_idx = torch.argmax(output, dim=1).item()

            # Extend both the index sequence and the word sequence
            context_idxs.append(pred_idx)
            generated_words.append(idx_to_word[pred_idx])

    return ' '.join(generated_words)


print("\n--- Autocomplete Examples ---")

# Each demo is (seed text, number of words to generate). Keeping the cases in
# one table replaces four copy-pasted seed/print pairs with a single loop.
demo_cases = [
    ("i love", 2),
    ("rnn is", 2),
    ("i learn", 1),
    ("programming is", 1),
]

for seed, n_new_words in demo_cases:
    prediction = autocomplete(model, seed, n_words=n_new_words)
    print(f"Seed: '{seed}' -> Prediction: '{prediction}'")

Starting training...
Epoch [20/200], Loss: 0.5291
Epoch [40/200], Loss: 0.5110
Epoch [60/200], Loss: 0.5023
Epoch [80/200], Loss: 0.4990
Epoch [100/200], Loss: 0.4968
Epoch [120/200], Loss: 0.4946
Epoch [140/200], Loss: 0.4890
Epoch [160/200], Loss: 0.4891
Epoch [180/200], Loss: 0.4858
Epoch [200/200], Loss: 0.4781
Training finished.

--- Autocomplete Examples ---
Seed: 'i love' -> Prediction: 'i love to eat'
Seed: 'rnn is' -> Prediction: 'rnn is simple is'
Seed: 'i learn' -> Prediction: 'i learn rnn'
Seed: 'programming is' -> Prediction: 'programming is fun'
