In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# Toy training corpus: eight short, lowercase, whitespace-tokenised sentences.
corpus = [
    # "i love to ..." family
    "i love to eat pizza",
    "i love to eat pasta",
    "i love to write code",
    # sentences about rnn
    "i learn rnn",
    "rnn is simple",
    "rnn is cool",
    # sentences about programming
    "i like programming",
    "programming is fun",
]

In [3]:
# Gather every distinct lowercase token that appears anywhere in the corpus
words = {token for sentence in corpus for token in sentence.lower().split()}

# Sorting gives a deterministic ordering, so indices derived from it are stable
vocab = sorted(words)
vocab_size = len(vocab)

print("Vocabulary:", vocab)
print("Vocabulary size:", vocab_size)

Vocabulary: ['code', 'cool', 'eat', 'fun', 'i', 'is', 'learn', 'like', 'love', 'pasta', 'pizza', 'programming', 'rnn', 'simple', 'to', 'write']
Vocabulary size: 16


In [6]:
# Lookup tables over the vocabulary: position -> word, and its inverse word -> position
idx_to_word = dict(enumerate(vocab))
word_to_idx = {word: idx for idx, word in idx_to_word.items()}

print("Word to Index:", word_to_idx)

Word to Index: {'code': 0, 'cool': 1, 'eat': 2, 'fun': 3, 'i': 4, 'is': 5, 'learn': 6, 'like': 7, 'love': 8, 'pasta': 9, 'pizza': 10, 'programming': 11, 'rnn': 12, 'simple': 13, 'to': 14, 'write': 15}


In [7]:
# Each sentence is expanded into (context, target) pairs, where the context is
# a prefix of the sentence and the target is the word that follows that prefix.
#
# "i love to eat pizza" produces:
#   (["i"], "love")
#   (["i", "love"], "to")
#   (["i", "love", "to"], "eat")
#   (["i", "love", "to", "eat"], "pizza")

sequences = []

for sentence in corpus:
    tokens = sentence.lower().split()
    # Prefix of length i predicts token i; starting at 1 keeps the context non-empty
    sequences.extend((tokens[:i], tokens[i]) for i in range(1, len(tokens)))

print("Total training sequences:", len(sequences))
print("Sample sequence:", sequences[0])

Total training sequences: 22
Sample sequence: (['i'], 'love')


In [8]:
class WordRNN(nn.Module):
    """Next-word scorer: embedding -> RNN -> linear projection onto the vocabulary.

    Args:
        vocab_size: Number of distinct tokens; sizes both the embedding table
            and the output layer.
        embed_size: Dimensionality of each token embedding.
        hidden_size: Dimensionality of the RNN hidden state.
    """

    def __init__(self, vocab_size, embed_size, hidden_size):
        super().__init__()

        # Token index -> dense vector
        self.embedding = nn.Embedding(vocab_size, embed_size)

        # Recurrent layer; batch_first=True puts the batch dimension first
        self.rnn = nn.RNN(embed_size, hidden_size, batch_first=True)

        # Hidden state -> one score per vocabulary entry
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        """Score every vocabulary word as the continuation of each input sequence.

        Args:
            x: Token indices of shape (batch_size, seq_len).

        Returns:
            Tensor of shape (batch_size, vocab_size) holding the raw output of
            the linear layer (no softmax is applied), computed from the RNN
            output at the final time step.
        """
        embedded = self.embedding(x)      # (batch_size, seq_len, embed_size)
        rnn_out, _ = self.rnn(embedded)   # (batch_size, seq_len, hidden_size)
        last_step = rnn_out[:, -1]        # (batch_size, hidden_size)
        return self.fc(last_step)         # (batch_size, vocab_size)