In [None]:
import torch
import torch.nn as nn
import numpy as np
from collections import Counter
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split

In [6]:
text = "Here is some example text data to train a word-level language model. It is a small dataset."

words = text.lower().split()
vocab = Counter(words)
vocab_size = len(vocab)

word_to_idx = {word: i for i, (word, _) in enumerate(vocab.items())}
idx_to_word = {i: word for word, i in word_to_idx.items()}

encoded_text = [word_to_idx[word] for word in words]

sequence_length = 5
sequences = []
targets = []

for i in range(len(encoded_text) - sequence_length):
    sequences.append(encoded_text[i:i+sequence_length])
    targets.append(encoded_text[i+sequence_length])

X_train, X_val, y_train, y_val = train_test_split(sequences, targets, test_size=0.2, random_state=42)

X_train, y_train = torch.tensor(X_train), torch.tensor(y_train)
X_val, y_val = torch.tensor(X_val), torch.tensor(y_val)

In [15]:
class WordLevelRNN(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        self.embedding = nn.Embedding(input_size, hidden_dim)
        self.rnn = nn.RNN(hidden_dim, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_size)

    def init_hidden(self, batch_size):
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(device)
        return hidden

    def forward(self, x):
        batch_size = x.shape[0]

        hidden = self.init_hidden(batch_size)
        x = self.embedding(x)

        out, hidden = self.rnn(x, hidden)

        out = self.fc(out[:, -1, :])

        return out, hidden


In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

input_size = vocab_size
output_size = vocab_size
hidden_dim = 128
n_layers = 2
batch_size = 4
learning_rate = 0.001
num_epochs = 100

model = WordLevelRNN(input_size, output_size, hidden_dim, n_layers).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    total_loss = 0
    for i in range(0, X_train.size(0), batch_size):
        inputs = X_train[i:i+batch_size].to(device)
        targets = y_train[i:i+batch_size].to(device)

        model.zero_grad()

        output, hidden = model(inputs)

        loss = criterion(output, targets)
        loss.backward()

        nn.utils.clip_grad_norm_(model.parameters(), max_norm=5)
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch: {epoch+1}/{num_epochs}, Loss: {total_loss/len(X_train):.4f}")

Epoch: 1/100, Loss: 0.8964
Epoch: 2/100, Loss: 0.5956
Epoch: 3/100, Loss: 0.4033
Epoch: 4/100, Loss: 0.2804
Epoch: 5/100, Loss: 0.2008
Epoch: 6/100, Loss: 0.1457
Epoch: 7/100, Loss: 0.1067
Epoch: 8/100, Loss: 0.0791
Epoch: 9/100, Loss: 0.0599
Epoch: 10/100, Loss: 0.0465
Epoch: 11/100, Loss: 0.0370
Epoch: 12/100, Loss: 0.0303
Epoch: 13/100, Loss: 0.0253
Epoch: 14/100, Loss: 0.0217
Epoch: 15/100, Loss: 0.0188
Epoch: 16/100, Loss: 0.0166
Epoch: 17/100, Loss: 0.0149
Epoch: 18/100, Loss: 0.0135
Epoch: 19/100, Loss: 0.0123
Epoch: 20/100, Loss: 0.0113
Epoch: 21/100, Loss: 0.0105
Epoch: 22/100, Loss: 0.0098
Epoch: 23/100, Loss: 0.0091
Epoch: 24/100, Loss: 0.0086
Epoch: 25/100, Loss: 0.0081
Epoch: 26/100, Loss: 0.0077
Epoch: 27/100, Loss: 0.0073
Epoch: 28/100, Loss: 0.0069
Epoch: 29/100, Loss: 0.0066
Epoch: 30/100, Loss: 0.0063
Epoch: 31/100, Loss: 0.0060
Epoch: 32/100, Loss: 0.0058
Epoch: 33/100, Loss: 0.0056
Epoch: 34/100, Loss: 0.0053
Epoch: 35/100, Loss: 0.0051
Epoch: 36/100, Loss: 0.0049
E

In [18]:
def predict(model, word, hidden=None):
    model.eval()

    x = torch.tensor([[word_to_idx[word]]]).to(device)

    out, hidden = model(x)

    prob = torch.softmax(out, dim=1).data
    word_idx = torch.argmax(prob).item()

    return idx_to_word[word_idx], hidden

def generate_sequence(model, start_word, sequence_len=10):
    model.eval()
    words = [start_word]

    hidden = model.init_hidden(1).to(device)

    word = start_word
    for _ in range(sequence_len - 1):
        word, hidden = predict(model, word, hidden)
        words.append(word)

    return ' '.join(words)

start_word = 'here'
print("Generated sequence:", generate_sequence(model, start_word))

Generated sequence: here word-level language model. it is word-level language model. it
