In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import time
import numpy as np

# Check for GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Your text data
text = """Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text.

At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model.

One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks.

Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time.

Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants.

In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."""

# Create vocabulary
chars = sorted(list(set(text)))
vocab_size = len(chars)
char2idx = {ch: i for i, ch in enumerate(chars)}
idx2char = {i: ch for ch, i in char2idx.items()}
encoded_text = [char2idx[c] for c in text]

# Dataset
class CharDataset(Dataset):
    def __init__(self, data, seq_length):
        self.data = data
        self.seq_length = seq_length

    def __len__(self):
        return len(self.data) - self.seq_length

    def __getitem__(self, idx):
        return (
            torch.tensor(self.data[idx:idx+self.seq_length]),
            torch.tensor(self.data[idx+self.seq_length])
        )


In [2]:
##Transformer Model for Character Prediction
class TransformerCharModel(nn.Module):
    def __init__(self, vocab_size, emb_size=64, num_heads=2, num_layers=2, ff_dim=128, dropout=0.1):
        super(TransformerCharModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, emb_size)
        self.pos_embedding = nn.Parameter(torch.randn(1, 1000, emb_size))

        encoder_layer = nn.TransformerEncoderLayer(d_model=emb_size, nhead=num_heads, dim_feedforward=ff_dim, dropout=dropout)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(emb_size, vocab_size)

    def forward(self, x):
        seq_len = x.size(1)
        x = self.embedding(x) + self.pos_embedding[:, :seq_len, :]
        x = self.transformer(x)
        out = self.fc(x[:, -1, :])  # Predict next char using last token
        return out


In [3]:
###Train Function
def train_model(model, train_loader, val_loader, num_epochs=5):
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.003)

    start_time = time.time()
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            output = model(x)
            loss = criterion(output, y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")

    train_time = time.time() - start_time

    # Validation
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for x, y in val_loader:
            x, y = x.to(device), y.to(device)
            output = model(x)
            preds = output.argmax(dim=1)
            correct += (preds == y).sum().item()
            total += y.size(0)

    acc = correct / total
    return total_loss / len(train_loader), acc, train_time, sum(p.numel() for p in model.parameters())


In [4]:
sequence_lengths = [10, 20, 30]

for seq_len in sequence_lengths:
    print(f"\nTraining Transformer for sequence length {seq_len}")
    dataset = CharDataset(encoded_text, seq_len)
    train_size = int(0.9 * len(dataset))
    val_size = len(dataset) - train_size
    train_set, val_set = torch.utils.data.random_split(dataset, [train_size, val_size])
    train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=64)

    model = TransformerCharModel(vocab_size)
    loss, acc, t_time, n_params = train_model(model, train_loader, val_loader)

    print(f"Loss: {loss:.4f}, Accuracy: {acc:.4f}, Time: {t_time:.2f}s, Params: {n_params}")



Training Transformer for sequence length 10




Epoch 1, Loss: 3.0356
Epoch 2, Loss: 2.6818
Epoch 3, Loss: 2.5292
Epoch 4, Loss: 2.4451
Epoch 5, Loss: 2.4143
Loss: 2.4143, Accuracy: 0.3054, Time: 3.84s, Params: 136749

Training Transformer for sequence length 20




Epoch 1, Loss: 3.0355
Epoch 2, Loss: 2.6159
Epoch 3, Loss: 2.5041
Epoch 4, Loss: 2.4308
Epoch 5, Loss: 2.4023
Loss: 2.4023, Accuracy: 0.2983, Time: 6.82s, Params: 136749

Training Transformer for sequence length 30
Epoch 1, Loss: 3.1053
Epoch 2, Loss: 2.6914
Epoch 3, Loss: 2.5310
Epoch 4, Loss: 2.4587
Epoch 5, Loss: 2.4038
Loss: 2.4038, Accuracy: 0.2405, Time: 8.46s, Params: 136749


In [5]:
##RNN (LSTM) Model
class LSTMCharModel(nn.Module):
    def __init__(self, vocab_size, emb_size=64, hidden_size=128, num_layers=2):
        super(LSTMCharModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, emb_size)
        self.lstm = nn.LSTM(emb_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        x = self.embedding(x)
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])  # last timestep
        return out


In [6]:
##RNN + Attention Model

class AttentionLSTMCharModel(nn.Module):
    def __init__(self, vocab_size, emb_size=64, hidden_size=128, num_layers=1):
        super(AttentionLSTMCharModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, emb_size)
        self.lstm = nn.LSTM(emb_size, hidden_size, num_layers, batch_first=True)
        self.attn = nn.Linear(hidden_size, 1)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        x = self.embedding(x)
        lstm_out, _ = self.lstm(x)

        # Attention scores and context vector
        attn_weights = torch.softmax(self.attn(lstm_out).squeeze(-1), dim=1)  # (batch, seq_len)
        context = torch.sum(lstm_out * attn_weights.unsqueeze(-1), dim=1)     # (batch, hidden)

        out = self.fc(context)
        return out


In [7]:
# Same train_model() function works here

print("\n========== RNN (LSTM) Model ==========")
for seq_len in sequence_lengths:
    print(f"\nTraining LSTM for sequence length {seq_len}")
    dataset = CharDataset(encoded_text, seq_len)
    train_size = int(0.9 * len(dataset))
    val_size = len(dataset) - train_size
    train_set, val_set = torch.utils.data.random_split(dataset, [train_size, val_size])
    train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=64)

    model = LSTMCharModel(vocab_size)
    loss, acc, t_time, n_params = train_model(model, train_loader, val_loader)

    print(f"Loss: {loss:.4f}, Accuracy: {acc:.4f}, Time: {t_time:.2f}s, Params: {n_params}")

print("\n========== RNN + Attention Model ==========")
for seq_len in sequence_lengths:
    print(f"\nTraining LSTM+Attention for sequence length {seq_len}")
    dataset = CharDataset(encoded_text, seq_len)
    train_size = int(0.9 * len(dataset))
    val_size = len(dataset) - train_size
    train_set, val_set = torch.utils.data.random_split(dataset, [train_size, val_size])
    train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=64)

    model = AttentionLSTMCharModel(vocab_size)
    loss, acc, t_time, n_params = train_model(model, train_loader, val_loader)

    print(f"Loss: {loss:.4f}, Accuracy: {acc:.4f}, Time: {t_time:.2f}s, Params: {n_params}")




Training LSTM for sequence length 10
Epoch 1, Loss: 3.1495
Epoch 2, Loss: 2.8203
Epoch 3, Loss: 2.4555
Epoch 4, Loss: 2.1652
Epoch 5, Loss: 1.8831
Loss: 1.8831, Accuracy: 0.4351, Time: 6.17s, Params: 240109

Training LSTM for sequence length 20
Epoch 1, Loss: 3.1699
Epoch 2, Loss: 2.8400
Epoch 3, Loss: 2.4349
Epoch 4, Loss: 2.1573
Epoch 5, Loss: 1.9378
Loss: 1.9378, Accuracy: 0.4706, Time: 11.77s, Params: 240109

Training LSTM for sequence length 30
Epoch 1, Loss: 3.1648
Epoch 2, Loss: 2.8256
Epoch 3, Loss: 2.4644
Epoch 4, Loss: 2.2109
Epoch 5, Loss: 2.0036
Loss: 2.0036, Accuracy: 0.4346, Time: 17.42s, Params: 240109


Training LSTM+Attention for sequence length 10
Epoch 1, Loss: 3.2056
Epoch 2, Loss: 3.0109
Epoch 3, Loss: 2.9424
Epoch 4, Loss: 2.7672
Epoch 5, Loss: 2.4786
Loss: 2.4786, Accuracy: 0.2887, Time: 2.78s, Params: 108142

Training LSTM+Attention for sequence length 20
Epoch 1, Loss: 3.2191
Epoch 2, Loss: 3.0679
Epoch 3, Loss: 3.0424
Epoch 4, Loss: 3.0291
Epoch 5, Loss: 3.0