In [None]:
import torch
import torch.nn as nn   
import torch.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

import torchtext
import torchtext.transforms as T
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

from datasets import load_dataset

# Pick the best available accelerator: CUDA first, then Apple MPS, else CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Report library versions and the selected device, one item per line.
print(
    torch.__version__,
    torchtext.__version__,
    f"Using {device} device",
    sep="\n",
)

In [None]:
# Prepare the dataset

# Both splits come from the same Hub repository and configuration; only
# `split` differs. trust_remote_code=True allows the dataset's own loading
# script to run locally.
_reuters_kwargs = dict(
    path="ucirvine/reuters21578",
    name="ModApte",
    trust_remote_code=True,
)

train_dataset = load_dataset(split="train", **_reuters_kwargs)
test_dataset = load_dataset(split="test", **_reuters_kwargs)

In [None]:
# Tokenize the dataset

tokenizer = get_tokenizer("basic_english")


def yield_tokens(data_iter):
    """Lazily yield the token list of each sample's "text" field.

    Args:
        data_iter: Iterable of mapping-like samples that expose a "text" key.

    Yields:
        The result of calling the module-level `tokenizer` on each sample's text.
    """
    yield from (tokenizer(sample["text"]) for sample in data_iter)

# Special tokens are placed first, so they receive indices 0..3 in this order.
special_tokens = ["<pad>", "<sos>", "<eos>", "<unk>"]

vocab = build_vocab_from_iterator(
    yield_tokens(train_dataset),
    min_freq=2,  # tokens seen fewer than twice are left out of the vocabulary
    specials=special_tokens,
    special_first=True,
)
print(f"Vocab size: {len(vocab)}")

# Out-of-vocabulary lookups fall back to the <unk> index.
vocab.set_default_index(vocab["<unk>"])

In [None]:
# Peek at the first ten vocabulary entries; the four special tokens come first
# because the vocabulary was built with special_first=True.
vocab.get_itos()[:10]

In [None]:
# Convert token strings to vocabulary ids, wrap every sequence in <sos>/<eos>,
# and pad the batch into one tensor. The special-token ids are looked up from
# the vocabulary rather than hard-coded.
text_transform = T.Sequential(
    T.VocabTransform(vocab=vocab),
    T.AddToken(vocab["<sos>"], begin=True),
    # Truncate before appending <eos> so the end marker is never cut off;
    # sequences are therefore at most 257 ids long.
    T.Truncate(256),
    T.AddToken(vocab["<eos>"], begin=False),
    T.ToTensor(padding_value=vocab["<pad>"]),
)


def text_tokenizer(batch):
    """Tokenize every raw string in `batch`, returning a list of token lists."""
    return [tokenizer(x) for x in batch]


def collate_text(samples):
    """Collate dataset samples into a padded tensor of token ids.

    A DataLoader passes `collate_fn` the list of raw samples, not token lists,
    so `text_transform` cannot be used as the collate function directly: the
    "text" field has to be extracted and tokenized first.

    Args:
        samples: List of mapping-like samples that expose a "text" key.

    Returns:
        The output of `text_transform` for the batch (token ids padded with
        the <pad> index).
    """
    return text_transform(text_tokenizer([sample["text"] for sample in samples]))


# NOTE(review): the training loader still uses default collation and yields raw
# sample fields; presumably the training loop tokenizes batch["text"] itself.
# Confirm, and switch to collate_fn=collate_text if token ids are wanted here.
# Default collation may also fail on variable-length list fields.
data_loader_train = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4, drop_last=True)
# NOTE(review): on platforms that spawn worker processes (macOS/Windows), a
# collate function defined in a notebook may fail to pickle; use num_workers=0
# there if the workers crash.
data_loader_test = DataLoader(test_dataset, batch_size=8, shuffle=False, collate_fn=collate_text, num_workers=4)

In [None]:
class LSTM(nn.Module):
    """Embedding -> stacked LSTM -> linear head applied at every time step.

    Args:
        num_emb: Number of rows in the embedding table (vocabulary size).
        output_size: Number of features produced by the final linear layer.
        num_layers: Number of stacked LSTM layers.
        hidden_size: Width of both the token embeddings and the LSTM state.
        dropout: Dropout probability applied between stacked LSTM layers.
            Only used when ``num_layers > 1``; a single-layer LSTM has no
            inter-layer connection to drop, and passing a non-zero value to
            ``nn.LSTM`` in that case only triggers a warning.
    """

    def __init__(self, num_emb, output_size, num_layers=1, hidden_size=128, dropout=0.5):
        super().__init__()

        self.embedding = nn.Embedding(num_emb, hidden_size)
        self.lstm = nn.LSTM(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, input_seq, hidden_input=None, mem_input=None):
        """Run the network over a batch of token-id sequences.

        Args:
            input_seq: Integer token ids, batch dimension first.
            hidden_input: Initial hidden state for the LSTM, or ``None``.
            mem_input: Initial cell state for the LSTM, or ``None``.
                When both states are ``None`` the LSTM starts from zeros.

        Returns:
            Tuple ``(logits, hidden_output, mem_output)``: the linear head
            applied to the LSTM output at every time step, plus the final
            hidden and cell states.

        Raises:
            ValueError: If only one of ``hidden_input`` / ``mem_input`` is given.
        """
        if (hidden_input is None) != (mem_input is None):
            raise ValueError("hidden_input and mem_input must be provided together")

        # nn.LSTM initialises both states to zeros when given None.
        state = None if hidden_input is None else (hidden_input, mem_input)

        input_embs = self.embedding(input_seq)
        output, (hidden_output, mem_output) = self.lstm(input_embs, state)

        return self.fc(output), hidden_output, mem_output

In [None]:
# Model hyperparameters.
num_layers = 3
hidden_size = 64

# Build the network, then move its parameters to the selected device before
# handing them to the optimizer.
model = LSTM(
    num_emb=len(vocab),
    output_size=4,
    num_layers=num_layers,
    hidden_size=hidden_size,
)
model = model.to(device)

loss_fn = nn.CrossEntropyLoss()

optimizer = optim.Adam(params=model.parameters(), lr=1e-3)