In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchtext.datasets import IMDB
from torchtext.data import Field, LabelField, TabularDataset, BucketIterator
import math
import spacy
spacy.load('en_core_web_sm')

class TransformerEncoder(nn.Module):
    def __init__(self, input_dim, embedding_dim, n_heads, hidden_dim, n_layers, dropout):
        super().__init__()

        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.pos_encoder = PositionalEncoding(embedding_dim, dropout)

        encoder_layer = nn.TransformerEncoderLayer(embedding_dim, n_heads, hidden_dim, dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, n_layers)

        self.fc = nn.Linear(embedding_dim, 1)

    def forward(self, x):
        x = self.embedding(x)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        x = x.mean(dim=0)
        x = self.fc(x)

        return x.squeeze(1)

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

# Load and preprocess the data
TEXT = Field(tokenize='spacy', lower=True)
LABEL = LabelField(dtype=torch.float)

# Hyperparameters
batch_size = 64
input_dim = len(TEXT.vocab)
embedding_dim = 256
n_heads = 8
hidden_dim = 512
n_layers = 6
dropout = 0.1

train_data, test_data = IMDB.splits(TEXT, LABEL)
TEXT.build_vocab(train_data, max_size=25000, vectors="glove.6B.100d")
LABEL.build_vocab(train_data)

train_iterator, test_iterator = BucketIterator.splits(
    (train_data, test_data),
    batch_size=batch_size,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
)

# Initialize the model and optimizer
model = TransformerEncoder(input_dim, embedding_dim, n_heads, hidden_dim, n_layers, dropout).to(device)
optimizer = torch.optim.Adam(model.parameters())

# Train the model
model.train()

for epoch in range(10):
    for batch in train_iterator:
        optimizer.zero_grad()

        text, label = batch.text, batch.label
        output = model(text).squeeze(1)
        loss = F.binary_cross_entropy_with_logits(output, label)

        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch}: Loss {loss.item()}')

# Evaluate the model
model.eval()

correct = 0
total = 0

with torch.no_grad():
    for batch in test_iterator:
        text, label = batch.text, batch.label
        output = model(text).squeeze(1)
        predicted = torch.round(torch.sigmoid(output))
        total += label.size(0)
        correct += (predicted == label).sum().item()

print(f'Test Accuracy: {100 * correct / total:.2f}%')

OSError: [E941] Can't find model 'en'. It looks like you're trying to load a model from a shortcut, which is obsolete as of spaCy v3.0. To load the model, use its full name instead:

nlp = spacy.load("en_core_web_sm")

For more details on the available models, see the models directory: https://spacy.io/models. If you want to create a blank model, use spacy.blank: nlp = spacy.blank("en")