In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import random 
from torchtext import data
from torchtext import datasets
from torchtext.data import Field, BucketIterator

In [None]:
# Fix the seeds so that a fresh "Restart Kernel -> Run All" gives the same run.
SEED = 1234
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# Field definitions: reviews are tokenized with spaCy's small English pipeline;
# labels are stored as floats, which is what nn.BCEWithLogitsLoss is fed later on.
TEXT = Field(tokenize='spacy', tokenizer_language='en_core_web_sm')
LABEL = data.LabelField(dtype=torch.float)

# Load the IMDb train/test splits.
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)

# Carve a validation set out of the training data.
# `random.seed()` returns None, so passing its result as `random_state` only
# worked through the side effect of seeding the global RNG. Seed explicitly and
# hand over the resulting generator state, which is what `random_state` expects.
random.seed(SEED)
train_data, valid_data = train_data.split(random_state=random.getstate())

In [None]:
class TransformerModel(nn.Module):
    """Sequence classifier built on top of ``nn.Transformer``.

    Token ids are embedded, the embedded sequence is fed to the transformer as
    both source and target, and the decoder output at one position per
    sequence is projected to ``output_dim`` logits.

    Args:
        input_dim: Vocabulary size (number of rows in the embedding table).
        embed_dim: Embedding width; also used as the transformer ``d_model``.
        hidden_dim: Width of the transformer feed-forward sub-layers.
        output_dim: Number of logits produced per sequence.
        n_layers: Number of encoder layers and number of decoder layers.
        n_heads: Number of attention heads.
        dropout: Dropout probability (embeddings and transformer).
        pad_idx: Optional id of the padding token. When ``None`` (default) the
            model behaves exactly as before: no masking, and the logits are
            read from the final time step. When given, padding positions are
            masked out of every attention computation and the logits are read
            from the last non-padding position of each sequence, so the
            prediction no longer depends on how much padding a batch carries.
    """

    def __init__(self, input_dim, embed_dim, hidden_dim, output_dim, n_layers, n_heads, dropout, pad_idx=None):
        super().__init__()
        self.pad_idx = pad_idx
        # padding_idx=None is nn.Embedding's default, so the legacy path is unchanged.
        self.embedding = nn.Embedding(input_dim, embed_dim, padding_idx=pad_idx)
        self.transformer = nn.Transformer(
            d_model=embed_dim,
            nhead=n_heads,
            num_encoder_layers=n_layers,
            num_decoder_layers=n_layers,
            dim_feedforward=hidden_dim,
            dropout=dropout
        )
        self.fc = nn.Linear(embed_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        """Return logits of shape ``[batch, output_dim]``.

        Args:
            src: Integer token ids laid out as ``[seq_len, batch]`` (the
                transformer is created with its default sequence-first layout
                and the readout indexes the first axis as time).

        Note:
            With ``pad_idx`` set, a sequence made up entirely of padding has
            no valid attention target; callers should not pass such rows.
        """
        embedded = self.dropout(self.embedding(src))

        if self.pad_idx is None:
            # Legacy behaviour: unmasked attention, classify from the final step.
            output = self.transformer(embedded, embedded)
            return self.fc(output[-1, :, :])

        not_pad = src != self.pad_idx                    # [seq_len, batch]
        pad_mask = (~not_pad).transpose(0, 1)            # [batch, seq_len], True = ignore
        output = self.transformer(
            embedded,
            embedded,
            src_key_padding_mask=pad_mask,
            tgt_key_padding_mask=pad_mask,
            memory_key_padding_mask=pad_mask,
        )

        # Index of the last real token in every column, wherever the padding sits.
        positions = torch.arange(src.size(0), device=src.device).unsqueeze(1)
        last_idx = (positions * not_pad).max(dim=0).values   # [batch]
        batch_idx = torch.arange(src.size(1), device=src.device)
        return self.fc(output[last_idx, batch_idx])


In [None]:
# Fit both vocabularies on the training split only.
# NOTE(review): the requested GloVe vectors are 100-dimensional, while the model
# in this notebook is built with EMBED_DIM = 128 and never reads
# TEXT.vocab.vectors - confirm whether the pretrained vectors are needed at all.
LABEL.build_vocab(train_data)
TEXT.build_vocab(train_data, vectors="glove.6B.100d", max_size=25000)

# Batch iterators for the three splits, placed on the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
BATCH_SIZE = 64
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data),
    device=device,
    batch_size=BATCH_SIZE,
)


In [None]:
# Hyperparameters
N_HEADS = 8
EMBED_DIM = 128  # has to be a multiple of N_HEADS
# Multi-head attention splits the embedding evenly across heads; fail early otherwise.
assert EMBED_DIM % N_HEADS == 0, "EMBED_DIM must be divisible by N_HEADS"
HIDDEN_DIM = 256
N_LAYERS = 3
DROPOUT = 0.5
OUTPUT_DIM = 1  # one logit per review
INPUT_DIM = len(TEXT.vocab)  # embedding rows = vocabulary size

# Build the network and move it to the selected device.
model = TransformerModel(
    input_dim=INPUT_DIM,
    embed_dim=EMBED_DIM,
    hidden_dim=HIDDEN_DIM,
    output_dim=OUTPUT_DIM,
    n_layers=N_LAYERS,
    n_heads=N_HEADS,
    dropout=DROPOUT,
).to(device)

# Optimizer (Adam, default settings) and loss (sigmoid + binary cross-entropy on raw logits).
optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()


In [None]:
def train(model, iterator, optimizer, criterion):
    """Run one optimisation pass over ``iterator`` and return the mean batch loss.

    Args:
        model: Module called on ``batch.text``; its output is squeezed on dim 1.
        iterator: Sized iterable of batches exposing ``.text`` and ``.label``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(predictions, batch.label)``.

    Returns:
        Sum of the per-batch losses divided by ``len(iterator)``.
    """
    model.train()  # training mode (e.g. dropout active)
    batch_losses = []
    for batch in iterator:
        optimizer.zero_grad()
        logits = model(batch.text).squeeze(1)
        batch_loss = criterion(logits, batch.label)
        batch_loss.backward()
        optimizer.step()
        batch_losses.append(batch_loss.item())
    return sum(batch_losses) / len(iterator)

# Train for a fixed number of epochs, reporting the mean training loss after each.
N_EPOCHS = 5
for epoch in range(N_EPOCHS):
    train_loss = train(
        model=model,
        iterator=train_iterator,
        optimizer=optimizer,
        criterion=criterion,
    )
    # `epoch` is zero-based; display it one-based.
    print(f'Epoch: {epoch+1}, Train Loss: {train_loss:.3f}')


In [None]:
def evaluate(model, iterator, criterion):
    """Compute the mean batch loss over ``iterator`` without updating the model.

    Args:
        model: Module called on ``batch.text``; its output is squeezed on dim 1.
        iterator: Sized iterable of batches exposing ``.text`` and ``.label``.
        criterion: Loss called as ``criterion(predictions, batch.label)``.

    Returns:
        Sum of the per-batch losses divided by ``len(iterator)``.
    """
    model.eval()  # inference mode (e.g. dropout disabled)
    running_total = 0
    with torch.no_grad():  # no autograd bookkeeping while scoring
        for batch in iterator:
            logits = model(batch.text).squeeze(1)
            running_total += criterion(logits, batch.label).item()
    return running_total / len(iterator)

# Mean loss on the held-out validation split.
valid_loss = evaluate(model=model, iterator=valid_iterator, criterion=criterion)
print(f'Validation Loss: {valid_loss:.3f}')

# Mean loss on the test split.
test_loss = evaluate(model=model, iterator=test_iterator, criterion=criterion)
print(f'Test Loss: {test_loss:.3f}')


In [None]:
# Persist only the learned parameters (the state dict), not the whole module object.
torch.save(
    model.state_dict(),
    'transformer_model.pth',
)
