In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator


In [None]:
class TransformerModel(nn.Module):
    """Encoder-decoder Transformer that emits one vector of class logits per sample.

    All tensors are batch-first: ``src`` and ``tgt`` are integer token-id
    tensors of shape ``(batch, seq_len)``, and the result has shape
    ``(batch, num_classes)``.

    Args:
        vocab_size: number of rows in the token embedding table.
        d_model: embedding width and Transformer model dimension.
        nhead: number of attention heads.
        num_encoder_layers: number of encoder layers.
        num_decoder_layers: number of decoder layers.
        dim_feedforward: hidden size of the feed-forward sublayers.
        max_seq_len: longest sequence the learned positional table covers.
        num_classes: size of the output logits vector.
    """

    def __init__(self, vocab_size, d_model, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward, max_seq_len, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        # Learned positional table, laid out as (1, max_seq_len, d_model) so it
        # broadcasts over the batch dimension of batch-first embeddings.
        self.positional_encoding = nn.Parameter(torch.zeros(1, max_seq_len, d_model))
        # batch_first=True keeps the Transformer's layout in agreement with the
        # positional table above (the default would be sequence-first).
        self.transformer = nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            dim_feedforward=dim_feedforward,
            batch_first=True,
        )
        self.fc_out = nn.Linear(d_model, num_classes)

    def _embed(self, tokens):
        """Embed ``(batch, seq_len)`` token ids and add positional encodings."""
        seq_len = tokens.size(1)
        max_seq_len = self.positional_encoding.size(1)
        if seq_len > max_seq_len:
            raise ValueError(
                f"sequence length {seq_len} exceeds max_seq_len {max_seq_len}"
            )
        return self.embedding(tokens) + self.positional_encoding[:, :seq_len, :]

    def forward(self, src, tgt):
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            src: ``(batch, src_len)`` tensor of token ids for the encoder.
            tgt: ``(batch, tgt_len)`` tensor of token ids for the decoder.

        Raises:
            ValueError: if either sequence is longer than ``max_seq_len``.
        """
        output = self.transformer(self._embed(src), self._embed(tgt))
        # Classify from the decoder state at the last target position.
        # NOTE(review): no padding mask is applied, so for right-padded input
        # this position may correspond to a padding token.
        return self.fc_out(output[:, -1, :])


In [None]:
# Model hyperparameters
vocab_size = 10_000        # rows in the token embedding table
d_model = 512              # embedding / Transformer width
nhead = 8                  # attention heads
num_encoder_layers = 6
num_decoder_layers = 6
dim_feedforward = 2048     # feed-forward hidden size
max_seq_len = 100          # length of the positional-encoding table
num_classes = 10           # size of the output layer

# Keyword arguments make the mapping to the constructor explicit.
model = TransformerModel(
    vocab_size=vocab_size,
    d_model=d_model,
    nhead=nhead,
    num_encoder_layers=num_encoder_layers,
    num_decoder_layers=num_decoder_layers,
    dim_feedforward=dim_feedforward,
    max_seq_len=max_seq_len,
    num_classes=num_classes,
)


In [None]:
# Shared tokenizer callable (torchtext's 'basic_english' spec); used by
# yield_tokens and text_pipeline below to split raw text into tokens.
tokenizer = get_tokenizer('basic_english')

def yield_tokens(data_iter):
    """Lazily yield the tokenizer's output for each text in ``data_iter``."""
    yield from map(tokenizer, data_iter)

# Toy corpus used to build the vocabulary and to feed the data loader.
train_data = [
    "Hello world",
    "My name is John",
    "Transformers are cool",
]

# Specials are listed first in the vocabulary; unknown tokens fall back to "<unk>".
vocab = build_vocab_from_iterator(
    yield_tokens(train_data),
    specials=["<unk>", "<pad>"],
)
vocab.set_default_index(vocab["<unk>"])

def text_pipeline(x):
    """Tokenize the string ``x`` and map its tokens to vocabulary indices."""
    tokens = tokenizer(x)
    return vocab(tokens)

def collate_batch(batch):
    """Turn a batch of raw texts into one padded tensor of token ids.

    Args:
        batch: iterable of text strings, as delivered by the DataLoader.

    Returns:
        A ``torch.long`` tensor of shape ``(batch_size, longest_sequence)``.
        Shorter sequences are right-padded with the index of ``"<pad>"``.
    """
    encoded = [torch.tensor(text_pipeline(item), dtype=torch.long) for item in batch]
    # pad_sequence pads with 0 by default, which is the "<unk>" index in this
    # vocabulary; pad with the dedicated "<pad>" index instead.
    return nn.utils.rnn.pad_sequence(
        encoded,
        batch_first=True,
        padding_value=vocab["<pad>"],
    )


In [None]:
from torch.utils.data import DataLoader

# Batches of two raw strings, converted to padded id tensors by collate_batch.
train_loader = DataLoader(
    dataset=train_data,
    batch_size=2,
    collate_fn=collate_batch,
)


In [None]:
# Classification loss over the model's logits.
criterion = nn.CrossEntropyLoss()
# Adam over every model parameter.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [None]:
num_epochs = 10

# Toy training loop: each batch serves as both encoder and decoder input, and
# the labels are synthetic, so the reported loss only shows that the pipeline runs.
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for batch in train_loader:
        src, tgt = batch, batch  # Simplified for this example
        optimizer.zero_grad()
        output = model(src, tgt)
        # Dummy labels, one per sample in the batch (1, 2, ... wrapped into the
        # valid class range). Sizing them from the batch keeps the loss from
        # failing on a final batch smaller than batch_size.
        # Assumes src is (batch, seq) and output has one row of logits per sample.
        dummy_targets = torch.arange(1, src.size(0) + 1) % output.size(-1)
        loss = criterion(output, dummy_targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader)}')
