<a href="https://colab.research.google.com/github/OneFineStarstuff/State-of-the-Art/blob/main/Transformers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the Transformer model
class TransformerModel(nn.Module):
    """Sequence-to-sequence Transformer with a shared token embedding.

    Source and target token ids are looked up in the same ``nn.Embedding``,
    scaled by ``sqrt(model_dim)``, run through ``nn.Transformer`` and
    projected to ``output_dim`` logits by a final linear layer.

    The wrapped ``nn.Transformer`` is created without ``batch_first``, so
    inputs are laid out as (sequence length, batch size). No positional
    encoding is added to the embeddings.

    Args:
        input_dim: Vocabulary size of the shared embedding table.
        model_dim: Embedding / Transformer feature dimension.
        output_dim: Number of output classes per position.
        n_heads: Number of attention heads.
        n_layers: Number of encoder layers and of decoder layers.
        dropout: Dropout probability used inside the Transformer.
    """

    def __init__(self, input_dim, model_dim, output_dim, n_heads, n_layers, dropout):
        super().__init__()
        # Kept on the instance so forward() can scale embeddings correctly.
        self.model_dim = model_dim
        self.embedding = nn.Embedding(input_dim, model_dim)
        self.transformer = nn.Transformer(
            d_model=model_dim,
            nhead=n_heads,
            num_encoder_layers=n_layers,
            num_decoder_layers=n_layers,
            dropout=dropout,
        )
        self.fc = nn.Linear(model_dim, output_dim)

    def forward(self, src, tgt, src_mask=None, tgt_mask=None, memory_mask=None):
        """Return logits for every target position.

        Args:
            src: Integer token ids for the source sequence.
            tgt: Integer token ids for the (shifted) target sequence.
            src_mask: Optional attention mask for the encoder.
            tgt_mask: Optional attention mask for the decoder self-attention.
            memory_mask: Optional mask for decoder-to-encoder attention.

        Returns:
            Tensor shaped like ``tgt`` with a trailing ``output_dim`` axis.
        """
        # Scale by sqrt(model_dim). The previous code used size(-1) of the id
        # tensor (the batch size) and built the factor as a CPU tensor, which
        # also broke when the model lived on another device.
        scale = self.model_dim ** 0.5
        src_emb = self.embedding(src) * scale
        tgt_emb = self.embedding(tgt) * scale
        output = self.transformer(
            src_emb,
            tgt_emb,
            src_mask=src_mask,
            tgt_mask=tgt_mask,
            memory_mask=memory_mask,
        )
        return self.fc(output)

# Initialize the model, optimizer, and loss function
input_dim = 1000  # Vocabulary size for source and target
model_dim = 512
output_dim = input_dim  # Output dimension is same as input for sequence-to-sequence
n_heads = 8
n_layers = 6
dropout = 0.1

model = TransformerModel(input_dim, model_dim, output_dim, n_heads, n_layers, dropout)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

# Training loop (pseudo data for demonstration)
num_epochs = 5
for epoch in range(num_epochs):
    src = torch.randint(0, input_dim, (10, 32))  # (sequence length, batch size)
    tgt = torch.randint(0, input_dim, (10, 32))
    # Teacher forcing: the decoder sees tokens [0, T-1) and predicts [1, T).
    tgt_input = tgt[:-1, :]
    tgt_output = tgt[1:, :]
    # Causal mask so position i cannot attend to later target positions;
    # without it the decoder can read the tokens it is asked to predict.
    tgt_mask = model.transformer.generate_square_subsequent_mask(tgt_input.size(0))

    optimizer.zero_grad()
    output = model(src, tgt_input, tgt_mask=tgt_mask)
    # Flatten (seq, batch, vocab) -> (seq * batch, vocab) for the loss;
    # reshape tolerates non-contiguous tensors where view would raise.
    loss = criterion(output.reshape(-1, output_dim), tgt_output.reshape(-1))
    loss.backward()
    optimizer.step()

    print(f'Epoch {epoch + 1}, Loss: {loss.item():.4f}')

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the Transformer model
class TransformerModel(nn.Module):
    """Batch-first sequence-to-sequence Transformer with a shared embedding.

    Source and target token ids are looked up in the same ``nn.Embedding``,
    scaled by ``sqrt(model_dim)``, run through ``nn.Transformer`` and
    projected to ``output_dim`` logits by a final linear layer.

    The wrapped ``nn.Transformer`` is created with ``batch_first=True``, so
    inputs are laid out as (batch size, sequence length). No positional
    encoding is added to the embeddings.

    Args:
        input_dim: Vocabulary size of the shared embedding table.
        model_dim: Embedding / Transformer feature dimension.
        output_dim: Number of output classes per position.
        n_heads: Number of attention heads.
        n_layers: Number of encoder layers and of decoder layers.
        dropout: Dropout probability used inside the Transformer.
    """

    def __init__(self, input_dim, model_dim, output_dim, n_heads, n_layers, dropout):
        super().__init__()
        # Kept on the instance so forward() does not depend on a module-level
        # ``model_dim`` global that may be missing or hold a different value.
        self.model_dim = model_dim
        self.embedding = nn.Embedding(input_dim, model_dim)
        self.transformer = nn.Transformer(
            d_model=model_dim,
            nhead=n_heads,
            num_encoder_layers=n_layers,
            num_decoder_layers=n_layers,
            dropout=dropout,
            batch_first=True,
        )
        self.fc = nn.Linear(model_dim, output_dim)

    def forward(self, src, tgt, src_mask=None, tgt_mask=None, memory_mask=None):
        """Return logits for every target position.

        Args:
            src: Integer token ids for the source sequence.
            tgt: Integer token ids for the (shifted) target sequence.
            src_mask: Optional attention mask for the encoder.
            tgt_mask: Optional attention mask for the decoder self-attention.
            memory_mask: Optional mask for decoder-to-encoder attention.

        Returns:
            Tensor shaped like ``tgt`` with a trailing ``output_dim`` axis.
        """
        # Use the dimension this instance was built with, not a global.
        scale = self.model_dim ** 0.5
        src_emb = self.embedding(src) * scale
        tgt_emb = self.embedding(tgt) * scale
        output = self.transformer(
            src_emb,
            tgt_emb,
            src_mask=src_mask,
            tgt_mask=tgt_mask,
            memory_mask=memory_mask,
        )
        return self.fc(output)

# Initialize the model, optimizer, and loss function
input_dim = 1000  # Vocabulary size for source and target
model_dim = 512
output_dim = input_dim  # Output dimension is same as input for sequence-to-sequence
n_heads = 8
n_layers = 6
dropout = 0.1

model = TransformerModel(input_dim, model_dim, output_dim, n_heads, n_layers, dropout)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

# Training loop (pseudo data for demonstration)
num_epochs = 5
for epoch in range(num_epochs):
    src = torch.randint(0, input_dim, (32, 10))  # (batch size, sequence length)
    tgt = torch.randint(0, input_dim, (32, 10))
    # Teacher forcing: the decoder sees tokens [0, T-1) and predicts [1, T).
    tgt_input = tgt[:, :-1]
    tgt_output = tgt[:, 1:]
    # Causal mask so position i cannot attend to later target positions;
    # sized by the sequence axis (dim 1) because the layout is batch-first.
    tgt_mask = model.transformer.generate_square_subsequent_mask(tgt_input.size(1))

    optimizer.zero_grad()
    output = model(src, tgt_input, tgt_mask=tgt_mask)
    # CrossEntropyLoss expects the class axis second for sequence inputs.
    output = output.permute(0, 2, 1)  # (batch size, output_dim, sequence length)
    loss = criterion(output, tgt_output)
    loss.backward()
    optimizer.step()

    print(f'Epoch {epoch + 1}, Loss: {loss.item():.4f}')