In [None]:
# Import libraries
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import AutoTokenizer
# Other libraries

In [3]:
# Load training data from source
# Decode explicitly as UTF-8 so the text read does not depend on the
# platform's default locale encoding.
with open('data/training_data.txt', 'r', encoding='utf-8') as file:
    raw_text = file.read()
# Preview the first 500 characters as a sanity check
print(raw_text[:500])

In [2]:
# Split the raw corpus into tokens with the pretrained 'bert-base-uncased' tokenizer
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
tokens = tokenizer.tokenize(raw_text)
# Show the first 20 tokens as a sanity check
print(tokens[0:20])

In [4]:
# Convert each token string to its ID using the same tokenizer
token_ids = tokenizer.convert_tokens_to_ids(tokens)
# Show the first 20 IDs as a sanity check
print(token_ids[0:20])

In [5]:
# Generate token and positional embeddings
class Embeddings(nn.Module):
    """Sum of a learned token embedding and a learned positional embedding.

    Args:
        vocab_size: Number of rows in the token embedding table.
        emb_size: Width of every embedding vector.
        max_len: Number of rows in the positional embedding table, i.e. the
            longest sequence ``forward`` accepts.
    """
    def __init__(self, vocab_size, emb_size, max_len):
        super().__init__()
        self.token_emb = nn.Embedding(vocab_size, emb_size)
        self.pos_emb = nn.Embedding(max_len, emb_size)
    def forward(self, x):
        """Embed token IDs and add the embedding of each position.

        Args:
            x: Integer tensor of token IDs whose dimension 1 indexes the
                position in the sequence (e.g. shape ``(batch, seq_len)``).

        Returns:
            Tensor with the shape of ``x`` plus a trailing ``emb_size`` axis.

        Raises:
            IndexError: If ``x.size(1)`` exceeds ``max_len``.
        """
        seq_len = x.size(1)
        max_len = self.pos_emb.num_embeddings
        # Fail early with an actionable message instead of an opaque
        # out-of-range lookup inside the positional embedding table.
        if seq_len > max_len:
            raise IndexError(
                f"sequence length {seq_len} exceeds max_len {max_len}; "
                "truncate or chunk the input"
            )
        # Shape (1, seq_len): broadcasts across the leading dimension of x.
        positions = torch.arange(seq_len, device=x.device).unsqueeze(0)
        return self.token_emb(x) + self.pos_emb(positions)
# Example usage:
vocab_size = tokenizer.vocab_size
emb_size = 256
max_len = 512
embedding_layer = Embeddings(vocab_size, emb_size, max_len)
# The positional table only has max_len rows, so feeding the whole corpus as a
# single sequence would index past it. Keep the first max_len token IDs.
# dtype is pinned to long so an empty ID list still yields an integer tensor.
input_ids = torch.tensor(token_ids[:max_len], dtype=torch.long).unsqueeze(0)
inputs_embedded = embedding_layer(input_ids)
print(inputs_embedded.shape)

In [8]:
# Define self attention mechanism
class SelfAttention(nn.Module):
    """Single-head scaled dot-product self-attention.

    Args:
        emb_size: Width of the input features; queries, keys and values are
            all projected to this same width.
        causal: When True (default), each position may attend only to itself
            and earlier positions. This is required for next-token-prediction
            training, otherwise a position can read the token it is asked to
            predict. Pass False for unmasked (bidirectional) attention.
    """
    def __init__(self, emb_size, causal=True):
        super().__init__()
        self.causal = causal
        self.query = nn.Linear(emb_size, emb_size)
        self.key = nn.Linear(emb_size, emb_size)
        self.value = nn.Linear(emb_size, emb_size)
    def forward(self, x):
        """Apply self-attention over the second-to-last dimension of ``x``.

        Args:
            x: Float tensor whose last dimension is ``emb_size`` and whose
                second-to-last dimension is the sequence position.

        Returns:
            Tensor with the same shape as ``x``.
        """
        Q = self.query(x)
        K = self.key(x)
        V = self.value(x)
        # Scale by sqrt of the key width to keep the logits in a stable range.
        attn_scores = torch.matmul(Q, K.transpose(-2, -1)) / (K.size(-1) ** 0.5)
        if self.causal:
            seq_len = x.size(-2)
            # True strictly above the diagonal marks "future" key positions.
            future = torch.triu(
                torch.ones(seq_len, seq_len, dtype=torch.bool, device=x.device),
                diagonal=1,
            )
            # -inf becomes weight 0 after softmax; the diagonal is never
            # masked, so every row keeps at least one finite score.
            attn_scores = attn_scores.masked_fill(future, float('-inf'))
        attn_weights = torch.softmax(attn_scores, dim=-1)
        return torch.matmul(attn_weights, V)
# Shape check: pass the embedded batch through one attention layer
attn = SelfAttention(emb_size)
attn_output = attn(inputs_embedded)
print(attn_output.size())

In [9]:
# Build transformer block (attention + feed-forward + normalization)
class TransformerBlock(nn.Module):
    """Self-attention followed by a two-layer feed-forward network.

    Each sub-layer's output is added back to its input and the sum is passed
    through its own LayerNorm.

    Args:
        emb_size: Feature width of the input and output.
        ff_hidden: Width of the hidden layer inside the feed-forward network.
    """
    def __init__(self, emb_size, ff_hidden):
        super().__init__()
        self.attn = SelfAttention(emb_size)
        self.norm1 = nn.LayerNorm(emb_size)
        # Expand to ff_hidden, apply ReLU, then project back to emb_size.
        expand = nn.Linear(emb_size, ff_hidden)
        activation = nn.ReLU()
        project = nn.Linear(ff_hidden, emb_size)
        self.ff = nn.Sequential(expand, activation, project)
        self.norm2 = nn.LayerNorm(emb_size)
    def forward(self, x):
        """Return the block output; same shape as ``x``."""
        attended = self.norm1(x + self.attn(x))
        return self.norm2(attended + self.ff(attended))
# Shape check: pass the embedded batch through a single transformer block
transformer_block = TransformerBlock(emb_size=emb_size, ff_hidden=512)
transformed = transformer_block(inputs_embedded)
print(transformed.size())

In [10]:
# Stack multiple transformer blocks to form the model
class SimpleLLM(nn.Module):
    """Embedding layer, a stack of transformer blocks, and a vocabulary projection.

    Args:
        vocab_size: Size of the token vocabulary; also the width of the output logits.
        emb_size: Feature width used throughout the model.
        max_len: Number of positions the embedding layer supports.
        num_layers: How many TransformerBlock instances to stack.
        ff_hidden: Hidden width of each block's feed-forward network.
    """
    def __init__(self, vocab_size, emb_size, max_len, num_layers, ff_hidden):
        super().__init__()
        self.embeddings = Embeddings(vocab_size, emb_size, max_len)
        layers = [TransformerBlock(emb_size, ff_hidden) for _ in range(num_layers)]
        self.transformer_blocks = nn.ModuleList(layers)
        self.fc_out = nn.Linear(emb_size, vocab_size)
    def forward(self, x):
        """Map token IDs to per-position logits over the vocabulary."""
        hidden = self.embeddings(x)
        for layer in self.transformer_blocks:
            hidden = layer(hidden)
        return self.fc_out(hidden)
# Build a two-layer model and check the shape of its output logits
model = SimpleLLM(
    vocab_size=vocab_size,
    emb_size=emb_size,
    max_len=max_len,
    num_layers=2,
    ff_hidden=512,
)
logits = model(input_ids)
print(logits.size())

In [11]:
# Cross-entropy loss, with Adam updating every parameter of the model
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [12]:
# Training loop (simplified): one gradient step on the full sequence per epoch
num_epochs = 5
for epoch in range(num_epochs):
    optimizer.zero_grad()
    # Next-token setup: the prediction at position t is scored against the
    # token at position t+1, so drop the first token from the targets and
    # the last position from the predictions.
    target = input_ids[:, 1:]
    logits = model(input_ids)[:, :-1, :]
    # Collapse the leading two axes so each position is one classification example.
    loss = loss_fn(logits.flatten(0, 1), target.flatten())
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")