### Aligning Transformer Architecture to GPT2 State Dict

In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to imported local modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from load_gpt2_weights import convert_gpt2_weights, load_gpt2_weights, run_inference
from gpt2 import TransformerSampler, ModelConfig, GenerationConfig

# Build both configs from their defaults; no overrides are passed here.
model_cfg = ModelConfig()
gen_cfg = GenerationConfig()
# NOTE(review): judging by the helper's name this presumably returns a sampler with
# pretrained GPT-2 weights loaded — confirm in load_gpt2_weights.py.
sampler = load_gpt2_weights(model_cfg, gen_cfg)
# Hand the sampler to the project's inference helper.
run_inference(sampler)

# Train GPT-2 from scratch

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datasets
from gpt2 import GPT2, ModelConfig, GenerationConfig
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

class TrainingConfig():
    batch_size = 1024
    epochs = 1
    lr: float = 1e-3
    weight_decay: float = 1e-2
    wandb_project: str | None = "training_gpt2"
    wandb_name: str | None = None

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from transformers import GPT2Tokenizer

# Load the pretrained "gpt2" tokenizer and reuse its end-of-sequence token as the pad token.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token

# Start from the default model config, then take the vocabulary size from the tokenizer.
model_cfg = ModelConfig()
model_cfg.vocab_size = tokenizer.vocab_size

gen_cfg = GenerationConfig()
# Build a GPT2 instance from the config alone; no weight-loading call appears in this cell.
model = GPT2(model_cfg)

In [None]:
# Load the "train" split of the children-stories dataset from a local directory.
# NOTE(review): the path is absolute and machine-specific.
story_ds = datasets.load_dataset("/home/ubuntu/MechInter/GPT-2/datasets/children-stories", split="train")


def prepare_story_dataset(
    ds,
    tokenizer,
    num_proc=16,
    cache_file_name="/home/ubuntu/MechInter/GPT-2/datasets/children-stories/cache.arrow",
    writer_batch_size=50000,
):
    """Format each sample as a User/Assistant exchange and tokenize it.

    Every sample becomes the text
    ``<eos>User: {prompt}<eos>\\n\\nAssistant: {text}<eos>`` and is tokenized
    with truncation enabled and no padding, so the resulting ``input_ids``
    have variable length.

    Args:
        ds: Dataset exposing ``column_names`` and ``map``; each sample must
            provide the string fields ``"prompt"`` and ``"text"``.
        tokenizer: Callable tokenizer with an ``eos_token`` attribute whose
            result can be indexed by ``"input_ids"`` and ``"attention_mask"``.
        num_proc: Number of worker processes forwarded to ``ds.map``.
        cache_file_name: Cache file path forwarded to ``ds.map``. The default
            keeps the original absolute path; pass another path (or ``None``)
            on other machines.
        writer_batch_size: Forwarded unchanged to ``ds.map``.

    Returns:
        Whatever ``ds.map`` returns: the mapped dataset, with the original
        columns removed and only ``input_ids`` / ``attention_mask`` produced
        by the mapping function.
    """
    eos = tokenizer.eos_token

    def format_and_tokenize(sample):
        # Explicit `+` everywhere: the template previously relied on implicit
        # adjacent-literal concatenation between "\n\n" and "Assistant: ".
        text = (
            eos
            + "User: " + sample["prompt"] + eos + "\n\n"
            + "Assistant: " + sample["text"] + eos
        )
        encoded = tokenizer(text, truncation=True, padding=False)
        return {
            "input_ids": encoded["input_ids"],
            "attention_mask": encoded["attention_mask"],
        }

    # NOTE(review): with an explicit cache file and load_from_cache_file=True, an existing
    # cache at that path is presumably reused even if the template above changes —
    # confirm against the `datasets` docs and delete the cache file after editing the format.
    return ds.map(
        format_and_tokenize,
        num_proc=num_proc,
        remove_columns=ds.column_names,
        desc="Formatting and tokenizing",
        cache_file_name=cache_file_name,
        load_from_cache_file=True,
        writer_batch_size=writer_batch_size,
    )

# Rebind story_ds to the formatted + tokenized dataset; the raw dataset is no longer
# reachable under this name, so re-running this cell alone would feed it tokenized rows.
story_ds = prepare_story_dataset(story_ds, tokenizer)

Downloading data: 100%|██████████| 16/16 [00:00<00:00, 150131.69files/s]
Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 896668 examples [00:05, 175129.22 examples/s]


Processing dataset (format + tokenize)


In [None]:
class Trainer:
    """Minimal next-token-prediction training loop for a language model.

    Owns an Adam optimiser and a cosine-annealing LR schedule whose period is
    the configured number of epochs (the schedule is advanced once per epoch).
    """

    # Target value that F.cross_entropy skips; used to exclude padded positions.
    _IGNORE_INDEX = -100

    def __init__(self, training_config: "TrainingConfig", model: "GPT2"):
        """Store the config and model and build the optimiser and LR scheduler.

        Args:
            training_config: Object providing ``batch_size``, ``epochs``,
                ``lr`` and ``weight_decay``.
            model: The module to train; called with a tensor of token ids.
        """
        self.training_config = training_config
        self.model = model
        self.optimizer = optim.Adam(
            self.model.parameters(),
            lr=training_config.lr,
            weight_decay=training_config.weight_decay,
        )
        self.scheduler = optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer, T_max=training_config.epochs
        )

    @staticmethod
    def _collate(samples):
        """Right-pad variable-length samples into ``(input_ids, attention_mask)`` tensors.

        Each sample is either a dict with an ``"input_ids"`` entry or a bare
        1-D sequence / tensor of token ids. Padding uses id 0; padded slots are
        marked ``False`` in the returned mask so the loss can skip them.
        """
        rows = [
            torch.as_tensor(s["input_ids"] if isinstance(s, dict) else s, dtype=torch.long)
            for s in samples
        ]
        input_ids = torch.nn.utils.rnn.pad_sequence(rows, batch_first=True, padding_value=0)
        lengths = torch.tensor([row.numel() for row in rows])
        attention_mask = torch.arange(input_ids.size(1))[None, :] < lengths[:, None]
        return input_ids, attention_mask

    def train(self, dataset: "datasets.Dataset"):
        """Train the model on ``dataset`` for the configured number of epochs.

        Puts the model in training mode and leaves it there. Batches are
        padded by ``_collate`` and moved to the device of the model's
        parameters.

        Args:
            dataset: Map-style dataset whose samples are dicts with
                ``"input_ids"`` (or bare token-id sequences).
        """
        dataloader = DataLoader(
            dataset,
            batch_size=self.training_config.batch_size,
            shuffle=True,
            collate_fn=self._collate,  # default collation cannot batch unequal lengths
        )
        device = next(self.model.parameters()).device
        self.model.train()
        for _ in range(self.training_config.epochs):
            for input_ids, attention_mask in dataloader:
                input_ids = input_ids.to(device)
                attention_mask = attention_mask.to(device)
                self.optimizer.zero_grad()
                # Call the module rather than .forward() so registered hooks run.
                # assumes the model is causal, so right-padding cannot influence
                # earlier positions — TODO confirm against GPT2.forward
                logits = self.model(input_ids)
                loss = self.compute_loss(logits, input_ids, attention_mask)
                loss.backward()
                self.optimizer.step()
            # T_max is expressed in epochs, so the schedule advances once per epoch.
            self.scheduler.step()

    def compute_loss(self, logits: torch.Tensor, batch: torch.Tensor, attention_mask=None):
        """Mean next-token cross-entropy.

        The logits at position ``t`` are scored against the token at position
        ``t + 1``; the last position has no target and is dropped.

        Args:
            logits: Model output, indexed as ``[batch, position, vocab]``
                (assumed layout — TODO confirm against GPT2.forward).
            batch: Integer token ids, indexed as ``[batch, position]``.
            attention_mask: Optional mask with the same layout as ``batch``;
                targets where it is zero / False are excluded from the loss.

        Returns:
            Scalar loss tensor.

        Raises:
            ValueError: If the sequences are shorter than two tokens, since
                there is then nothing to predict.
        """
        if batch.size(-1) < 2:
            raise ValueError("compute_loss needs sequences of at least 2 tokens")
        shifted_logits = logits[:, :-1, :]
        targets = batch[:, 1:]
        if attention_mask is not None:
            padded = ~attention_mask[:, 1:].bool()
            targets = targets.masked_fill(padded, self._IGNORE_INDEX)
        # cross_entropy expects (N, C) scores with (N,) class indices.
        return F.cross_entropy(
            shifted_logits.reshape(-1, shifted_logits.size(-1)),
            targets.reshape(-1),
            ignore_index=self._IGNORE_INDEX,
        )

    def save_model(self, path: str):
        """Write the model's ``state_dict`` to ``path`` with ``torch.save``."""
        torch.save(self.model.state_dict(), path)

In [None]:

# Load the "train" split of the red-teaming dataset from a local directory.
# NOTE(review): the path is absolute and machine-specific.
redteaming_ds = datasets.load_dataset("/home/ubuntu/MechInter/GPT-2/datasets/redteaming-dataset", split="train")


In [32]:
# Decode a short list of token ids back to text to inspect what they spell out.
tokenizer.decode([50256, 198, 198, 12982, 25, 16594, 281])

'<|endoftext|>\n\nUser:Write an'

# Single-layer transformer model

In [2]:
from transformer_lens import HookedTransformer, utils
import torch
# Experiment settings; the derived sizes are added below once the base values exist.
# NOTE(review): several keys (e.g. l1_coeff, dict_mult, remove_rare_dir) are not read
# anywhere in the visible cells — confirm which downstream code consumes them.
cfg = dict(
    seed=49,
    batch_size=4096,
    buffer_mult=384,
    lr=1e-4,
    num_tokens=2_000_000_000,
    l1_coeff=3e-4,
    beta1=0.9,
    beta2=0.99,
    dict_mult=8,
    seq_len=128,
    d_mlp=2048,
    enc_dtype="fp32",
    remove_rare_dir=False,
)
cfg.update(model_batch_size=64)
# buffer_size = batch_size * buffer_mult; buffer_batches = buffer_size // seq_len.
cfg.update(buffer_size=cfg["batch_size"] * cfg["buffer_mult"])
cfg.update(buffer_batches=cfg["buffer_size"] // cfg["seq_len"])

# Maps the short dtype names used in cfg["enc_dtype"] to torch dtypes.
DTYPES = dict(fp32=torch.float32, fp16=torch.float16, bf16=torch.bfloat16)

# Load the pretrained "gelu-1l" model and cast it to the dtype selected by cfg["enc_dtype"].
# Note: this rebinds the notebook-level name `model`, which an earlier cell assigned to a GPT2 instance.
model = HookedTransformer.from_pretrained("gelu-1l").to(DTYPES[cfg["enc_dtype"]])


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Loaded pretrained model gelu-1l into HookedTransformer
Changing model dtype to torch.float32
