Training a GPT-2 Model. This notebook provides step-by-step code cells for training a GPT-2 model on your own dataset. Make sure all required packages are installed and imported before running the cells.

Let's define the hyperparameters for training the model.

In [None]:
# Training hyperparameters shared by the cells in this notebook.
BATCH_SIZE = 8  # presumably the number of samples per batch -- confirm where the dataloader is built
EPOCHS = 1      # number of full passes over the dataloader made by train()
LR = 1e-3       # presumably passed to prepare_model() as the optimizer learning rate -- confirm at call site
EPS = 1e-8      # presumably passed to prepare_model() as the optimizer epsilon -- confirm at call site

Define the helper functions needed for training.

In [None]:

def load_and_process_data(data_path, encoding='utf-8'):
    """Read a text file and return its entire contents as one string.

    Args:
        data_path: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8
            so the result does not depend on the platform's locale.

    Returns:
        The full contents of the file as a ``str``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid in ``encoding``.
    """
    # An explicit encoding avoids locale-dependent decoding, which can fail
    # or silently garble non-ASCII text on systems whose default is not UTF-8.
    with open(data_path, 'r', encoding=encoding) as file:
        return file.read()


def tokenize_data(tokenizer, text):
    """Encode ``text`` with ``tokenizer`` and return the result.

    Calls ``tokenizer.encode`` with ``return_tensors='pt'`` and returns
    whatever that call produces (presumably a PyTorch tensor of token ids --
    confirm against the tokenizer in use).
    """
    return tokenizer.encode(text, return_tensors='pt')

def prepare_model(device, lr, eps):
    """Build the model and its optimizer.

    Loads the pretrained ``'gpt2'`` checkpoint through
    ``GPT2LMHeadModel.from_pretrained``, moves it to ``device``, and creates
    an ``AdamW`` optimizer over the model's parameters.

    Args:
        device: Target device passed to ``model.to``.
        lr: Learning rate forwarded to ``AdamW``.
        eps: Epsilon forwarded to ``AdamW``.

    Returns:
        A ``(model, optimizer)`` tuple.
    """
    gpt2 = GPT2LMHeadModel.from_pretrained('gpt2').to(device)
    return gpt2, AdamW(gpt2.parameters(), lr=lr, eps=eps)

def perform_training_step(device, model, optimizer, batch):
    """Run one optimization step on a single batch and return its loss.

    Args:
        device: Device each value of ``batch`` is moved to via ``.to``.
        model: Callable model; invoked with the batch entries as keyword
            arguments.
        optimizer: Optimizer whose ``step()`` applies the update.
        batch: Mapping of argument name to a value exposing ``.to(device)``.

    Returns:
        ``.item()`` of the first element of the model output, which is
        treated as the loss (presumably the batch includes labels so the
        model returns a loss first -- confirm against the dataloader).
    """
    # Move every entry of the batch onto the target device.
    on_device = {}
    for name, value in batch.items():
        on_device[name] = value.to(device)

    # Clear gradients left over from the previous step before backprop.
    model.zero_grad()
    step_loss = model(**on_device)[0]
    step_loss.backward()
    optimizer.step()
    return step_loss.item()

def train(device, model, optimizer, dataloader):
    """Train ``model`` over ``dataloader`` for ``EPOCHS`` passes.

    Puts the model in training mode, then runs ``perform_training_step`` on
    every batch. The loss is printed for batch index 0 and every 100th batch
    after it within each epoch.

    Args:
        device: Device forwarded to ``perform_training_step``.
        model: Model to train.
        optimizer: Optimizer forwarded to ``perform_training_step``.
        dataloader: Iterable yielding batches.
    """
    model.train()
    for _ in range(EPOCHS):
        for step, minibatch in enumerate(dataloader):
            loss = perform_training_step(device, model, optimizer, minibatch)
            # Only report on every 100th batch to keep the output readable.
            if step % 100 != 0:
                continue
            print(f'Current loss: {loss}')

def save_model(model, model_dir):
    """Save ``model`` by calling its ``save_pretrained`` with ``model_dir``.

    Args:
        model: Object exposing a ``save_pretrained`` method.
        model_dir: Destination passed through to ``save_pretrained``.
    """
    persist = model.save_pretrained
    persist(model_dir)

