In [3]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup
from tqdm import tqdm

def batchify(sequence, batch_size):
    num_batches = len(sequence) // batch_size
    return [sequence[i*batch_size:(i+1)*batch_size] for i in range(num_batches)]

# Set up the device (GPU or CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2').to(device)

with open('input.txt', 'r') as file:
    text = file.read()
    
tokens = tokenizer.encode(text)

epochs = 3
batch_size = 32
learning_rate = 10e-10
warmup_steps = 3000

optimizer = AdamW(model.parameters(), lr=learning_rate)
total_steps = len(tokens) // (batch_size * epochs)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps, num_training_steps=total_steps)

token_batches = batchify(tokens, batch_size)

for epoch in range(epochs):
    model.train()
    epoch_loss = 0
    progress_bar = tqdm(token_batches, desc=f'Epoch {epoch+1}', unit='batch')

    for token_batch in progress_bar:
        input_ids = torch.tensor(token_batch).unsqueeze(0).to(device)
        labels = input_ids.clone().detach()
        labels[0, :-1] = input_ids[0, 1:]
        labels[0, -1] = input_ids[0, 0]
        outputs = model(input_ids=input_ids, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

        epoch_loss += loss.item()
        progress_bar.set_postfix({'Loss': f'{epoch_loss / (progress_bar.n + 1):.2f}'})
    
    print(f'Epoch {epoch+1} average loss: {epoch_loss / len(token_batches):.2f}')

model.save_pretrained('finetuned_gpt2')
tokenizer.save_pretrained('finetuned_gpt2')

Token indices sequence length is longer than the specified maximum sequence length for this model (338025 > 1024). Running this sequence through the model will result in indexing errors
Epoch 1: 100%|██████████| 10563/10563 [15:26<00:00, 11.40batch/s, Loss=8.87]


Epoch 1 average loss: 8.87


Epoch 2: 100%|██████████| 10563/10563 [15:29<00:00, 11.36batch/s, Loss=8.87]


Epoch 2 average loss: 8.87


Epoch 3: 100%|██████████| 10563/10563 [15:28<00:00, 11.38batch/s, Loss=8.87]


Epoch 3 average loss: 8.87


('finetuned_gpt2\\tokenizer_config.json',
 'finetuned_gpt2\\special_tokens_map.json',
 'finetuned_gpt2\\vocab.json',
 'finetuned_gpt2\\merges.txt',
 'finetuned_gpt2\\added_tokens.json')

In [6]:
from fastapi import FastAPI
from transformers import GPT2LMHeadModel, GPT2Tokenizer

app = FastAPI()

# Load the fine-tuned GPT-2 model and tokenizer
model = GPT2LMHeadModel.from_pretrained("finetuned_gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("finetuned_gpt2")

# Set the maximum sequence length
MAX_LENGTH = 1024

# Define the API endpoint for text generation
@app.post("/generate_text/")
async def generate_text(prompt: str):
    # Encode the prompt using the tokenizer
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    # Generate text with the model
    output = model.generate(
        input_ids=input_ids,
        max_length=MAX_LENGTH,
        temperature=0.7,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        repetition_penalty=1.0,
        num_return_sequences=1,
    )

    # Decode the generated text and return it
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return {"generated_text": generated_text}