# GPT2 project

### Imports and setup

In [None]:
!pip install transformers torch
!pip install tiktoken
!pip install wandb

In [None]:
from dataloader import make_loaders
from model import GPT, GPTConfig
from train import train
from text_generation import generate_text
import torch
from torch.utils.data import DataLoader

In [None]:
# Pick the compute device once; later cells move the model onto it.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using", device)

In [None]:
# Log in to Weights & Biases from the shell; the training cell passes
# wandb_project / wandb_run_name to train().
!wandb login

## Training and fine-tuning

### Model

In [None]:
# Build the network from the default GPTConfig.
# Alternative starting point: model = GPT.from_pretrained("gpt2")
model = GPT(GPTConfig())
model.to(device)

# `model` stays available as the plain module (its state dict is what gets
# saved later); the compiled wrapper is the one handed to train().
compiled_model = torch.compile(model)

### Dataset

In [None]:
# Build the training and validation loaders from a single text file.
# NOTE(review): how `overlap` interacts with `block_size` is defined in
# dataloader.py — confirm there before changing either value.
train_loader, val_loader = make_loaders(
    path="input.txt",
    block_size=1024,
    overlap=896,
    batch_size=16,
    val_frac=0.1,
)

### Training

In [None]:
# Run training on the compiled model; whatever train() returns is kept in
# `hist`. The exact meaning of each keyword is defined in train.py — the
# notes below are reviewer hints, not a specification.
hist = train(
    compiled_model,
    train_loader,
    val_loader,
    device=device,
    epochs=5,
    # Optimisation settings.
    lr=1e-5,               # base learning rate; presumably the warm-up target
    warmup_steps=200,      # presumably counted in optimiser steps — confirm
    weight_decay=0.1,
    clip_grad_norm=1.0,    # presumably the max gradient norm — confirm
    # Early stopping.
    max_val_loss_increase=1.03,   # presumably a tolerated ratio — confirm
    early_stopping_patience=2,
    # Weights & Biases logging.
    wandb_project="tiny-shakespeare-gpt",
    wandb_run_name="pre-trained",
)


In [None]:
# Persist the weights of `model` (the uncompiled module) as a state dict.
# The file name must match the one the "Load models" cell reads
# ("gpt2_shakespeare.pt"); the previous spelling "gpt2_shakespear.pt" made
# that later load fail because the file did not exist.
name = "gpt2_shakespeare.pt"

torch.save(model.state_dict(), name)

## Text generation and analysis

### Load models

In [None]:
# Rebuild the architecture from the default GPTConfig, then load the
# checkpoint file into it.
model = GPT(GPTConfig())

name = "gpt2_shakespeare.pt"
# map_location remaps the stored tensors onto the selected device, so a
# checkpoint written on a GPU machine also loads on a CPU-only one.
model.load_state_dict(torch.load(name, map_location=device))

model.to(device)
# Only inference follows: switch to eval mode (turns off training-only
# behaviour such as dropout, if the model uses any).
model.eval()

In [None]:
# Generate text from the loaded model for a fixed prompt and print it.
prompt = "ALBERT EINSTEIN"

generated_text = generate_text(
    model,
    prompt,
    device,
    top_p=0.9,
    temperature=0.8,
)

print(generated_text)