In [4]:
!pip install -q transformers accelerate

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import time
import os

# Run on the GPU when one is present; every CUDA-only call below is guarded
# by `use_cuda` so the cell also works on CPU-only hosts.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
cpu = torch.device("cpu")

tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
# Reuse EOS as the pad token so `padding=True` has a token to pad with.
tokenizer.pad_token = tokenizer.eos_token

batch = tokenizer(
    ["The future of AI is very bright."] * 4,
    return_tensors="pt",
    padding=True,
    truncation=True,
)
input_ids = batch.input_ids.to(device)
attention_mask = batch.attention_mask.to(device)
# Causal-LM training uses the inputs as labels. Padded positions are set to
# -100 so the loss ignores them; with equal-length prompts (as here) nothing
# is masked and `labels` equals `input_ids`.
labels = input_ids.masked_fill(attention_mask == 0, -100)

model = AutoModelForCausalLM.from_pretrained("distilgpt2").to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)

print("Manual offloading simulation")
if use_cuda:
    # Peak-memory counters only exist for CUDA devices.
    torch.cuda.reset_peak_memory_stats()
start = time.perf_counter()

try:
    model.train()
    outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
    loss = outputs.loss

    # Simulated offload: move the weights to host memory between the forward
    # and backward passes, wait briefly, then bring them back.
    model.to(cpu)
    time.sleep(0.1)
    model.to(device)

    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    if use_cuda:
        # CUDA kernels are launched asynchronously; wait for them so the
        # elapsed time covers the backward pass and optimizer step.
        torch.cuda.synchronize()
    end = time.perf_counter()

    peak_mb = torch.cuda.max_memory_allocated() / 1e6 if use_cuda else 0.0
    print(f"[Offload Sim] Success | Loss: {loss.item():.4f} | Time: {end-start:.2f}s | GPU: {peak_mb:.2f} MB")
except RuntimeError as e:
    # Report failures such as out-of-memory or device-mismatch errors
    # instead of aborting the cell.
    end = time.perf_counter()
    print(f"[Offload Sim] Failed: {e} | Time: {end-start:.2f}s")


Manual offloading simulation
[Offload Sim] Success | Loss: 4.1435 | Time: 0.49s | GPU: 2342.12 MB
