In [1]:
!git clone https://github.com/karpathy/nanoGPT.git


Cloning into 'nanoGPT'...
remote: Enumerating objects: 689, done.[K
remote: Total 689 (delta 0), reused 0 (delta 0), pack-reused 689 (from 1)[K
Receiving objects: 100% (689/689), 975.24 KiB | 6.46 MiB/s, done.
Resolving deltas: 100% (382/382), done.


In [2]:
%%bash
cd /content/nanoGPT
mkdir -p data/abc


In [3]:
%%bash
cp /content/drive/MyDrive/meta.pkl \
   /content/nanoGPT/data/abc/meta.pkl


Generating Unconditional Text

In [None]:
%%bash
cd /content/nanoGPT

python sample.py \
  --out_dir=/content/drive/MyDrive/nanoGPT_runs/out-abc-xl \
  --num_samples=1 \
  --max_new_tokens=100 \
  --temperature=1.0 \
  --top_k=50


Overriding: out_dir = /content/drive/MyDrive/nanoGPT_runs/out-abc-xl
Overriding: num_samples = 1
Overriding: max_new_tokens = 100
Overriding: temperature = 1.0
Overriding: top_k = 50
number of parameters: 177.06M
Loading meta from data/abc/meta.pkl...

[A-^FD,-]/2[AD,-]/2[DD,-] [A-F-D-D,]/2[AFD]/2D,/2z/2 [dAFD,]z/2D/2- [AFDA,]/2z[^A-G-^D-D,-]/2| \
[^A
---------------


  self.setter(val)


For 10 samples

In [None]:
%%bash
cd /content/nanoGPT

python sample.py \
  --out_dir=/content/drive/MyDrive/nanoGPT_runs/out-abc-xl \
  --num_samples=10 \
  --max_new_tokens=1200 \
  --temperature=1.0 \
  --top_k=50 \
  > xl_unconditional.txt


  self.setter(val)


Conditional Data Generation.

In [None]:
%%bash
cd /content/nanoGPT

# 1. Write the tricky prompt to a file safely
cat << 'EOF' > prompt.txt
D G B A | G E D B | A B d e | d B A G |
EOF

# "$(cat prompt.txt)" pulls the text safely into the command
python sample.py \
    --out_dir=/content/drive/MyDrive/nanoGPT_runs/out-abc-xl \
    --start="$(cat prompt.txt)" \
    --num_samples=10 \
    --max_new_tokens=1200 \
    --device=cuda
    > xl_conditional.txt

Overriding: out_dir = /content/drive/MyDrive/nanoGPT_runs/out-abc-xl
Overriding: start = D G B A | G E D B | A B d e | d B A G |
Overriding: num_samples = 10
Overriding: max_new_tokens = 1200
Overriding: device = cuda
number of parameters: 177.06M
Loading meta from data/abc/meta.pkl...
D G B A | G E D B | A B d e | d B A G | \
e2 e2 B2 d2| \
BA G4 z2|
z3E GE GA| \
GE GG A4| \
z3E GE GA| \
GE GG A2 G2|
EE EE DD DE| \
CC CD B,B, B,C| \
DD DC B,2 z2| \
z4  (3FGA  (3Bcd|
ed cc BA GB| \
cB AG FG AE| \
D2 z6| \
z/2[GE]/2[GE]/2[GE]/2 [GE]/2[GE]/2[GE]/2[GE]/2 [GE]/2[GE]/2[GE]/2[GE]/2 [GE]/2[GE]/2[GE]/2[GE]/2|
[GE]/2[GE]/2[GE]/2[GE]/2 [GE]/2[GE]/2[GE]/2[GE]/2 [GE]/2[GE]/2[GE]/2[GE]/2 [GE]/2[GE]/2[GE]/2[GE]/2| \
[GE]/2[GE]/2[GE]/2[GE]/2 [GE]/2[GE]/2[GE]/2[GE]/2 [GE]/2[GE]/2[GE]/2[GE]/2 [GE]/2[GE]/2[GE]/2[GE]/2| \
[GE]/2[GE]/2[GE]/2[GE]/2 [GE]/2[GE]/2[GE]/2[GE]/2 [GE]/2[GE]/2[GE]/2[GE]/2 [GE]/2[GE]/2[GE]/2[GE]/2|
[GE]/2[GE]/2[GE]/2[GE]/2 [GE]/2[GE]/2[GE]/2[GE]/2 [GE]/2[GE]/2[GE]/2[GE]/2 [GE]/2[GE

  self.setter(val)


In [8]:
import os, pickle, numpy as np, torch
import sys
sys.path.append('/content/nanoGPT')
from model import GPTConfig, GPT

# -------------------------
# SET THESE PATHS
# -------------------------
out_dir  = "/content/drive/MyDrive/nanoGPT_runs/out-abc-xl"
data_dir = "/content/nanoGPT/data/abc"
split    = "test"

# match training context/batch if possible
batch_size = 8
block_size = 256
eval_iters = 200

device = "cuda" if torch.cuda.is_available() else "cpu"

# -------------------------
# LOAD META + DATA
# -------------------------
with open(os.path.join(data_dir, "meta.pkl"), "rb") as f:
    meta = pickle.load(f)

data = np.memmap(os.path.join(data_dir, f"{split}.bin"), dtype=np.uint16, mode="r")

def get_batch():
    ix = torch.randint(0, len(data) - block_size - 1, (batch_size,))
    x = torch.stack([torch.from_numpy((data[i:i+block_size]).astype(np.int64)) for i in ix])
    y = torch.stack([torch.from_numpy((data[i+1:i+1+block_size]).astype(np.int64)) for i in ix])
    return x.to(device), y.to(device)

# -------------------------
# LOAD CHECKPOINT
# -------------------------
ckpt_path = os.path.join(out_dir, "ckpt.pt")
ckpt = torch.load(ckpt_path, map_location=device)

model_args = ckpt["model_args"]  # contains n_layer, n_head, n_embd, block_size, vocab_size, etc.
model = GPT(GPTConfig(**model_args))
model.load_state_dict(ckpt["model"])
model.to(device)
model.eval()

# -------------------------
# EVAL
# -------------------------
losses = []
with torch.no_grad():
    for _ in range(eval_iters):
        xb, yb = get_batch()
        _, loss = model(xb, yb)
        losses.append(loss.item())

mean_loss = float(np.mean(losses))
ppl = float(np.exp(mean_loss))
print(f"{split} loss: {mean_loss:.4f}")
print(f"{split} perplexity: {ppl:.2f}")

number of parameters: 177.06M
test loss: 0.4138
test perplexity: 1.51
