In [None]:
# Mount Google Drive at /content/drive so later cells can create and use
# folders under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os

# Root folder on the mounted Drive that holds this project's artifacts.
drive_project_dir = "/content/drive/MyDrive/Language_Model"

# Create the root first, then the checkpoints/ and runs/ subfolders.
# exist_ok=True keeps the cell safe to re-run.
for folder in (
    drive_project_dir,
    f"{drive_project_dir}/checkpoints",
    f"{drive_project_dir}/runs",
):
    os.makedirs(folder, exist_ok=True)

print("Drive project directory ready:", drive_project_dir)

In [2]:
# clone project
!git clone https://github.com/JennyGVoice/Language_Model.git

Cloning into 'Language_Model'...
remote: Enumerating objects: 73, done.[K
remote: Counting objects: 100% (73/73), done.[K
remote: Compressing objects: 100% (51/51), done.[K
remote: Total 73 (delta 33), reused 55 (delta 15), pack-reused 0 (from 0)[K
Receiving objects: 100% (73/73), 442.25 KiB | 14.74 MiB/s, done.
Resolving deltas: 100% (33/33), done.


In [1]:
%cd Language_Model
!pwd
!ls -a

[Errno 2] No such file or directory: 'Language_Model'
/content
/content
.  ..  .config	sample_data


In [4]:
# Report whether a CUDA GPU is visible and, if so, its name.
import torch

cuda_ok = torch.cuda.is_available()
# get_device_name() raises when no CUDA device is present, so only query it on
# GPU runtimes; on CPU-only runtimes the cell shows (False, None) instead of crashing.
gpu_name = torch.cuda.get_device_name() if cuda_ok else None
cuda_ok, gpu_name

(True, 'Tesla T4')

In [5]:
# Print the NVIDIA driver/GPU status table (model, memory usage, utilisation)
# for the runtime this notebook is attached to.
!nvidia-smi

Tue Dec  9 20:39:36 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   66C    P8             12W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [6]:
# Run the project's training entry point.
# `src` is imported by package name, so this presumably requires the current
# working directory to be the cloned repository — run the clone/cd cells first.
# NOTE(review): the recorded log reports checkpoints saved under
# experiments/checkpoints (runtime-local), not under the Drive folders created
# earlier — confirm whether they should be copied to Drive to survive a reset.
from src.train import main
main()

Using device: cuda
Begin evaluation/ 6000

step 0 | train loss 4.3784 | val loss 5.3287 | ppl 206.18
Saved: experiments/checkpoints/step_0.pt
Begin evaluation1 / 6000

step 200 | train loss 2.0549 | val loss 2.1990 | ppl 9.02
Saved: experiments/checkpoints/step_200.pt
Begin evaluation1 / 6000

step 400 | train loss 1.5895 | val loss 1.6775 | ppl 5.35
Saved: experiments/checkpoints/step_400.pt
Begin evaluation1 / 6000

step 600 | train loss 1.4818 | val loss 1.3214 | ppl 3.75
Saved: experiments/checkpoints/step_600.pt
Begin evaluation1 / 6000

step 800 | train loss 1.3706 | val loss 1.1750 | ppl 3.24
Saved: experiments/checkpoints/step_800.pt
Begin evaluation01 / 6000

step 1000 | train loss 1.2798 | val loss 1.1071 | ppl 3.03
Saved: experiments/checkpoints/step_1000.pt
Begin evaluation01 / 6000

step 1200 | train loss 1.2286 | val loss 1.1403 | ppl 3.13
Saved: experiments/checkpoints/step_1200.pt
Begin evaluation01 / 6000

step 1400 | train loss 1.1282 | val loss 1.0817 | ppl 2.95
Save

In [None]:
# Load the TensorBoard notebook extension and open it on experiments/runs.
# NOTE(review): presumably this is where src.train writes its event files —
# confirm against the training code.
%load_ext tensorboard
%tensorboard --logdir experiments/runs

In [None]:
import torch
from src.dataset import CharDataset
from src.model.gpt_like import GPTLanguageModel

# Load the raw text corpus. The encoding is pinned so decoding does not depend
# on the platform's default locale.
with open("data/input.txt", "r", encoding="utf-8") as f:
    text = f.read()

# The dataset provides vocab_size and the encode/decode helpers used below.
dataset = CharDataset(text, block_size=128, split="train")

# Checkpoint to inspect; change the step number to sample from a different one.
ckpt_path = "experiments/checkpoints/step_200.pt"

device = "cuda" if torch.cuda.is_available() else "cpu"
model = GPTLanguageModel(dataset.vocab_size).to(device)

# Restore the saved weights onto the selected device.
# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
ckpt = torch.load(ckpt_path, map_location=device)
model.load_state_dict(ckpt["model"])
model.eval()  # inference mode for layers such as dropout

# Sample a continuation of the prompt. Gradients are not needed for sampling,
# so autograd is disabled to avoid holding activations in GPU memory.
prompt = "O God, O God!"
idx = torch.tensor([dataset.encode(prompt)]).to(device)
with torch.no_grad():
    generated = model.generate(idx, max_new_tokens=300)
print(dataset.decode(generated[0].tolist()))

In [None]:
from torch.utils.data import DataLoader
from src.model.metrics import calculate_ppl

# Build the validation split from the corpus loaded in the previous cell,
# using the same context length (128) as the training split.
val_dataset = CharDataset(text, split="val", block_size=128)
val_loader = DataLoader(dataset=val_dataset, batch_size=64)

# Score the currently loaded checkpoint on the validation batches.
ppl = calculate_ppl(model, val_loader, device=device)
print("Validation Perplexity:", ppl)