# svgllm SFT on Colab (LLaVA 1.5 7B)

This notebook mounts Google Drive, clones the repo, installs its dependencies, and runs a small supervised fine-tuning (SFT) job using data stored on Drive.


In [None]:
# 1) Runtime: set GPU to A100 in Colab UI: Runtime > Change runtime type > GPU > A100
import os

from google.colab import drive  # type: ignore

# Single source of truth for the mount point, so DRIVE_ROOT cannot drift away
# from the path that is actually passed to drive.mount().
MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT, force_remount=True)

# Path constants shared by the cells below.
COLAB_ROOT = '/content'
REPO_DIR = f'{COLAB_ROOT}/svgllm'  # local checkout of the repo
DRIVE_ROOT = f'{MOUNT_POINT}/MyDrive/svgllm'
DATA_DIR = f'{DRIVE_ROOT}/data/svgs'  # customize if needed
OUTPUT_DIR = f'{DRIVE_ROOT}/runs/sft-llava'

print('Drive mounted. Data dir:', DATA_DIR)
# Surface a wrong/missing data path here instead of deep inside a later cell.
# Deliberately a warning, not an exception: the cell still completes as before.
if not os.path.isdir(DATA_DIR):
  print('WARNING: data dir does not exist:', DATA_DIR)


In [None]:
# 2) Install dependencies and clone repo
!pip -q install -U uv
!test -d $REPO_DIR || git clone https://github.com/JacobAsmuth/svgllm $REPO_DIR
%cd $REPO_DIR
!uv sync


In [None]:
# 3) Quick dataset sanity check: count and preview a rendered sample
import os
from PIL import Image
from svgllm.data.svg_dataset import SvgSftDataset
# NOTE(review): this import runs in the notebook kernel, while `uv sync` /
# `uv run` operate through uv — confirm svgllm and its dependencies are
# importable from the kernel as well.

# Build a small dataset over DATA_DIR, capped at 8 items.
PREVIEW_SIZE = (256, 256)
ds = SvgSftDataset(DATA_DIR, image_size=PREVIEW_SIZE, max_items=8)
num_samples = len(ds)
print('Num samples (capped):', num_samples)

# Only preview when there is at least one sample to index.
if num_samples > 0:
  ex = ds[0]
  display(ex.image)  # rich image output in the notebook
  print(ex.filename, len(ex.svg_text))


In [None]:
# 4) Dry-run collator shapes
!uv run python -m scripts.train_sft_llava --dry-run --data-dir $DATA_DIR --max-items 2 --batch-size 1


In [None]:
# 5) Train (small)
!uv run python -m scripts.train_sft_llava \
  --data-dir $DATA_DIR \
  --max-items 512 \
  --batch-size 1 \
  --output-dir $OUTPUT_DIR
