In [None]:
import os
import platform

# Report where the kernel is running and which interpreter it uses.
print("CWD:", os.getcwd())
print("Python:", platform.python_version())

# torch may be absent until the dependency-install cell has run, so probe for
# it and fall back to a short notice instead of failing the cell.
try:
    import torch
    has_cuda = torch.cuda.is_available()
    print("CUDA available:", has_cuda)
    if has_cuda:
        # Only the first visible device (index 0) is reported.
        print("GPU:", torch.cuda.get_device_name(0))
except ImportError:
    print("torch not installed yet")


In [None]:
# Install dependencies inside Colab kernel
!pip install -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip install -U transformers accelerate datasets peft trl python-chess bitsandbytes


In [None]:
import os

print("Initial contents in current directory:")
for name in os.listdir():
    print("-", name)

# Ensure we are inside your SFT repo on Colab
if not os.path.exists('train_scripts/train.py'):
    if not os.path.exists('sft'):
        print('Cloning your repo Bot-Rakshit/sft (no submodules)...')
        !git clone https://github.com/Bot-Rakshit/sft.git
    os.chdir('sft')
    print('Changed directory to:', os.getcwd())

print('Repo contents (after potential cd):')
for name in os.listdir():
    print("-", name)

assert os.path.exists('train_scripts/train.py'), 'train_scripts/train.py still not found; check that your repo has train_scripts.'


In [None]:
# Optional: generate Boychesser-style data on Colab if missing
import os

if not os.path.exists("train_data_boychesser.jsonl"):
    !python train_scripts/data_prep_boychesser.py --max-samples 80000 --output train_data_boychesser.jsonl
else:
    print("train_data_boychesser.jsonl already present, skipping generation.")


In [None]:
# Verify the training data
# Quick sanity check: print the file's line count, then its first two lines.
# NOTE(review): presumably one JSON record per line (.jsonl) — confirm against
# train_scripts/data_prep_boychesser.py.
!wc -l train_data_boychesser.jsonl
!head -n 2 train_data_boychesser.jsonl


In [None]:
# Train Qwen2.5-3B with LoRA on full 80K Boychesser dataset
# Runs train_scripts/train.py as a subprocess, reading train_data_boychesser.jsonl
# and passing qwen-chess-3b-sft-bc-80k as --output; the merge cell later passes
# that same path as --adapter.
# NOTE(review): presumably the effective batch size is batch-size * grad-accum
# (2 * 8 = 16) and --dtype auto lets the script pick the precision — confirm
# both against train.py's argument handling.
!python train_scripts/train.py \
  --model Qwen/Qwen2.5-3B-Instruct \
  --data train_data_boychesser.jsonl \
  --output qwen-chess-3b-sft-bc-80k \
  --epochs 1 \
  --batch-size 2 \
  --grad-accum 8 \
  --lr 1.5e-4 \
  --max-seq-length 320 \
  --dtype auto


In [None]:
# Merge LoRA adapter with base model
# Invokes train_scripts/merge_model.py with the same base model ID used for
# training, the adapter path that training received as --output, and the
# destination path qwen-chess-3b-merged-bc-80k for the merged result.
!python train_scripts/merge_model.py \
  --base-model Qwen/Qwen2.5-3B-Instruct \
  --adapter qwen-chess-3b-sft-bc-80k \
  --output qwen-chess-3b-merged-bc-80k

In [None]:
# Test the merged model with a sample position
# The FEN below is the standard chess starting position (White to move), and
# --legal-moves passes three candidate opening moves in UCI notation.
# NOTE(review): how test_model.py uses --legal-moves (e.g. as a constraint or
# as prompt context) is not visible here — confirm in the script.
!python train_scripts/test_model.py \
  --model qwen-chess-3b-merged-bc-80k \
  --fen "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1" \
  --legal-moves "e2e4,d2d4,g1f3"

In [None]:
# Save the merged model to Google Drive (optional)
# Uncomment the lines below to mount Google Drive and copy the model
# from google.colab import drive
# drive.mount('/content/drive')
# !cp -r qwen-chess-3b-merged-bc-80k /content/drive/MyDrive/

# Or create a zip file for download
!zip -r qwen-chess-3b-merged-bc-80k.zip qwen-chess-3b-merged-bc-80k
print("Model saved as qwen-chess-3b-merged-bc-80k.zip")
print("You can download it from the Files panel on the left")