# Chess Model Training - Qwen2.5-1.5B-Instruct

Fine-tune Qwen2.5-1.5B-Instruct on ~280K chess positions for move prediction.

**Hardware**: A40/A100 GPU

**Training**: Full fine-tuning (recommended for best quality)

## 1. Setup & Installation

In [None]:
# Install dependencies
!pip install -q torch transformers datasets accelerate bitsandbytes
!pip install -q trl peft wandb hf_transfer
!pip install -q flash-attn --no-build-isolation

In [None]:
import os
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

import json
import torch
from pathlib import Path
from datetime import datetime

from datasets import load_dataset, Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    BitsAndBytesConfig,
)
from trl import SFTTrainer, SFTConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Report the PyTorch build and, when a CUDA device is visible, its name and memory.
cuda_ready = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"GPU Memory: {gpu_props.total_memory / 1e9:.1f} GB")

## 2. Configuration

In [None]:
# =============================================================================
# CONFIGURATION
# =============================================================================
# All tunables for the run are defined in this cell; the cells below read
# these names instead of hard-coding values.

# Model
BASE_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"  # id passed to from_pretrained for tokenizer and model
OUTPUT_DIR = "./qwen-chess-1.5b-v1"  # root for checkpoints, logs/, final/ and merged/

# Data paths - split into 2 files to stay under GitHub 100MB limit
# Both parts are loaded and concatenated before shuffling.
DATA_PART1 = "train_data_part1.jsonl"
DATA_PART2 = "train_data_part2.jsonl"

# Training mode: "full" or "lora"
# "lora" wraps the model with PEFT adapters; any other value trains all weights.
TRAINING_MODE = "full"  # Full fine-tuning for best quality

# Hyperparameters
CONFIG = {
    # Training
    "num_epochs": 3,
    "batch_size": 4,  # per-device, used for both training and evaluation
    "gradient_accumulation": 8,  # effective batch = batch_size * gradient_accumulation
    "learning_rate": 2e-5,  # applied in "full" mode only; "lora" mode substitutes 2e-4
    "warmup_ratio": 0.03,
    "weight_decay": 0.01,
    "max_seq_length": 512,  # handed to the SFT trainer as the sequence-length cap
    
    # LoRA specific (only if TRAINING_MODE="lora")
    "lora_r": 64,
    "lora_alpha": 128,
    "lora_dropout": 0.05,
    
    # Optimization
    "use_flash_attention": True,  # requests attn_implementation="flash_attention_2"
    "bf16": True,  # bfloat16 weights and bf16 training; float16 weights when False
    "gradient_checkpointing": True,  # calls model.gradient_checkpointing_enable() after loading
    
    # Logging
    # All three intervals are counted in optimizer steps.
    "logging_steps": 50,
    "save_steps": 1000,
    "eval_steps": 500,
    "use_wandb": False,  # report_to="wandb" when True, otherwise "none"
}

# Echo the settings that most affect run time and memory.
print(f"Training mode: {TRAINING_MODE}")
print(f"Effective batch size: {CONFIG['batch_size'] * CONFIG['gradient_accumulation']}")

## 3. Load and Prepare Data

In [None]:
def load_jsonl(filepath):
    """Load a JSON Lines file into a list of decoded objects.

    Args:
        filepath: Path to a UTF-8 text file holding one JSON document per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.
        Blank and whitespace-only lines (for example a trailing newline at the
        end of the file) are skipped.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message is prefixed with ``<filepath>:<line number>:`` so the bad
            record can be located in a large file.
    """
    records = []
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(filepath, "r", encoding="utf-8") as f:
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                # json.loads("") raises, so tolerate empty separator lines.
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError as err:
                # Same exception type as before, with file/line context added.
                raise json.JSONDecodeError(
                    f"{filepath}:{line_no}: {err.msg}", err.doc, err.pos
                ) from err
    return records

# Read both data shards, report their sizes, then merge them into one list.
print("Loading data...")
part1 = load_jsonl(DATA_PART1)
part2 = load_jsonl(DATA_PART2)

for shard_label, shard in (("Part 1", part1), ("Part 2", part2)):
    print(f"{shard_label}: {len(shard):,}")

all_data = [*part1, *part2]
print(f"Total: {len(all_data):,}")

# Seeded in-place shuffle so the example order is reproducible between runs.
import random
random.seed(42)
random.shuffle(all_data)

In [None]:
# Show the first example after shuffling: the first 500 characters of
# message 0 (printed under "USER") and all of message 1 ("ASSISTANT").
sample = all_data[0]
sample_messages = sample["messages"]
print("Sample training example:\n" + "=" * 60)
print("USER:")
print(sample_messages[0]["content"][:500])
print("\nASSISTANT:")
print(sample_messages[1]["content"])

In [None]:
# Keep the first 98% of the shuffled examples for training and hold out the
# remainder for evaluation.
train_size = int(len(all_data) * 0.98)
train_data, eval_data = all_data[:train_size], all_data[train_size:]

print(f"Train: {len(train_data):,}")
print(f"Eval: {len(eval_data):,}")

# Wrap both splits as HuggingFace Dataset objects for the trainer.
train_dataset, eval_dataset = (
    Dataset.from_list(rows) for rows in (train_data, eval_data)
)

print(f"\nDataset columns: {train_dataset.column_names}")

## 4. Load Model and Tokenizer

In [None]:
# Tokenizer for the base model, padded on the right.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tokenizer.padding_side = "right"

# Reuse EOS as the padding token when the tokenizer does not define one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

for tok_label, tok_value in (
    ("Vocab size", tokenizer.vocab_size),
    ("Pad token", tokenizer.pad_token),
    ("EOS token", tokenizer.eos_token),
):
    print(f"{tok_label}: {tok_value}")

In [None]:
# Model loading configuration
import importlib.util

model_kwargs = {
    "trust_remote_code": True,
    "torch_dtype": torch.bfloat16 if CONFIG["bf16"] else torch.float16,
    "device_map": "auto",
}

# Request Flash Attention 2 only when it can actually be used. Assigning the
# option to the dict can never fail, so availability has to be checked
# explicitly: the flash_attn package must be importable and a CUDA device must
# be present. Otherwise the model's default attention implementation is used.
if CONFIG["use_flash_attention"]:
    flash_attn_installed = importlib.util.find_spec("flash_attn") is not None
    if flash_attn_installed and torch.cuda.is_available():
        model_kwargs["attn_implementation"] = "flash_attention_2"
        print("Using Flash Attention 2")
    else:
        print("Flash Attention not available, using default")

# For LoRA with 4-bit quantization
USE_4BIT = False

# QLoRA: load the base weights 4-bit quantized (NF4 with double quantization,
# bfloat16 compute). Only applies when LoRA mode and USE_4BIT are both set.
if TRAINING_MODE == "lora" and USE_4BIT:
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
    )
    model_kwargs["quantization_config"] = bnb_config
    print("Using 4-bit quantization (QLoRA)")

# Load model
print(f"Loading {BASE_MODEL}...")
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, **model_kwargs)

# Enable gradient checkpointing
if CONFIG["gradient_checkpointing"]:
    model.gradient_checkpointing_enable()

print(f"Model loaded! Parameters: {model.num_parameters():,}")

In [None]:
# Apply LoRA if selected: wrap the loaded model with PEFT adapters on every
# attention and MLP projection. In full fine-tuning mode the model is left
# untouched.
if TRAINING_MODE == "lora":
    print("Applying LoRA...")

    if USE_4BIT:
        # Prepares the quantized model for training (PEFT helper).
        model = prepare_model_for_kbit_training(model)
    elif CONFIG["gradient_checkpointing"]:
        # The base weights are frozen under LoRA, so with gradient
        # checkpointing the checkpointed segments would receive inputs that
        # do not require grad and no gradient would reach the adapters.
        # Make the embedding outputs require grad so backprop has a path.
        model.enable_input_require_grads()

    lora_config = LoraConfig(
        r=CONFIG["lora_r"],
        lora_alpha=CONFIG["lora_alpha"],
        lora_dropout=CONFIG["lora_dropout"],
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=[
            "q_proj", "k_proj", "v_proj", "o_proj",
            "gate_proj", "up_proj", "down_proj"
        ],
    )

    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()
else:
    print("Full fine-tuning mode - all parameters trainable")

## 5. Training

In [None]:
# Assemble the SFTConfig from CONFIG. Values that depend on the training mode
# or on the current time are resolved first so the argument table stays flat.
is_full_finetune = TRAINING_MODE == "full"
run_learning_rate = CONFIG["learning_rate"] if is_full_finetune else 2e-4
report_target = "wandb" if CONFIG["use_wandb"] else "none"
run_label = f"chess-qwen-1.5b-{datetime.now().strftime('%Y%m%d-%H%M')}"

sft_kwargs = {
    # Schedule and optimizer
    "output_dir": OUTPUT_DIR,
    "num_train_epochs": CONFIG["num_epochs"],
    "per_device_train_batch_size": CONFIG["batch_size"],
    "per_device_eval_batch_size": CONFIG["batch_size"],
    "gradient_accumulation_steps": CONFIG["gradient_accumulation"],
    "learning_rate": run_learning_rate,
    "warmup_ratio": CONFIG["warmup_ratio"],
    "weight_decay": CONFIG["weight_decay"],
    # Precision
    "bf16": CONFIG["bf16"],
    # Logging
    "logging_steps": CONFIG["logging_steps"],
    "logging_dir": f"{OUTPUT_DIR}/logs",
    # Checkpointing: save every save_steps, keeping at most 3 checkpoints
    "save_strategy": "steps",
    "save_steps": CONFIG["save_steps"],
    "save_total_limit": 3,
    # Evaluation on the held-out split every eval_steps
    "eval_strategy": "steps",
    "eval_steps": CONFIG["eval_steps"],
    # Misc
    "seed": 42,
    "report_to": report_target,
    "run_name": run_label,
}
training_args = SFTConfig(**sft_kwargs)

print("Training arguments configured!")

In [None]:
# Initialize trainer
import inspect

trainer_kwargs = {
    "model": model,
    "args": training_args,
    "train_dataset": train_dataset,
    "eval_dataset": eval_dataset,
    "processing_class": tokenizer,
}

# The sequence-length cap has lived in different places across TRL releases:
# as an SFTTrainer keyword argument, then as SFTConfig.max_seq_length, then as
# SFTConfig.max_length. TRL is installed unpinned, so route the value to
# whichever spelling the installed release understands instead of passing a
# keyword the trainer may reject with a TypeError.
seq_len_cap = CONFIG["max_seq_length"]
if "max_seq_length" in inspect.signature(SFTTrainer.__init__).parameters:
    trainer_kwargs["max_seq_length"] = seq_len_cap
elif hasattr(training_args, "max_length"):
    training_args.max_length = seq_len_cap
elif hasattr(training_args, "max_seq_length"):
    training_args.max_seq_length = seq_len_cap
else:
    print("Warning: could not apply max_seq_length; using the TRL default")

trainer = SFTTrainer(**trainer_kwargs)

print("Trainer initialized!")
print(f"Training examples: {len(train_dataset):,}")
print(f"Eval examples: {len(eval_dataset):,}")
# Single-device estimate: full effective batches per epoch times epochs.
print(f"Total steps: {len(train_dataset) // (CONFIG['batch_size'] * CONFIG['gradient_accumulation']) * CONFIG['num_epochs']}")

In [None]:
# Run the training loop, framed by separator lines in the cell output.
separator = "=" * 60

print("Starting training...")
print(separator)
trainer.train()
print(separator)
print("Training complete!")

## 6. Save Model

In [None]:
# Persist the trained weights and the tokenizer under OUTPUT_DIR.
final_model_path = f"{OUTPUT_DIR}/final"

print(f"Saving model to {final_model_path}...")

if TRAINING_MODE != "lora":
    # Full fine-tuning: the trainer writes the complete model.
    trainer.save_model(final_model_path)
    tokenizer.save_pretrained(final_model_path)
    print("Full model saved!")
else:
    # LoRA: store the adapters first ...
    model.save_pretrained(final_model_path)
    tokenizer.save_pretrained(final_model_path)
    print("LoRA adapters saved!")

    # ... then fold them into the base weights and save that model separately.
    print("Merging LoRA into base model...")
    merged_model = model.merge_and_unload()
    merged_path = f"{OUTPUT_DIR}/merged"
    merged_model.save_pretrained(merged_path)
    tokenizer.save_pretrained(merged_path)
    print(f"Merged model saved to {merged_path}")

## 7. Test the Model

In [None]:
# Quick test
def test_model(model, tokenizer, fen, legal_moves):
    """Generate the model's reply for a single chess position.

    Builds the move-selection prompt, wraps it as a one-turn chat with the
    tokenizer's chat template, and samples a completion.

    Args:
        model: Causal LM to query. It is switched to eval mode for the
            duration of the call and returned to its previous mode afterwards.
        tokenizer: Tokenizer matching ``model``; must provide a chat template
            and a ``pad_token_id``.
        fen: Position in FEN notation, inserted verbatim into the prompt.
        legal_moves: Text listing the candidate moves, inserted verbatim.

    Returns:
        The decoded completion only (prompt tokens removed, special tokens
        skipped).
    """
    prompt = f"""You are an expert chess player. Here is the position in FEN format:
{fen}

Legal moves: {legal_moves}

Select the best move. Keep your thinking to 2 sentences or less, then output your chosen move.
Format:
<think>brief thinking (2 sentences max)</think>
<uci_move>your_move</uci_move>"""

    messages = [{"role": "user", "content": prompt}]

    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(text, return_tensors="pt").to(model.device)

    # A model that has just been trained is still in train mode: dropout
    # (e.g. LoRA dropout) stays active and gradient checkpointing turns off
    # the KV cache. Generate in eval mode, then restore the caller's mode.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=100,
                temperature=0.1,
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id,
            )
    finally:
        model.train(was_training)

    # generate() returns prompt + completion; keep only the new tokens.
    prompt_len = inputs['input_ids'].shape[1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    return response

# Smoke test 1: the standard starting position with ten candidate moves.
test_fen = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"
test_moves = "e2e4 d2d4 g1f3 b1c3 c2c4 e2e3 g2g3 b2b3 f2f4 a2a3"

print("Testing model on starting position...\n" + "=" * 60)
response = test_model(model, tokenizer, fen=test_fen, legal_moves=test_moves)
print(response)

In [None]:
# Smoke test 2: an early opening position (labelled "Italian Game" in the
# output) with eight candidate moves, including castling (e1g1).
test_fen2 = "r1bqkb1r/pppp1ppp/2n2n2/4p3/2B1P3/5N2/PPPP1PPP/RNBQK2R w KQkq - 4 4"
test_moves2 = "d2d3 d2d4 b1c3 c2c3 e1g1 a2a3 h2h3 b2b4"

print("Testing on Italian Game position...\n" + "=" * 60)
response = test_model(model, tokenizer, fen=test_fen2, legal_moves=test_moves2)
print(response)

## 8. Upload to Hugging Face (Optional)

In [None]:
# Uncomment and run to upload to HuggingFace Hub

# from huggingface_hub import login
# login()  # Enter your HF token

# HF_REPO = "your-username/qwen-chess-1.5b"
# 
# model.push_to_hub(HF_REPO)
# tokenizer.push_to_hub(HF_REPO)
# 
# print(f"Model uploaded to https://huggingface.co/{HF_REPO}")