# RL Text-to-SQL Training on Google Colab

This notebook demonstrates how to train a Text-to-SQL model with RL on a single GPU with at least 24GB of memory (for example, a Colab A100).

**Runtime:** GPU (A100 recommended)

**Estimated time:** 2-4 hours for 3 epochs on the Spider train set. Sections 5 and 6 default to a 100-example subset and 2 epochs for a quicker run.

## 1. Setup

In [None]:
# Check GPU: print the nvidia-smi status table to confirm a GPU runtime is attached
!nvidia-smi

In [None]:
# Clone repository
# NOTE(review): "yourusername" looks like a placeholder -- point this at the real repository URL.
!git clone https://github.com/yourusername/rl-text2sql.git
# Work from the repository root: later cells import `reward` and run train_rl.py / evaluate.py
# using paths relative to this directory.
%cd rl-text2sql

In [None]:
# Install dependencies
!pip install -q -U pip
!pip install -q transformers>=4.35.0 accelerate>=0.25.0 peft>=0.7.0 bitsandbytes>=0.41.0
!pip install -q trl>=0.7.0 datasets>=2.14.0 pyyaml timeout-decorator

## 2. Download Spider Dataset

In [None]:
# Download Spider dataset
!mkdir -p data
!wget -O data/spider.zip https://drive.google.com/uc?export=download&id=1TqleXec_OykOYFREKKtschzY29dUcVAQ
!unzip -q data/spider.zip -d data/
!ls -la data/spider/

## 3. Quick Test: Reward Function

In [None]:
from reward import SQLRewardCalculator, RewardConfig

# Initialize reward calculator
# NOTE(review): presumably execution_weight / partial_weight scale the two reward terms and
# use_partial_rewards toggles the partial term -- confirm against reward.py.
config = RewardConfig(
    execution_weight=1.0,
    partial_weight=0.3,
    use_partial_rewards=True
)

# "dummy.db" is a placeholder path.
# NOTE(review): assumes the two calls below never open the database -- confirm in reward.py.
reward_calc = SQLRewardCalculator(db_path="dummy.db", config=config)

# Test partial rewards
# The prediction is identical to the gold query except that it lacks the ORDER BY clause.
pred_sql = "SELECT name, age FROM users WHERE age > 18"
gold_sql = "SELECT name, age FROM users WHERE age > 18 ORDER BY age"

partial = reward_calc.partial_rewards(pred_sql, gold_sql)
print(f"Partial reward: {partial:.3f}")

# Test component extraction
# Calls a private helper directly for inspection; the result is iterated as a mapping of
# component type -> extracted values.
components = reward_calc._extract_sql_components(pred_sql)
print(f"\nSQL Components:")
for comp_type, comp_set in components.items():
    print(f"  {comp_type}: {comp_set}")

## 4. Load Model and Test Generation

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Model name - choose smaller model for faster testing
# `model_name` is reused by later cells (training config, evaluation command, model reload).
model_name = "Qwen/Qwen2.5-Coder-3B-Instruct"  # 3B parameters
# Alternative: "codellama/CodeLlama-7b-hf"

print(f"Loading {model_name}...")

# QLoRA config
# 4-bit NF4 quantization with double quantization enabled; bfloat16 is the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load tokenizer
# Fall back to the EOS token for padding when the tokenizer defines no pad token.
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load model
# NOTE(review): trust_remote_code=True permits Python code shipped in the model repository
# to run locally -- confirm it is actually required for the chosen checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

# Prepare for training
# Must run on the quantized base model before it is wrapped with LoRA adapters below.
model = prepare_model_for_kbit_training(model)

# LoRA config
# Rank-16 adapters (alpha 32, dropout 0.05) on the modules named q_proj/k_proj/v_proj/o_proj;
# bias terms are left untouched.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Add LoRA
model = get_peft_model(model, lora_config)
# NOTE(review): prepare_model_for_kbit_training may already enable gradient checkpointing by
# default -- confirm whether this second call is redundant.
model.gradient_checkpointing_enable()

# Report how many parameters are trainable after wrapping the model with LoRA
model.print_trainable_parameters()

print("\nModel loaded successfully!")

In [None]:
# Test generation
def generate_sql(question, model, tokenizer):
    """Sample one completion for ``question`` and return the decoded text.

    The question is wrapped in a "Question: ...\\nSQL:" prompt, tokenized as PyTorch
    tensors, moved to the model's device and passed to ``model.generate`` with
    sampling enabled (temperature 0.7, at most 128 new tokens).

    Args:
        question: Natural-language question to translate.
        model: Object exposing ``device`` and ``generate(**kwargs)``.
        tokenizer: Callable tokenizer that also provides ``decode``.

    Returns:
        The first generated sequence, decoded with special tokens removed. No
        post-processing is applied (for decoder-only models the decoded text
        typically still contains the prompt).
    """
    sampling_kwargs = {
        "max_new_tokens": 128,
        "temperature": 0.7,
        "do_sample": True,
    }

    encoded = tokenizer(f"Question: {question}\nSQL:", return_tensors="pt")
    encoded = encoded.to(model.device)

    generated = model.generate(**encoded, **sampling_kwargs)

    # Only the first returned sequence is decoded.
    return tokenizer.decode(generated[0], skip_special_tokens=True)

# Smoke-test generation on a single example question and show the decoded output
question = "What are the names of all students?"
response = generate_sql(question, model, tokenizer)
print(f"Question: {question}", f"Response: {response}", sep="\n")

## 5. Prepare Training Data (Small Subset)

In [None]:
import json

# Number of leading Spider examples to keep for quick testing
subset_size = 100  # Adjust based on time constraints

# Read the Spider training split
with open('data/spider/train_spider.json', 'r') as src:
    spider_data = json.loads(src.read())
print(f"Total training examples: {len(spider_data)}")

# Keep the first `subset_size` examples and write them to the file the training config reads
train_subset = spider_data[:subset_size]
with open('data/spider/train_subset.json', 'w') as dst:
    dst.write(json.dumps(train_subset))

# Report the subset size and pretty-print its first record
print(f"Using {len(train_subset)} examples for training")
print(f"\nExample:")
print(json.dumps(train_subset[0], indent=2))

## 6. Training Configuration

In [None]:
# Settings for a quick training run, written to config_colab.yaml for train_rl.py
import yaml

config = dict(
    model_name=model_name,
    use_qlora=True,
    train_data='data/spider/train_subset.json',
    db_root='data/spider/database',
    num_samples=4,
    temperature=0.7,
    num_epochs=2,  # Reduced for quick testing
    batch_size=1,
    gradient_accumulation_steps=4,  # Reduced for speed
    learning_rate=1e-5,
    kl_coef=0.1,
    execution_weight=1.0,
    partial_weight=0.3,
    output_dir='outputs/rl-model',
    seed=42,
)

# Persist the settings so the training script can load them
with open('config_colab.yaml', 'w') as cfg_file:
    yaml.dump(config, cfg_file)

# Echo the configuration in block style for a visual check
print("Configuration:")
print(yaml.dump(config, default_flow_style=False))

## 7. Run Training

In [None]:
# Run training script with the settings saved to config_colab.yaml by the previous section
!python train_rl.py --config config_colab.yaml

## 8. Evaluation

In [None]:
# Evaluate on dev set
# IPython replaces $model_name with the value of the Python variable before the shell runs.
# --model_path is the same directory the training config uses as output_dir.
# NOTE(review): assumes evaluate.py creates the results/ directory when missing -- confirm.
!python evaluate.py \
    --model_path outputs/rl-model \
    --base_model $model_name \
    --test_data data/spider/dev.json \
    --db_root data/spider/database \
    --output_file results/predictions.json

In [None]:
# Read the predictions file produced by the evaluation step
with open('results/predictions.json', 'r') as results_file:
    results = json.loads(results_file.read())

metrics = results['metrics']
banner = "=" * 80

# Headline metrics
print(banner)
print("EVALUATION RESULTS")
print(banner)
print(f"Execution Accuracy: {metrics['execution_accuracy']:.2%}")
print(f"Exact Match:        {metrics['exact_match']:.2%}")
print(f"Total Examples:     {metrics['total']}")
print(banner)

# First five predictions next to their gold queries
print("\nSample Predictions:")
for number, pred in enumerate(results['predictions'][:5], start=1):
    print(f"\n--- Example {number} ---")
    print(f"Question: {pred['question']}")
    print(f"Gold SQL: {pred['gold_sql']}")
    print(f"Pred SQL: {pred['pred_sql']}")
    print(f"Correct: {pred['execution_correct']}")

## 9. Interactive Testing

In [None]:
# Load trained model for interactive testing
from peft import PeftModel

# Reload the base checkpoint in bfloat16 with automatic device placement
base_model_kwargs = {"device_map": "auto", "torch_dtype": torch.bfloat16}
base_model = AutoModelForCausalLM.from_pretrained(model_name, **base_model_kwargs)

# Attach the LoRA adapters stored in outputs/rl-model, then switch to evaluation mode
trained_model = PeftModel.from_pretrained(base_model, "outputs/rl-model")
trained_model.eval()

print("Trained model loaded!")

In [None]:
# Interactive generation
def ask_question(question):
    """Generate SQL for a natural-language question with the trained model.

    Uses the notebook-level ``tokenizer`` and ``trained_model``. The question is
    wrapped in a "Question: ...\\nSQL:" prompt and decoded greedily
    (``do_sample=False``) with at most 256 new tokens.

    Args:
        question: Natural-language question to translate.

    Returns:
        The text following the last "SQL:" marker in the decoded output, with
        surrounding whitespace stripped.
    """
    prompt = f"Question: {question}\nSQL:"

    inputs = tokenizer(prompt, return_tensors="pt").to(trained_model.device)

    # `temperature` is intentionally not passed: it only affects sampling, and combining it
    # with do_sample=False makes transformers warn about conflicting generation settings.
    outputs = trained_model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=False
    )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract SQL: keep only what follows the last "SQL:" marker
    sql = response.split("SQL:")[-1].strip()

    return sql

# Example questions to run through the trained model
questions = [
    "What are the names of all students?",
    "How many students are there?",
    "What is the average age of students?",
]

# Print each question with its generated SQL, followed by a blank separator line
for q in questions:
    sql = ask_question(q)
    print(f"Q: {q}", f"A: {sql}", "", sep="\n")

## 10. Save to Drive (Optional)

In [None]:
# Mount Google Drive
# The google.colab module is imported here, so this cell is meant to run inside Colab.
from google.colab import drive
drive.mount('/content/drive')

# Copy model to Drive
# Copies the trained model directory and the predictions file into MyDrive.
!cp -r outputs/rl-model /content/drive/MyDrive/rl-text2sql-model
!cp results/predictions.json /content/drive/MyDrive/

print("Model and results saved to Google Drive!")