In [1]:
#| default_exp templates

# PBS Job Templates
> Generate PBS submission scripts for VASP calculations with chasqui workflow integration

This module creates PBS scripts that:
- Execute VASP calculations with proper environment setup
- Handle job completion (success/failure)
- Write completion flags for status tracking
- Trigger the remote agent to submit waiting jobs
- Log execution details

## Template Structure

Each generated PBS script includes:

1. **PBS Directives** - Resource allocation (#PBS -l, -A, etc.)
2. **Environment Setup** - Module loads, ulimit, OMP settings
3. **VASP Execution** - MPI run with proper parameters
4. **Completion Handler** - Always runs, even on failure
5. **Agent Trigger** - Starts next job submission cycle

## Integration with Workflow
```
Job Script Template
       ↓
   PBS Queue
       ↓
  VASP Runs
       ↓
Completion Handler → Writes flag → Triggers agent.sh
                                          ↓
                                   Submits next jobs
```

In [2]:
#| export
from string import Template
from pathlib import Path
from typing import Optional, Dict, Any
from datetime import datetime

## PBS Script Template

Based on regular script with enhancements for workflow automation.

In [3]:
#| export

PBS_TEMPLATE = """#!/bin/bash

#PBS -N $JOB_NAME
#PBS -l select=$CORES:ncpus=36:mpiprocs=36
#PBS -A $PROJECT
#PBS -l walltime=$TIME
#PBS -j oe
#PBS -o $JOB_NAME.out

# Job metadata for chasqui
JOB_ID="$JOB_ID"
CHASQUI_DIR="$CHASQUI_DIR"
WORK_DIR="$WORK_DIR"

# Change to work directory where VASP inputs are located
cd $$WORK_DIR || { echo "ERROR: Cannot cd to $$WORK_DIR"; exit 1; }

echo "======================================"
echo "Job: $JOB_NAME"
echo "Job ID: $$PBS_JOBID"
echo "Chasqui ID: $$JOB_ID"
echo "Work Directory: $$WORK_DIR"
echo "Started: $$(date)"
echo "======================================"

# Get node count from PBS
NNODES=`wc -l < $$PBS_NODEFILE`
echo "Nodes allocated: $$NNODES"

# Environment setup
ulimit -s unlimited
export OMP_NUM_THREADS=1

# Load modules
module purge
module load vasp/6.4.3

# Run VASP
echo "Running VASP..."
mpirun -np $$NNODES $VASP

# Capture exit code
EXIT_CODE=$$?

echo "======================================"
echo "VASP finished with exit code: $$EXIT_CODE"
echo "Completed: $$(date)"
echo "======================================"

# ============================================
# CHASQUI COMPLETION HANDLER (always runs)
# ============================================

if [ $$EXIT_CODE -eq 0 ]; then
    STATUS="DONE"
    echo "✓ Job completed successfully"
else
    STATUS="FAIL"
    echo "✗ Job failed with exit code $$EXIT_CODE"
fi

# Write completion flag
COMPLETED_DIR="$$CHASQUI_DIR/completed"
mkdir -p $$COMPLETED_DIR
echo "$$STATUS" > $$COMPLETED_DIR/$${JOB_ID}.flag
echo "$$PBS_JOBID" >> $$COMPLETED_DIR/$${JOB_ID}.flag
echo "$$(date)" >> $$COMPLETED_DIR/$${JOB_ID}.flag
echo "$$WORK_DIR" >> $$COMPLETED_DIR/$${JOB_ID}.flag

# Move job script to completed
SUBMITTED_DIR="$$CHASQUI_DIR/submitted"
mv $$SUBMITTED_DIR/$${JOB_ID}.sh $$COMPLETED_DIR/ 2>/dev/null

# Log completion
AGENT_LOG="$$CHASQUI_DIR/logs/agent.log"
echo "$$(date -Iseconds) JOB_COMPLETE job=$$JOB_ID pbs=$$PBS_JOBID status=$$STATUS exit_code=$$EXIT_CODE work_dir=$$WORK_DIR" >> $$AGENT_LOG

# Trigger agent to submit waiting jobs (with file lock)
echo "Triggering agent to submit next jobs..."
flock -n $$CHASQUI_DIR/agent.lock -c "bash $$CHASQUI_DIR/agent.sh" &

# Exit with original VASP exit code
exit $$EXIT_CODE
"""

In [4]:
#| export

def generate_pbs_script(
    job_id: str,
    work_dir: str,
    job_name: Optional[str] = None,
    cores: int = 1,
    walltime: str = "48:00:00",
    project: str = "AARC1",
    vasp_version: str = "vasp_gam",
    chasqui_remote_dir: str = "~/chasqui_remote",
    output_path: Optional[str] = None
) -> str:
    """
    Generate PBS submission script for VASP job.
    
    Args:
        job_id: Unique job identifier (UUID from database)
        work_dir: Remote directory where VASP will run (contains POSCAR, INCAR, etc.)
        job_name: Human-readable job name (default: job_id)
        cores: Number of compute nodes to request (default: 1)
        walltime: Maximum runtime in HH:MM:SS format (default: "48:00:00")
        project: PBS account/project code (default: "AARC1")
        vasp_version: VASP executable name (default: "vasp_gam")
        chasqui_remote_dir: Remote chasqui directory (default: "~/chasqui_remote")
        output_path: If provided, write script to this file
        
    Returns:
        PBS script content as string
        
    Example:
        >>> script = generate_pbs_script(
        ...     job_id="abc-123-def",
        ...     work_dir="~/scratch/vasp_jobs/abc-123-def",
        ...     job_name="Au_bulk_relax",
        ...     cores=2,
        ...     walltime="24:00:00",
        ...     project="MyProject",
        ...     vasp_version="vasp_std"
        ... )
    """
    # Use job_id as name if not provided
    if job_name is None:
        job_name = f"chasqui_{job_id[:8]}"
    
    # Substitute template variables
    template = Template(PBS_TEMPLATE)
    script = template.safe_substitute(
        JOB_ID=job_id,
        JOB_NAME=job_name,
        WORK_DIR=work_dir,
        CORES=cores,
        TIME=walltime,
        PROJECT=project,
        VASP=vasp_version,
        CHASQUI_DIR=chasqui_remote_dir
    )
    
    # Write to file if requested
    if output_path:
        output_file = Path(output_path)
        output_file.parent.mkdir(parents=True, exist_ok=True)
        output_file.write_text(script)
    
    return script

## Job-Specific Script Generation

Generate script directly from database job entry.

In [5]:
#| export

def generate_pbs_script_from_job(
    job: Dict[str, Any],
    output_path: Optional[str] = None
) -> str:
    """
    Generate PBS script from database job entry.
    
    Args:
        job: Job dictionary from ChasquiDB.get_job()
        output_path: Optional path to write script
        
    Returns:
        PBS script content as string
        
    Example:
        >>> from chasqui.database import ChasquiDB
        >>> db = ChasquiDB()
        >>> job = db.get_job("abc-123")
        >>> script = generate_pbs_script_from_job(job)
        
    Note:
        If 'remote_work_dir' is not in vasp_config, it defaults to:
        ~/scratch/vasp_jobs/<job_id>
    """
    import json
    
    # Extract VASP config if present
    vasp_config = {}
    if job.get('vasp_config'):
        vasp_config = json.loads(job['vasp_config'])
    
    # Determine work directory
    # Priority: config > remote_path field > default convention
    work_dir = vasp_config.get('remote_work_dir') or \
               job.get('remote_path') or \
               f"~/scratch/vasp_jobs/{job['job_id']}"
    
    # Generate script with config parameters
    return generate_pbs_script(
        job_id=job['job_id'],
        work_dir=work_dir,
        job_name=vasp_config.get('job_name'),
        cores=vasp_config.get('cores', 1),
        walltime=vasp_config.get('walltime', '48:00:00'),
        project=vasp_config.get('project', 'AARC1'),
        vasp_version=vasp_config.get('vasp_version', 'vasp_gam'),
        chasqui_remote_dir=vasp_config.get('chasqui_remote_dir', '~/chasqui_remote'),
        output_path=output_path
    )

## Script Validation

Helper to validate generated scripts.

In [6]:
#| export

def validate_pbs_script(script: str) -> bool:
    """
    Validate that PBS script has required elements.
    
    Args:
        script: PBS script content
        
    Returns:
        True if script looks valid
        
    Raises:
        ValueError: If script is missing required elements
    """
    required_elements = [
        '#!/bin/bash',
        '#PBS -N',
        '#PBS -l select=',
        '#PBS -l walltime=',
        'WORK_DIR=',
        'cd $WORK_DIR',
        'module load vasp',
        'mpirun',
        'EXIT_CODE',
        'DONE',
        'FAIL',
        'agent.sh'
    ]
    
    missing = []
    for element in required_elements:
        if element not in script:
            missing.append(element)
    
    if missing:
        raise ValueError(f"PBS script missing required elements: {missing}")
    
    return True

## Tests

Verify script generation works correctly.

In [7]:
#| hide
import tempfile
import os

# Test basic script generation
script = generate_pbs_script(
    job_id="test-job-123",
    work_dir="~/scratch/vasp_jobs/test-job-123",
    job_name="test_vasp",
    cores=2,
    walltime="12:00:00",
    project="TestProject",
    vasp_version="vasp_std"
)

print("Generated script length:", len(script))
assert len(script) > 500, "Script too short"
assert "test_vasp" in script, "Job name not in script"
assert "test-job-123" in script, "Job ID not in script"
assert "~/scratch/vasp_jobs/test-job-123" in script, "Work dir not in script"
assert 'WORK_DIR="~/scratch/vasp_jobs/test-job-123"' in script, "WORK_DIR variable not set"
assert "cd $WORK_DIR" in script, "cd to WORK_DIR not in script"
assert "select=2:" in script, "Cores not set correctly"
assert "12:00:00" in script, "Walltime not set correctly"
assert "#PBS -A TestProject" in script, "Project not set correctly"
assert "vasp_std" in script, "VASP version not set correctly"
print("✓ Basic generation works")

# Test validation
try:
    validate_pbs_script(script)
    print("✓ Script validation passes")
except ValueError as e:
    print(f"✗ Validation failed: {e}")

# Test file writing
with tempfile.NamedTemporaryFile(mode='w', suffix='.sh', delete=False) as f:
    temp_path = f.name

try:
    script = generate_pbs_script(
        job_id="file-test-456",
        work_dir="~/scratch/vasp_jobs/file-test-456",
        output_path=temp_path
    )
    
    assert os.path.exists(temp_path), "File not created"
    with open(temp_path) as f:
        content = f.read()
    assert "file-test-456" in content, "Job ID not in file"
    assert "~/scratch/vasp_jobs/file-test-456" in content, "Work dir not in file"
    print("✓ File writing works")
finally:
    os.unlink(temp_path)

# Test with job dictionary (simulated)
test_job = {
    'job_id': 'dict-job-789',
    'vasp_config': '{"job_name": "Au_relax", "cores": 4, "walltime": "06:00:00", "remote_work_dir": "~/my_vasp/Au_relax"}'
}

script = generate_pbs_script_from_job(test_job)
assert "Au_relax" in script, "Config job name not used"
assert "select=4:" in script, "Config cores not used"
assert "06:00:00" in script, "Config walltime not used"
assert "~/my_vasp/Au_relax" in script, "Config work_dir not used"
print("✓ Generation from job dict works")

# Test default work_dir convention
test_job_no_workdir = {
    'job_id': 'auto-workdir-999',
    'vasp_config': '{}'
}

script = generate_pbs_script_from_job(test_job_no_workdir)
assert "~/scratch/vasp_jobs/auto-workdir-999" in script, "Default work_dir convention not used"
print("✓ Default work_dir convention works")

print("\n✅ All tests passed!")

Generated script length: 2227
✓ Basic generation works
✓ Script validation passes
✓ File writing works
✓ Generation from job dict works
✓ Default work_dir convention works

✅ All tests passed!


## Usage Examples

### Basic Usage with Work Directory
```python
from chasqui.templates import generate_pbs_script

# Generate script - work_dir is REQUIRED
script = generate_pbs_script(
    job_id="abc-123-def-456",
    work_dir="~/scratch/vasp_jobs/abc-123-def-456",  # Where VASP inputs are
    job_name="Au_bulk_optimization",
    cores=2,
    walltime="24:00:00",
    project="MyResearchProject",
    vasp_version="vasp_std"
)

# Write to file
with open("job.sh", "w") as f:
    f.write(script)
```

### With Database Integration
```python
from chasqui.database import ChasquiDB
from chasqui.templates import generate_pbs_script_from_job

# Create job in database with remote work directory
db = ChasquiDB()
job_id = db.create_job(
    local_path="/path/to/vasp",
    vasp_config={
        "job_name": "Si_bandstructure",
        "cores": 4,
        "walltime": "12:00:00",
        "project": "MATERIALS2024",
        "vasp_version": "vasp_std",
        "remote_work_dir": f"~/scratch/vasp_jobs/{job_id}"  # Explicit work dir
    }
)

# Generate script from job
job = db.get_job(job_id)
script = generate_pbs_script_from_job(
    job,
    output_path=f"jobs/{job_id}.sh"
)
```

### Default Work Directory Convention
```python
# If you don't specify remote_work_dir in vasp_config,
# it defaults to: ~/scratch/vasp_jobs/<job_id>

db = ChasquiDB()
job_id = db.create_job(
    local_path="/path/to/vasp",
    vasp_config={
        "cores": 2,
        # No remote_work_dir specified
    }
)

job = db.get_job(job_id)
script = generate_pbs_script_from_job(job)
# Uses: ~/scratch/vasp_jobs/<job_id>
```

### Directory Structure on Remote

When you use this template, the remote structure looks like:
```
~/chasqui_remote/              # Management files
├── waiting/abc-123.sh         # PBS script
├── submitted/
└── completed/

~/scratch/vasp_jobs/           # Work directories
└── abc-123-def-456/           # VASP runs here
    ├── POSCAR                 # Input (uploaded by sync)
    ├── INCAR                  # Input (uploaded by sync)
    ├── KPOINTS                # Input (uploaded by sync)
    ├── POTCAR                 # Input (uploaded by sync)
    ├── OUTCAR                 # Output (created by VASP)
    ├── CONTCAR                # Output (created by VASP)
    └── abc-123.out            # PBS output
```

In [8]:
from chasqui.templates import generate_pbs_script

# Test 1: Generate script with explicit work directory
print("=" * 60)
print("Test 1: Basic script generation with work_dir")
print("=" * 60)

script = generate_pbs_script(
    job_id="test-001",
    work_dir="~/scratch/vasp_jobs/test-001",
    job_name="Au_bulk_test",
    cores=2,
    walltime="12:00:00",
    project="AARC1",
    vasp_version="vasp_std"
)

# Show key parts
lines = script.split('\n')
print(f"\nTotal lines: {len(lines)}")
print("\n--- First 30 lines ---")
print('\n'.join(lines[:30]))

print("\n--- Work directory setup (lines 15-25) ---")
print('\n'.join(lines[15:25]))

print("\n--- Last 10 lines ---")
print('\n'.join(lines[-10:]))

# Test 2: Verify work_dir is in the script
print("\n" + "=" * 60)
print("Test 2: Verify work_dir elements")
print("=" * 60)

checks = {
    'WORK_DIR variable set': 'WORK_DIR="~/scratch/vasp_jobs/test-001"' in script,
    'cd to WORK_DIR': 'cd $WORK_DIR' in script,
    'Error handling for cd': '{ echo "ERROR: Cannot cd to' in script,
    'Work dir in completion flag': 'echo "$$WORK_DIR"' in script,
    'Work dir in log': 'work_dir=$$WORK_DIR' in script,
}

for check, result in checks.items():
    status = "✓" if result else "✗"
    print(f"{status} {check}: {result}")

# Test 3: Generate from job dictionary
print("\n" + "=" * 60)
print("Test 3: Generate from job dict with work_dir")
print("=" * 60)

test_job = {
    'job_id': 'abc-123-def-456',
    'vasp_config': '''{
        "job_name": "Si_optimization",
        "cores": 4,
        "walltime": "24:00:00",
        "project": "MatSci",
        "remote_work_dir": "~/scratch/vasp_jobs/Si_opt_001"
    }'''
}

from chasqui.templates import generate_pbs_script_from_job
script2 = generate_pbs_script_from_job(test_job)

# Check that custom work_dir is used
assert "Si_opt_001" in script2, "Custom work_dir not used!"
assert "Si_optimization" in script2, "Job name not used!"
assert "select=4:" in script2, "Cores not set!"
print("✓ Custom work_dir from config: ~/scratch/vasp_jobs/Si_opt_001")
print("✓ Job name: Si_optimization")
print("✓ Cores: 4")

# Test 4: Default work_dir convention
print("\n" + "=" * 60)
print("Test 4: Default work_dir convention")
print("=" * 60)

test_job_default = {
    'job_id': 'xyz-789-default',
    'vasp_config': '{"cores": 2}'  # No remote_work_dir specified
}

script3 = generate_pbs_script_from_job(test_job_default)
expected_workdir = "~/scratch/vasp_jobs/xyz-789-default"
assert expected_workdir in script3, "Default work_dir convention not applied!"
print(f"✓ Default work_dir applied: {expected_workdir}")

# Test 5: Write to file
print("\n" + "=" * 60)
print("Test 5: Write to file")
print("=" * 60)

import tempfile
import os

with tempfile.NamedTemporaryFile(mode='w', suffix='.sh', delete=False) as f:
    temp_path = f.name

try:
    script4 = generate_pbs_script(
        job_id="file-test-999",
        work_dir="~/scratch/vasp_jobs/file-test-999",
        job_name="file_test",
        output_path=temp_path
    )
    
    print(f"✓ Script written to: {temp_path}")
    
    # Read it back
    with open(temp_path) as f:
        content = f.read()
    
    print(f"✓ File size: {len(content)} bytes")
    print(f"✓ Contains work_dir: {'WORK_DIR=' in content}")
    
finally:
    os.unlink(temp_path)
    print("✓ Temp file cleaned up")

print("\n" + "=" * 60)
print("🎉 ALL TESTS PASSED!")
print("=" * 60)

Test 1: Basic script generation with work_dir

Total lines: 84

--- First 30 lines ---
#!/bin/bash

#PBS -N Au_bulk_test
#PBS -l select=2:ncpus=36:mpiprocs=36
#PBS -A AARC1
#PBS -l walltime=12:00:00
#PBS -j oe
#PBS -o Au_bulk_test.out

# Job metadata for chasqui
JOB_ID="test-001"
CHASQUI_DIR="~/chasqui_remote"
WORK_DIR="~/scratch/vasp_jobs/test-001"

# Change to work directory where VASP inputs are located
cd $WORK_DIR || { echo "ERROR: Cannot cd to $WORK_DIR"; exit 1; }

echo "Job: Au_bulk_test"
echo "Job ID: $PBS_JOBID"
echo "Chasqui ID: $JOB_ID"
echo "Work Directory: $WORK_DIR"
echo "Started: $(date)"

# Get node count from PBS
NNODES=`wc -l < $PBS_NODEFILE`
echo "Nodes allocated: $NNODES"

# Environment setup

--- Work directory setup (lines 15-25) ---
cd $WORK_DIR || { echo "ERROR: Cannot cd to $WORK_DIR"; exit 1; }

echo "Job: Au_bulk_test"
echo "Job ID: $PBS_JOBID"
echo "Chasqui ID: $JOB_ID"
echo "Work Directory: $WORK_DIR"
echo "Started: $(date)"


--- Last 10 lines ---
AGENT_L

In [9]:
import inspect
from chasqui.templates import generate_pbs_script

print(inspect.signature(generate_pbs_script))

(job_id: str, work_dir: str, job_name: Optional[str] = None, cores: int = 1, walltime: str = '48:00:00', project: str = 'AARC1', vasp_version: str = 'vasp_gam', chasqui_remote_dir: str = '~/chasqui_remote', output_path: Optional[str] = None) -> str
