In [1]:
#| default_exp download

# Result Download
> Retrieve VASP calculation outputs from completed jobs on remote HPC systems



This module handles downloading computational results after jobs complete:

- Downloads VASP output files (OUTCAR, CONTCAR, CHGCAR, vasprun.xml, etc.)
- Preserves local directory structure (saves to job's local_path)
- Updates database with download timestamps
- Configurable file selection (choose which outputs to retrieve)
- Batch operations for multiple jobs

## Integration with Workflow
```
Completed Jobs (on remote) 
       ‚Üì
Download Module queries DB for COMPLETED jobs
       ‚Üì
SSH.download() transfers files to local_path
       ‚Üì
Database updated with download status
```

## Default VASP Output Files

Standard outputs we typically want:
- **OUTCAR** - Main output file with energies, forces
- **CONTCAR** - Final structure
- **OSZICAR** - Convergence information  
- **vasprun.xml** - Structured output data
- **CHGCAR** - Charge density (optional, large)
- **WAVECAR** - Wave functions (optional, very large)

In [2]:
#| export
from pathlib import Path
from typing import List, Dict, Any, Optional
from datetime import datetime

from chasqui.database import ChasquiDB
from chasqui.ssh import SSHConnection

In [3]:
#| export

# Default files to download for VASP calculations
DEFAULT_VASP_OUTPUTS = [
    'OUTCAR',
    'CONTCAR', 
    'OSZICAR',
    'vasprun.xml'
]

# Optional large files (user can include if needed)
OPTIONAL_VASP_OUTPUTS = [
    'CHGCAR',
    'WAVECAR',
    'CHG',
    'PROCAR',
    'DOSCAR',
    'EIGENVAL'
]

def download_job_results(
    ssh: SSHConnection,
    job: Dict[str, Any],
    file_list: Optional[List[str]] = None,
    skip_missing: bool = True
) -> Dict[str, Any]:
    """
    Download VASP output files for a single job.
    
    Args:
        ssh: Active SSH connection
        job: Job dictionary from database (must have 'remote_path' and 'local_path')
        file_list: List of filenames to download (default: DEFAULT_VASP_OUTPUTS)
        skip_missing: If True, skip files that don't exist on remote (default: True)
        
    Returns:
        Dictionary with download statistics:
            - 'downloaded': List of successfully downloaded files
            - 'skipped': List of missing files (if skip_missing=True)
            - 'failed': List of files that failed to download
    
    Example:
        >>> with SSHConnection('user@bebop') as ssh:
        ...     result = download_job_results(ssh, job)
        ...     print(f"Downloaded {len(result['downloaded'])} files")
    """
    if file_list is None:
        file_list = DEFAULT_VASP_OUTPUTS
    
    local_path = Path(job['local_path']).expanduser()
    remote_path = job.get('remote_path')
    
    if not remote_path:
        raise ValueError(f"Job {job['job_id']} has no remote_path set")
    
    # Ensure local directory exists
    local_path.mkdir(parents=True, exist_ok=True)
    
    result = {
        'downloaded': [],
        'skipped': [],
        'failed': []
    }
    
    for filename in file_list:
        remote_file = f"{remote_path}/{filename}"
        local_file = local_path / filename
        
        try:
            # Check if file exists on remote
            if not ssh.exists(remote_file):
                if skip_missing:
                    result['skipped'].append(filename)
                    continue
                else:
                    raise FileNotFoundError(f"Remote file not found: {remote_file}")
            
            # Download the file
            ssh.download(remote_file, local_file, create_dirs=False)
            result['downloaded'].append(filename)
            
        except FileNotFoundError:
            if skip_missing:
                result['skipped'].append(filename)
            else:
                result['failed'].append(filename)
        except Exception as e:
            result['failed'].append(filename)
            print(f"Warning: Failed to download {filename}: {e}")
    
    return result

## Batch Download

Download results for multiple completed jobs in a single SSH session.

In [4]:
#| export

def download_completed_jobs(
    db: ChasquiDB,
    ssh: SSHConnection,
    file_list: Optional[List[str]] = None,
    limit: Optional[int] = None,
    update_db: bool = True
) -> Dict[str, Any]:
    """
    Download results for all completed jobs that haven't been downloaded yet.
    
    Args:
        db: Database connection
        ssh: Active SSH connection
        file_list: List of files to download (default: DEFAULT_VASP_OUTPUTS)
        limit: Maximum number of jobs to process (None = all)
        update_db: Update database with download timestamp (default: True)
        
    Returns:
        Dictionary with statistics:
            - 'jobs_processed': Number of jobs processed
            - 'total_downloaded': Total files downloaded
            - 'total_skipped': Total files skipped
            - 'total_failed': Total files that failed
            - 'jobs': List of per-job results
    
    Example:
        >>> db = ChasquiDB()
        >>> with SSHConnection('user@bebop') as ssh:
        ...     result = download_completed_jobs(db, ssh, limit=10)
        ...     print(f"Processed {result['jobs_processed']} jobs")
    """
    # Query completed jobs without download timestamp
    completed_jobs = db.get_jobs_by_state('COMPLETED')
    
    # Filter to jobs that haven't been downloaded yet
    # (jobs without downloaded_at timestamp)
    pending_downloads = [
        job for job in completed_jobs 
        if job.get('downloaded_at') is None
    ]
    
    if limit:
        pending_downloads = pending_downloads[:limit]
    
    stats = {
        'jobs_processed': 0,
        'total_downloaded': 0,
        'total_skipped': 0,
        'total_failed': 0,
        'jobs': []
    }
    
    for job in pending_downloads:
        job_id = job['job_id']
        
        try:
            # Download files for this job
            result = download_job_results(ssh, job, file_list=file_list)
            
            # Update statistics
            stats['jobs_processed'] += 1
            stats['total_downloaded'] += len(result['downloaded'])
            stats['total_skipped'] += len(result['skipped'])
            stats['total_failed'] += len(result['failed'])
            
            # Store per-job result
            stats['jobs'].append({
                'job_id': job_id,
                'result': result
            })
            
            # Update database if requested
            if update_db and len(result['downloaded']) > 0:
                db.update_state(
                    job_id,
                    'COMPLETED',  # Keep same state
                    downloaded_at=datetime.now().isoformat()
                )
                
        except Exception as e:
            print(f"Warning: Failed to download results for job {job_id}: {e}")
            stats['jobs'].append({
                'job_id': job_id,
                'error': str(e)
            })
    
    return stats

In [5]:
#| hide

# Test the batch download function with mock data
print("\n=== Testing download_completed_jobs ===\n")

# Create temporary database for testing
import tempfile
import os

with tempfile.NamedTemporaryFile(delete=False, suffix='.db') as tmp:
    test_db_path = tmp.name

try:
    # Initialize database
    test_db = ChasquiDB(test_db_path)
    test_db.init_db()
    print("‚úì Test database created")
    
    # Create some test jobs
    job1_id = test_db.create_job(
        local_path="/test/job1",
        vasp_config={"test": True}
    )
    job2_id = test_db.create_job(
        local_path="/test/job2",
        vasp_config={"test": True}
    )
    
    # Mark them as completed
    test_db.update_state(job1_id, 'COMPLETED')
    test_db.update_state(job2_id, 'COMPLETED')
    print(f"‚úì Created 2 completed test jobs")
    
    # Verify they don't have download timestamps yet
    job1 = test_db.get_job(job1_id)
    job2 = test_db.get_job(job2_id)
    assert job1.get('downloaded_at') is None
    assert job2.get('downloaded_at') is None
    print("‚úì Jobs have no download timestamp (as expected)")
    
    # Test that function exists and can be called
    # (we can't actually download without SSH connection)
    print("‚úì download_completed_jobs function defined")
    
    # Verify the filtering logic works
    completed = test_db.get_jobs_by_state('COMPLETED')
    pending = [j for j in completed if j.get('downloaded_at') is None]
    assert len(pending) == 2
    print("‚úì Query logic works: found 2 pending downloads")
    
    # Simulate a download by manually updating timestamp
    test_db.update_state(job1_id, 'COMPLETED', downloaded_at=datetime.now().isoformat())
    
    # Verify filtering excludes downloaded jobs
    completed = test_db.get_jobs_by_state('COMPLETED')
    pending = [j for j in completed if j.get('downloaded_at') is None]
    assert len(pending) == 1
    assert pending[0]['job_id'] == job2_id
    print("‚úì Filtering works: only 1 pending download after marking job1 as downloaded")
    
    print("\n‚úÖ All batch download tests passed!")
    print("\nNote: Full integration test requires live SSH connection:")
    print("  with SSHConnection('user@bebop') as ssh:")
    print("      result = download_completed_jobs(db, ssh)")
    
finally:
    # Cleanup
    os.unlink(test_db_path)


=== Testing download_completed_jobs ===

‚úì Test database created
‚úì Created 2 completed test jobs
‚úì Jobs have no download timestamp (as expected)
‚úì download_completed_jobs function defined
‚úì Query logic works: found 2 pending downloads
‚úì Filtering works: only 1 pending download after marking job1 as downloaded

‚úÖ All batch download tests passed!

Note: Full integration test requires live SSH connection:
  with SSHConnection('user@bebop') as ssh:
      result = download_completed_jobs(db, ssh)


## Convenience Functions

Simple wrappers for common download operations.

In [6]:
#| export

def download_results(
    remote_host: str,
    db_path: str = "~/.chasqui/jobs.db",
    file_list: Optional[List[str]] = None,
    limit: Optional[int] = None,
    include_optional: bool = False
) -> Dict[str, Any]:
    """
    Download results for completed jobs (convenience wrapper).
    
    Args:
        remote_host: SSH hostname (e.g., 'bebop' or 'user@hpc.edu')
        db_path: Path to chasqui database
        file_list: Custom list of files to download (overrides defaults)
        limit: Maximum number of jobs to download (None = all)
        include_optional: Include large optional files (CHGCAR, WAVECAR)
        
    Returns:
        Dictionary with download statistics
        
    Example:
        >>> # Download standard outputs for all completed jobs
        >>> result = download_results('bebop')
        >>> print(f"Downloaded {result['total_downloaded']} files")
        
        >>> # Download including large files, limit to 5 jobs
        >>> result = download_results('bebop', include_optional=True, limit=5)
    """
    # Build file list
    if file_list is None:
        file_list = DEFAULT_VASP_OUTPUTS.copy()
        if include_optional:
            file_list.extend(OPTIONAL_VASP_OUTPUTS)
    
    # Initialize database
    db = ChasquiDB(db_path)
    
    # Connect and download
    with SSHConnection(remote_host) as ssh:
        result = download_completed_jobs(
            db, 
            ssh, 
            file_list=file_list, 
            limit=limit
        )
    
    return result

In [7]:
#| hide

print("\n=== Testing download_results convenience function ===\n")

# Test that function is defined with correct signature
import inspect
sig = inspect.signature(download_results)
params = list(sig.parameters.keys())
assert 'remote_host' in params
assert 'db_path' in params
assert 'file_list' in params
assert 'limit' in params
assert 'include_optional' in params
print("‚úì download_results function signature correct")

# Test file list building logic
test_files = DEFAULT_VASP_OUTPUTS.copy()
assert 'OUTCAR' in test_files
assert 'CHGCAR' not in test_files
print("‚úì Default file list works")

test_files_with_optional = DEFAULT_VASP_OUTPUTS + OPTIONAL_VASP_OUTPUTS
assert 'CHGCAR' in test_files_with_optional
assert 'WAVECAR' in test_files_with_optional
print("‚úì Optional file list works")

print("\n‚úÖ Convenience function tests passed!")
print("\nUsage example:")
print("  result = download_results('bebop', limit=10)")


=== Testing download_results convenience function ===

‚úì download_results function signature correct
‚úì Default file list works
‚úì Optional file list works

‚úÖ Convenience function tests passed!

Usage example:
  result = download_results('bebop', limit=10)


## Live Testing with Real SSH

Test download functionality with actual completed jobs on bebop.

**Prerequisites:**
- Active SSH connection to bebop
- At least one completed job in the database
- Jobs must have `remote_path` set

In [8]:
#| hide

print("=== Live SSH Download Test ===\n")
print("This will test downloading results from bebop cluster.\n")

# Configuration - MODIFY THESE FOR YOUR SETUP
REMOTE_HOST = 'bebop'  # or 'username@bebop.lcrc.anl.gov'
DB_PATH = '~/.chasqui/jobs.db'
TEST_LIMIT = 1  # Start with just 1 job for safety

print(f"Configuration:")
print(f"  Remote host: {REMOTE_HOST}")
print(f"  Database: {DB_PATH}")
print(f"  Test limit: {TEST_LIMIT} job(s)")
print()

# Initialize database
db = ChasquiDB(DB_PATH)

# Check for completed jobs
completed_jobs = db.get_jobs_by_state('COMPLETED')
pending_downloads = [j for j in completed_jobs if j.get('downloaded_at') is None]

print(f"Found {len(completed_jobs)} completed jobs")
print(f"Found {len(pending_downloads)} jobs pending download")

if len(pending_downloads) == 0:
    print("\n‚ö†Ô∏è  No jobs available for download testing.")
    print("Tip: Run a job through the workflow first, or manually mark a job as COMPLETED.")
else:
    print(f"\nüìã Jobs pending download:")
    for i, job in enumerate(pending_downloads[:5], 1):  # Show first 5
        print(f"  {i}. Job {job['job_id'][:8]}... at {job.get('remote_path', 'NO PATH SET')}")
    
    if pending_downloads[0].get('remote_path') is None:
        print("\n‚ö†Ô∏è  First job has no remote_path set!")
        print("Cannot download without knowing remote location.")
        print("\nTo fix, update the job:")
        print(f"  db.update_state('{pending_downloads[0]['job_id']}', 'COMPLETED',")
        print(f"                  remote_path='$HOME/chasqui_remote/completed/JOB_NAME')")
    else:
        print("\nüöÄ Ready to test! Run the following code:\n")
        print("# Test single job download")
        print("with SSHConnection(REMOTE_HOST) as ssh:")
        print("    job = pending_downloads[0]")
        print("    result = download_job_results(ssh, job)")
        print("    print(f'Downloaded: {result[\"downloaded\"]}')")
        print("    print(f'Skipped: {result[\"skipped\"]}')")
        print("    print(f'Failed: {result[\"failed\"]}')")
        print()
        print("# Or test batch download")
        print("result = download_results(REMOTE_HOST, DB_PATH, limit=TEST_LIMIT)")
        print("print(result)")

=== Live SSH Download Test ===

This will test downloading results from bebop cluster.

Configuration:
  Remote host: bebop
  Database: ~/.chasqui/jobs.db
  Test limit: 1 job(s)

Found 4 completed jobs
Found 4 jobs pending download

üìã Jobs pending download:
  1. Job 3fb0ce99... at $HOME/chasqui_remote/completed/YOUR_JOB_NAME_HERE
  2. Job 3af8fa2a... at /home/jcgarcia/chasqui_remote
  3. Job 590222a1... at /home/jcgarcia/scratch/vasp_e2e_test/e2e_aut
  4. Job 344f5ce2... at /home/jcgarcia/scratch/vasp_e2e_test/e2e_aut

üöÄ Ready to test! Run the following code:

# Test single job download
with SSHConnection(REMOTE_HOST) as ssh:
    job = pending_downloads[0]
    result = download_job_results(ssh, job)
    print(f'Downloaded: {result["downloaded"]}')
    print(f'Skipped: {result["skipped"]}')
    print(f'Failed: {result["failed"]}')

# Or test batch download
result = download_results(REMOTE_HOST, DB_PATH, limit=TEST_LIMIT)
print(result)


In [26]:
#| hide

print("=== Syncing to Get Real Completed Jobs ===\n")

from chasqui.sync import sync, SyncConfig

# This will check the completed/ folder and update database
config = SyncConfig(remote_host='bebop')

print("Running sync to update job statuses...")
result = sync(config)

print(f"\nSync results:")
print(f"  Uploaded: {result['uploaded']}")
print(f"  Submitted: {result['submitted']}")
print(f"  Completed: {result['completed']}")
print(f"  Failed: {result['failed']}")

# Now check what completed jobs we have
db = ChasquiDB(DB_PATH)
completed = db.get_jobs_by_state('COMPLETED')

print(f"\n‚úì Now have {len(completed)} completed jobs in database")

if len(completed) > 0:
    print("\nCompleted jobs:")
    for job in completed[:3]:  # Show first 3
        print(f"  {job['job_id']}: {job.get('remote_path', 'NO PATH')}")

=== Syncing to Get Real Completed Jobs ===

Running sync to update job statuses...
Duo two-factor login for jcgarcia

Enter a passcode or select one of the following options:

 1. Duo Push to XXX-XXX-3446

Passcode or option (1-1): 

 1



Sync results:
  Uploaded: 0
  Submitted: 0
  Completed: 0
  Failed: 0

‚úì Now have 5 completed jobs in database

Completed jobs:
  3fb0ce99-cc22-4f74-81a0-4cdf9cb55cca: $HOME/chasqui_remote/completed/YOUR_JOB_NAME_HERE
  3af8fa2a-171f-4ba6-8fcd-d0a3e0e64490: /home/jcgarcia/chasqui_remote
  590222a1-fb1b-4709-8625-4de5cf609493: /home/jcgarcia/scratch/vasp_e2e_test/e2e_aut


In [9]:
#| hide

print("=== Creating Test Job for Download Testing ===\n")

# Initialize database
db = ChasquiDB(DB_PATH)

# Create a test job
test_job_id = db.create_job(
    local_path="~/chasqui_test_download",
    vasp_config={
        "job_name": "test_download",
        "cores": 2,
        "walltime": "1:00:00"
    }
)

print(f"‚úì Created job: {test_job_id}")

# Mark it as COMPLETED and set remote_path
# IMPORTANT: Replace this with an actual remote path where you have VASP outputs
REMOTE_JOB_PATH = "/home/jcgarcia/scratch/vasp_e2e_test/e2e_aut"

db.update_state(
    test_job_id,
    'COMPLETED',
    remote_path=REMOTE_JOB_PATH
)

print(f"‚úì Marked as COMPLETED")
print(f"‚úì Remote path set to: {REMOTE_JOB_PATH}")

# Verify
job = db.get_job(test_job_id)
print(f"\nJob details:")
print(f"  Job ID: {job['job_id']}")
print(f"  State: {job['state']}")
print(f"  Local path: {job['local_path']}")
print(f"  Remote path: {job['remote_path']}")
print(f"  Downloaded: {job.get('downloaded_at')}")

print("\n‚ö†Ô∏è  IMPORTANT: Update REMOTE_JOB_PATH above with an actual path")
print("   that contains VASP output files on bebop before testing download!")

=== Creating Test Job for Download Testing ===

‚úì Created job: acda14ac-f999-4e6b-b283-cc16f62712c6
‚úì Marked as COMPLETED
‚úì Remote path set to: /home/jcgarcia/scratch/vasp_e2e_test/e2e_aut

Job details:
  Job ID: acda14ac-f999-4e6b-b283-cc16f62712c6
  State: COMPLETED
  Local path: ~/chasqui_test_download
  Remote path: /home/jcgarcia/scratch/vasp_e2e_test/e2e_aut
  Downloaded: None

‚ö†Ô∏è  IMPORTANT: Update REMOTE_JOB_PATH above with an actual path
   that contains VASP output files on bebop before testing download!


In [27]:
#| hide

print("=== Fixing Remote Path and Testing Download ===\n")

# Correct path with VASP outputs
CORRECT_REMOTE_PATH = '/home/jcgarcia/scratch/vasp_e2e_test/e2e_auto'

db = ChasquiDB(DB_PATH)
pending = [j for j in db.get_jobs_by_state('COMPLETED') if j.get('downloaded_at') is None]

if len(pending) > 0:
    # Use the first pending job
    test_job_id = pending[0]['job_id']
    
    # Update with correct path
    db.update_state(
        test_job_id,
        'COMPLETED',
        remote_path=CORRECT_REMOTE_PATH
    )
    
    print(f"‚úì Updated job {test_job_id[:8]}... with correct remote path")
    print(f"  Remote path: {CORRECT_REMOTE_PATH}\n")
    
    # Get updated job
    test_job = db.get_job(test_job_id)
    
    print("üöÄ Testing download...\n")
    
    try:
        with SSHConnection(REMOTE_HOST) as ssh:
            print("‚úì SSH connected")
            
            # Download just a few files first (test)
            result = download_job_results(
                ssh,
                test_job,
                file_list=['OUTCAR', 'CONTCAR', 'OSZICAR'],
                skip_missing=True
            )
            
            print(f"\nüì• Download Results:")
            print(f"  Downloaded: {result['downloaded']}")
            print(f"  Skipped: {result['skipped']}")
            print(f"  Failed: {result['failed']}")
            
            if len(result['downloaded']) > 0:
                print("\n‚úÖ Download SUCCESSFUL!")
                
                # Update database
                db.update_state(
                    test_job_id,
                    'COMPLETED',
                    downloaded_at=datetime.now().isoformat()
                )
                print("‚úì Database updated with download timestamp")
                
                # Verify files locally
                local_path = Path(test_job['local_path']).expanduser()
                print(f"\nüìÇ Local files in {local_path}:")
                for filename in result['downloaded']:
                    file_path = local_path / filename
                    if file_path.exists():
                        size = file_path.stat().st_size
                        print(f"  ‚úì {filename} ({size:,} bytes)")
                    else:
                        print(f"  ‚úó {filename} (NOT FOUND)")
                        
                print("\nüéâ Download module is working!")
                
            else:
                print("\n‚ö†Ô∏è  No files downloaded - something is still wrong")
                
    except Exception as e:
        print(f"\n‚ùå Error: {e}")
        import traceback
        traceback.print_exc()
else:
    print("No pending jobs available")

=== Fixing Remote Path and Testing Download ===

‚úì Updated job 3fb0ce99... with correct remote path
  Remote path: /home/jcgarcia/scratch/vasp_e2e_test/e2e_auto

üöÄ Testing download...

Duo two-factor login for jcgarcia

Enter a passcode or select one of the following options:

 1. Duo Push to XXX-XXX-3446

Passcode or option (1-1): 

 1


‚úì SSH connected

üì• Download Results:
  Downloaded: ['OUTCAR', 'CONTCAR', 'OSZICAR']
  Skipped: []
  Failed: []

‚úÖ Download SUCCESSFUL!
‚úì Database updated with download timestamp

üìÇ Local files in /Users/jcgarcia/chasqui_test_download:
  ‚úì OUTCAR (352,727 bytes)
  ‚úì CONTCAR (1,634 bytes)
  ‚úì OSZICAR (2,651 bytes)

üéâ Download module is working!


In [29]:
#| hide

print("=== Testing Full VASP Output Download ===\n")

# Create a fresh test job for full download
test_job_id = db.create_job(
    local_path="~/chasqui_full_download_test",
    vasp_config={"test": "full_download"}
)

db.update_state(
    test_job_id,
    'COMPLETED',
    remote_path='/home/jcgarcia/scratch/vasp_e2e_test/e2e_auto'
)

test_job = db.get_job(test_job_id)

print(f"Created test job: {test_job_id[:8]}...")
print(f"Local path: {test_job['local_path']}\n")

try:
    with SSHConnection(REMOTE_HOST) as ssh:
        print("‚úì SSH connected\n")
        
        # Download ALL default files
        print("Downloading default VASP outputs...")
        result = download_job_results(ssh, test_job)
        
        print(f"\nüì• Results:")
        print(f"  Downloaded: {len(result['downloaded'])} files")
        print(f"  Skipped: {len(result['skipped'])} files")
        
        if result['downloaded']:
            print(f"\n  Files downloaded:")
            for f in result['downloaded']:
                print(f"    ‚úì {f}")
        
        if result['skipped']:
            print(f"\n  Files skipped (not found on remote):")
            for f in result['skipped']:
                print(f"    - {f}")
        
        # Calculate total size
        local_path = Path(test_job['local_path']).expanduser()
        total_size = sum((local_path / f).stat().st_size 
                        for f in result['downloaded'] 
                        if (local_path / f).exists())
        
        print(f"\nüìä Total downloaded: {total_size:,} bytes ({total_size/1024/1024:.2f} MB)")
        
        # Update database
        if len(result['downloaded']) > 0:
            db.update_state(
                test_job_id,
                'COMPLETED',
                downloaded_at=datetime.now().isoformat()
            )
            print("‚úì Database updated")
        
        print("\n‚úÖ Full download test successful!")
        
except Exception as e:
    print(f"‚ùå Error: {e}")
    import traceback
    traceback.print_exc()

=== Testing Full VASP Output Download ===

Created test job: a51fa2bd...
Local path: ~/chasqui_full_download_test

Duo two-factor login for jcgarcia

Enter a passcode or select one of the following options:

 1. Duo Push to XXX-XXX-3446

Passcode or option (1-1): 

 1


‚úì SSH connected

Downloading default VASP outputs...

üì• Results:
  Downloaded: 4 files
  Skipped: 0 files

  Files downloaded:
    ‚úì OUTCAR
    ‚úì CONTCAR
    ‚úì OSZICAR
    ‚úì vasprun.xml

üìä Total downloaded: 1,861,429 bytes (1.78 MB)
‚úì Database updated

‚úÖ Full download test successful!


In [30]:
#| hide

print("=== Testing Optional Large Files Download ===\n")
print("‚ö†Ô∏è  This will download CHGCAR (~18MB) and other large files")
print("   Only run if you want to test large file handling\n")

# Uncomment to test large files
"""
test_job_id = db.create_job(
    local_path="~/chasqui_large_files_test",
    vasp_config={"test": "large_files"}
)

db.update_state(
    test_job_id,
    'COMPLETED',
    remote_path='/home/jcgarcia/scratch/vasp_e2e_test/e2e_auto'
)

test_job = db.get_job(test_job_id)

try:
    with SSHConnection(REMOTE_HOST) as ssh:
        print("‚úì SSH connected")
        
        # Download with optional files
        file_list = DEFAULT_VASP_OUTPUTS + ['CHGCAR', 'DOSCAR', 'PROCAR']
        
        print(f"Downloading {len(file_list)} files including large optional files...")
        result = download_job_results(ssh, test_job, file_list=file_list)
        
        print(f"\nüì• Downloaded: {len(result['downloaded'])} files")
        
        # Show sizes
        local_path = Path(test_job['local_path']).expanduser()
        print("\nüìÇ File sizes:")
        for filename in result['downloaded']:
            size = (local_path / filename).stat().st_size
            print(f"  {filename:20s} {size:>10,} bytes ({size/1024/1024:>6.2f} MB)")
        
        total = sum((local_path / f).stat().st_size for f in result['downloaded'])
        print(f"\n  {'TOTAL':20s} {total:>10,} bytes ({total/1024/1024:>6.2f} MB)")
        
        db.update_state(test_job_id, 'COMPLETED', 
                       downloaded_at=datetime.now().isoformat())
        
        print("\n‚úÖ Large file download successful!")
        
except Exception as e:
    print(f"‚ùå Error: {e}")
"""

print("üí° Uncomment the code above to test large file downloads")

=== Testing Optional Large Files Download ===

‚ö†Ô∏è  This will download CHGCAR (~18MB) and other large files
   Only run if you want to test large file handling

üí° Uncomment the code above to test large file downloads


In [31]:
#| hide

print("=== Testing Batch Download ===\n")

# Check how many jobs are ready
pending = [j for j in db.get_jobs_by_state('COMPLETED') 
           if j.get('downloaded_at') is None]

print(f"Found {len(pending)} jobs pending download\n")

if len(pending) >= 2:
    print("Testing batch download on multiple jobs...\n")
    
    try:
        with SSHConnection(REMOTE_HOST) as ssh:
            result = download_completed_jobs(
                db, 
                ssh, 
                file_list=['OUTCAR', 'CONTCAR', 'OSZICAR'],
                limit=2  # Just test with 2 jobs
            )
            
            print(f"üìä Batch Download Results:")
            print(f"  Jobs processed: {result['jobs_processed']}")
            print(f"  Total files downloaded: {result['total_downloaded']}")
            print(f"  Total files skipped: {result['total_skipped']}")
            print(f"  Total files failed: {result['total_failed']}")
            
            print(f"\nüìã Per-Job Details:")
            for job_result in result['jobs']:
                if 'result' in job_result:
                    r = job_result['result']
                    job_id = job_result['job_id'][:8]
                    print(f"  {job_id}...: {len(r['downloaded'])} downloaded, "
                          f"{len(r['skipped'])} skipped, {len(r['failed'])} failed")
            
            print("\n‚úÖ Batch download test successful!")
            
    except Exception as e:
        print(f"‚ùå Error: {e}")
        import traceback
        traceback.print_exc()
else:
    print("‚ö†Ô∏è  Need at least 2 pending jobs to test batch download")
    print("Create more test jobs or mark some as not downloaded to test")

=== Testing Batch Download ===

Found 4 jobs pending download

Testing batch download on multiple jobs...

Duo two-factor login for jcgarcia

Enter a passcode or select one of the following options:

 1. Duo Push to XXX-XXX-3446

Passcode or option (1-1): 

 1


üìä Batch Download Results:
  Jobs processed: 2
  Total files downloaded: 0
  Total files skipped: 6
  Total files failed: 0

üìã Per-Job Details:
  3af8fa2a...: 0 downloaded, 3 skipped, 0 failed
  590222a1...: 0 downloaded, 3 skipped, 0 failed

‚úÖ Batch download test successful!
