In [None]:
# 15-2 Using the Galaxy API

In [None]:
# Install BioBlend
! pip install bioblend

In [None]:
# Import Libraries
from bioblend.galaxy import GalaxyInstance
import time
import os

In [None]:
# Get your Galaxy API key
# 1. Go to User → Preferences → Manage API Key
# 2. Click "Create a new key" - Generate a new key and copy it to your clipboard
# 3. Replace your_api_key_here with your actual API key

In [None]:
# Download fastq files (assumes fastq-dump is installed)
# --split-files writes the reads of each pair to separate _1/_2 files,
# --gzip compresses the output, and -X 25000 caps the download at spot id
# 25000 so the example files stay small.
! fastq-dump --split-files --gzip -X 25000 SRR1039508
! fastq-dump --split-files --gzip -X 25000 SRR1039509

In [None]:
# Rename files for ease of use
# SRR1039508 becomes sample1 and SRR1039509 becomes sample2; the _1/_2 read
# suffixes become R1/R2. The sample1_* names are the ones main() looks for
# when uploading.
! mv SRR1039508_1.fastq.gz sample1_R1.fastq.gz
! mv SRR1039508_2.fastq.gz sample1_R2.fastq.gz
! mv SRR1039509_1.fastq.gz sample2_R1.fastq.gz
! mv SRR1039509_2.fastq.gz sample2_R2.fastq.gz

In [None]:
"""
Galaxy API Example: Simple RNA-seq Quality Control Workflow

This script demonstrates how to:
1. Connect to Galaxy API
2. Upload FASTQ files
3. Run FastQC for quality assessment
4. Run MultiQC to aggregate results
5. Download the final report

Prerequisites:
- Galaxy instance running (local Docker or public server)
- bioblend library: pip install bioblend
- Sample FASTQ files
"""

In [15]:
# Configuration
#GALAXY_URL = "http://localhost:8080"  # Your Galaxy instance URL
GALAXY_URL = "https://usegalaxy.org"  # Or use a public server

# Never store a real API key in the notebook: it ends up in version control
# and in every shared copy. Generate a key in Galaxy under
# User -> Preferences -> Manage API Key and export it as GALAXY_API_KEY before
# starting Jupyter. The placeholder default below is the value the run guard
# compares against to detect an unconfigured key.
# NOTE(review): a key was previously hardcoded here; treat it as compromised
# and revoke it in Galaxy.
API_KEY = os.environ.get("GALAXY_API_KEY", "your_api_key_here")

HISTORY_NAME = "Simple File Upload"

In [17]:
"""
Simple Galaxy API Example - File Upload and Basic Operations

This is a minimal example that focuses on core Galaxy API functionality
that should work with any Galaxy installation, even without specialized tools.
"""

from bioblend.galaxy import GalaxyInstance
import time
import os

def _wait_for_dataset(gi, dataset_id, timeout, poll_interval=5, on_pending=None):
    """Poll a dataset until it is 'ok' or 'error', or until `timeout` seconds pass.

    Args:
        gi: Connected GalaxyInstance (only `gi.datasets.show_dataset` is used).
        dataset_id: ID of the history dataset to poll.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between polls.
        on_pending: Optional callable invoked with the dataset dict each time
            the dataset is seen in a non-final state.

    Returns:
        A `(outcome, dataset)` tuple. `outcome` is 'ok', 'error' or 'timeout';
        `dataset` is the last dict returned by Galaxy, or None if the dataset
        was never polled.
    """
    deadline = time.time() + timeout
    dataset = None
    while time.time() < deadline:
        dataset = gi.datasets.show_dataset(dataset_id)
        if dataset['state'] in ('ok', 'error'):
            return dataset['state'], dataset
        if on_pending is not None:
            on_pending(dataset)
        time.sleep(poll_interval)
    return 'timeout', dataset


def _find_line_count_tool(tools):
    """Return the tool dict to use for line counting, or None if nothing matches.

    The exact core tool id 'wc_gnu' is preferred because main() submits the
    job with an 'input1' parameter. The loose substring match is kept only as
    a fallback: on its own it can select an unrelated tool whose name merely
    contains "line".
    """
    for tool in tools:
        if tool['id'] == 'wc_gnu':
            return tool
    for tool in tools:
        if 'wc' in tool['id'].lower() or 'line' in tool['name'].lower():
            return tool
    return None


def _history_items(history_contents):
    """Normalise the result of show_history(contents=True) to a list of items.

    BioBlend returns a plain list of content dicts for contents=True; a dict
    carrying a 'contents' key is also accepted for safety.
    """
    if isinstance(history_contents, dict):
        return history_contents.get('contents', [])
    return list(history_contents)


def main():
    """Run the basic Galaxy API walkthrough.

    Steps: connect to GALAXY_URL with API_KEY, create a history named
    HISTORY_NAME, list some tools, upload the sample FASTQ files (or paste a
    small text dataset when none are present), wait for the uploads, try a
    line-count tool on the first uploaded dataset, list the history contents
    and print a summary.

    Returns None. Connection, tool-listing, tool-run and history-listing
    failures are reported on stdout rather than raised; errors from history
    creation and from polling the uploads propagate.
    """
    # Connect to Galaxy
    print(f"Connecting to Galaxy at {GALAXY_URL}")
    gi = GalaxyInstance(url=GALAXY_URL, key=API_KEY)

    # Test connection
    try:
        version = gi.config.get_version()
        print(f"Connected to Galaxy version: {version}")
    except Exception as e:
        print(f"Connection failed: {e}")
        return

    # Create a new history
    print(f"Creating new history: {HISTORY_NAME}")
    history = gi.histories.create_history(name=HISTORY_NAME)
    history_id = history['id']
    print(f"History created with ID: {history_id}")

    # List available tools (for debugging)
    print("\nListing some available tools:")
    tools = []  # kept so the tool list is fetched from the server only once
    try:
        tools = gi.tools.get_tools()
        print(f"Total tools available: {len(tools)}")

        # Show first 10 tools
        for tool in tools[:10]:
            print(f"  {tool['id']} - {tool['name']}")

        # Look for common tools
        common_tools = ['upload1', 'cat1', 'Cut1', 'sort1', 'Grep1']
        available_common = [
            tool['id']
            for tool in tools
            if any(common in tool['id'] for common in common_tools)
        ]

        print(f"\nCommon tools found: {available_common}")

    except Exception as e:
        print(f"Could not list tools: {e}")

    # Upload sample files
    sample_files = [
        "sample1_R1.fastq.gz",
        "sample1_R2.fastq.gz"
    ]

    uploaded_datasets = []

    print("\nUploading files...")
    for file_path in sample_files:
        if os.path.exists(file_path):
            print(f"  Uploading {os.path.basename(file_path)}")
            try:
                dataset = gi.tools.upload_file(
                    file_path,
                    history_id,
                    file_type='fastqsanger.gz'
                )
                uploaded_datasets.append(dataset['outputs'][0]['id'])
                print("    ✓ Uploaded successfully")
            except Exception as e:
                print(f"    ✗ Upload failed: {e}")
        else:
            print(f"  ⚠ File not found: {file_path}")

    if not uploaded_datasets:
        print("\nNo files uploaded. Creating a test dataset instead...")

        # Create a simple text file for testing
        test_content = "This is a test file created via Galaxy API\nLine 2\nLine 3\n"

        try:
            # Upload text content directly
            dataset = gi.tools.paste_content(
                content=test_content,
                history_id=history_id,
                file_type='txt'
            )
            uploaded_datasets.append(dataset['outputs'][0]['id'])
            print("  ✓ Created test text file")
        except Exception as e:
            print(f"  ✗ Failed to create test file: {e}")
            return

    # Wait for uploads to complete
    def report_pending(pending):
        print(f"  ⏳ {pending['name']} - status: {pending['state']}")

    print("\nWaiting for uploads to complete...")
    for dataset_id in uploaded_datasets:
        outcome, dataset = _wait_for_dataset(
            gi, dataset_id, timeout=60, on_pending=report_pending
        )
        # Fall back to the ID if the dataset was never successfully polled.
        label = dataset['name'] if dataset else dataset_id
        if outcome == 'ok':
            print(f"  ✓ {label} - upload complete")
        elif outcome == 'error':
            print(f"  ✗ {label} - upload failed")
        else:
            print(f"  ⚠ {label} - timeout waiting for upload")

    # Try a simple operation if we have basic tools
    print("\nTrying basic Galaxy operations...")

    try:
        # Reuse the earlier listing; only re-fetch if that attempt failed.
        if not tools:
            tools = gi.tools.get_tools()
        wc_tool = _find_line_count_tool(tools)

        if wc_tool is not None and uploaded_datasets:
            print(f"  Found line counting tool: {wc_tool['id']}")

            # Run line count on first uploaded file
            wc_result = gi.tools.run_tool(
                history_id=history_id,
                tool_id=wc_tool['id'],
                tool_inputs={
                    'input1': {'src': 'hda', 'id': uploaded_datasets[0]}
                }
            )

            if wc_result['outputs']:
                output_id = wc_result['outputs'][0]['id']
                print(f"  ✓ Line count job submitted (output ID: {output_id})")

                # Wait for job to complete
                outcome, _ = _wait_for_dataset(gi, output_id, timeout=120)
                if outcome == 'ok':
                    # Download and show result
                    result_content = gi.datasets.download_dataset(output_id)
                    print(f"  ✓ Line count result: {result_content.decode().strip()}")
                elif outcome == 'error':
                    print("  ✗ Line count job failed")
                else:
                    print("  ⚠ Line count job timed out")

        else:
            print("  No basic tools found for testing operations")

    except Exception as e:
        print(f"  Error running basic operations: {e}")

    # List final history contents
    print("\nFinal history contents:")
    try:
        history_contents = gi.histories.show_history(history_id, contents=True)

        for item in _history_items(history_contents):
            # .get() because not every history item carries all three keys
            # (e.g. collections may lack an extension).
            print(
                f"  {item.get('name', '?')} - {item.get('state', '?')}"
                f" - {item.get('extension', '?')}"
            )

    except Exception as e:
        print(f"Could not list history contents: {e}")

    # Print summary
    print("\n" + "="*50)
    print("BASIC WORKFLOW COMPLETED")
    print("="*50)
    print(f"History: {HISTORY_NAME}")
    print(f"Files processed: {len(uploaded_datasets)}")
    print(f"Galaxy URL: {GALAXY_URL}/history/view/{history_id}")
    print("\nThis demonstrates basic Galaxy API functionality.")
    print("For more complex workflows, ensure required tools are installed.")

if __name__ == "__main__":
    # Refuse to run with the placeholder, and also with an empty or missing
    # key, so the user gets setup instructions instead of a failed connection.
    if not API_KEY or API_KEY == "your_api_key_here":
        print("Please configure your API key first!")
        print("Go to Galaxy -> User -> Preferences -> Manage API Key")
    else:
        main()

Connecting to Galaxy at https://usegalaxy.org
Connected to Galaxy version: {'version_major': '25.0', 'version_minor': 'rc1'}
Creating new history: Simple File Upload
History created with ID: bbd44e69cb8906b5b7a7050fbbb7205c

Listing some available tools:
Total tools available: 6889
  upload1 - Upload File
  ucsc_table_direct1 - UCSC Main
  ucsc_table_direct_archaea1 - UCSC Archaea
  sra_source - SRA
  ebi_sra_main - EBI SRA
  intermine - InterMine
  export_remote - Export datasets
  __UNZIP_COLLECTION__ - Unzip collection
  __ZIP_COLLECTION__ - Zip collections
  __FILTER_FAILED_DATASETS__ - Filter failed datasets

Common tools found: ['upload1', 'cat1', 'Cut1', 'Grep1']

Uploading files...
  Uploading sample1_R1.fastq.gz
    ✓ Uploaded successfully
  Uploading sample1_R2.fastq.gz
    ✓ Uploaded successfully

Waiting for uploads to complete...
  ⏳ sample1_R1.fastq.gz - status: queued
  ⏳ sample1_R1.fastq.gz - status: running
  ⏳ sample1_R1.fastq.gz - status: running
  ⏳ sample1_R1.fastq