# Full Pipeline: DICOM to Connectivity

This notebook demonstrates running the complete neuroimaging pipeline:

1. **HeudiConv**: DICOM → BIDS conversion
2. **QSIPrep**: Diffusion preprocessing
3. **QSIRecon**: Reconstruction and tractography
4. **QSIParc**: Parcellation and regional analysis

## Overview

This end-to-end pipeline takes raw DICOM data and produces:
- BIDS-formatted dataset
- Preprocessed diffusion images
- Structural connectivity matrices
- Regional diffusion metrics
- Quality control reports at each stage

## Setup

In [None]:
import json
import time
from datetime import datetime
from pathlib import Path

from voxelops import (
    HeudiconvDefaults,
    HeudiconvInputs,
    QSIParcDefaults,
    QSIParcInputs,
    QSIPrepDefaults,
    QSIPrepInputs,
    QSIReconDefaults,
    QSIReconInputs,
    run_heudiconv,
    run_qsiparc,
    run_qsiprep,
    run_qsirecon,
)

## Configuration

Define all paths and parameters for the pipeline:

In [None]:
# --- Input / output locations (edit these for your own data) ---
dicom_dir = Path("/data/raw/dicom")
bids_dir = Path("/data/bids")
derivatives_dir = Path("/data/derivatives")
work_dir = Path("/data/work")

# --- Static configuration files consumed by the individual tools ---
heuristic_file = Path("/config/heuristics/brain_bank.py")
recon_spec = Path("/config/recon_specs/dsi_studio_gqi.json")
fs_license = Path("/opt/freesurfer/license.txt")

# --- Subject handled by the single-participant cells below ---
participant = "01"

# --- Per-tool settings, one object per pipeline stage ---
heudiconv_config = HeudiconvDefaults(
    overwrite=False,
    bids_validator=True,
)

qsiprep_config = QSIPrepDefaults(
    nprocs=16,
    mem_mb=32000,
    output_resolution=1.6,
    longitudinal=False,
    fs_license=fs_license,
)

qsirecon_config = QSIReconDefaults(
    nprocs=24,
    mem_mb=48000,
    fs_license=fs_license,
)

qsiparc_config = QSIParcDefaults(
    n_jobs=4,
    n_procs=2,
)

# Echo the key settings so the notebook output records what was used.
print("Pipeline configuration:")
print(
    "\n".join(
        (
            f"  Participant: {participant}",
            f"  DICOM directory: {dicom_dir}",
            f"  BIDS directory: {bids_dir}",
            f"  Derivatives: {derivatives_dir}",
            f"  Work directory: {work_dir}",
        )
    )
)

## Pipeline Execution

### Step 1: DICOM to BIDS Conversion

In [None]:
print("=" * 60)
print("STEP 1: DICOM to BIDS Conversion (HeudiConv)")
print("=" * 60)

# Describe the conversion job: raw DICOMs in, BIDS dataset out.
heudiconv_inputs = HeudiconvInputs(
    dicom_dir=dicom_dir,
    participant=participant,
    heuristic=heuristic_file,
    output_dir=bids_dir,
)

# A monotonic clock is unaffected by system clock adjustments that may
# happen while the step is running, so the elapsed time stays meaningful.
start_time = time.monotonic()
try:
    # Only the tool invocation is guarded, so that a problem while *reporting*
    # the result is not mislabelled as a HeudiConv failure.
    heudiconv_result = run_heudiconv(heudiconv_inputs, heudiconv_config)
except Exception as e:
    # Report and re-raise: later cells depend on `heudiconv_result`.
    print(f"\nHeudiConv failed: {e}")
    print("Pipeline aborted.")
    raise
else:
    elapsed = time.monotonic() - start_time

    # NOTE(review): the summary cell reads result.get("success"); confirm that
    # run_heudiconv raises on failure rather than returning success=False,
    # otherwise this message can be printed for an unsuccessful run.
    print(f"\nHeudiConv completed successfully in {elapsed/60:.1f} minutes")
    print(f"  BIDS directory: {heudiconv_result['expected_outputs'].bids_dir}")
    print(f"  Exit code: {heudiconv_result['exit_code']}")

### Step 2: Diffusion Preprocessing

In [None]:
print("\n" + "=" * 60)
print("STEP 2: Diffusion Preprocessing (QSIPrep)")
print("=" * 60)

# Chain from step 1: preprocess the BIDS dataset reported by HeudiConv.
qsiprep_inputs = QSIPrepInputs(
    bids_dir=heudiconv_result["expected_outputs"].bids_dir,
    participant=participant,
    output_dir=derivatives_dir / "qsiprep",
    work_dir=work_dir / "qsiprep",
)

# A monotonic clock is unaffected by system clock adjustments that may
# happen while the step is running, so the elapsed time stays meaningful.
start_time = time.monotonic()
try:
    # Only the tool invocation is guarded, so that a problem while *reporting*
    # the result is not mislabelled as a QSIPrep failure.
    qsiprep_result = run_qsiprep(qsiprep_inputs, qsiprep_config)
except Exception as e:
    # Report and re-raise: later cells depend on `qsiprep_result`.
    print(f"\n✗ QSIPrep failed: {e}")
    print("Pipeline aborted.")
    raise
else:
    elapsed = time.monotonic() - start_time

    # NOTE(review): the summary cell reads result.get("success"); confirm that
    # run_qsiprep raises on failure rather than returning success=False,
    # otherwise this message can be printed for an unsuccessful run.
    print(f"\n✓ QSIPrep completed successfully in {elapsed/60:.1f} minutes")
    print(f"  Output directory: {qsiprep_result['expected_outputs'].qsiprep_dir}")
    print(f"  HTML report: {qsiprep_result['expected_outputs'].html_report}")
    print(f"  Exit code: {qsiprep_result['exit_code']}")

### Step 3: Reconstruction and Tractography

In [None]:
print("\n" + "=" * 60)
print("STEP 3: Reconstruction and Tractography (QSIRecon)")
print("=" * 60)

# Chain from step 2: reconstruct from the QSIPrep output directory.
qsirecon_inputs = QSIReconInputs(
    qsiprep_dir=qsiprep_result["expected_outputs"].qsiprep_dir,
    participant=participant,
    recon_spec=recon_spec,
    output_dir=derivatives_dir / "qsirecon",
    work_dir=work_dir / "qsirecon",
)

# A monotonic clock is unaffected by system clock adjustments that may
# happen while the step is running, so the elapsed time stays meaningful.
start_time = time.monotonic()
try:
    # Only the tool invocation is guarded, so that a problem while *reporting*
    # the result is not mislabelled as a QSIRecon failure.
    qsirecon_result = run_qsirecon(qsirecon_inputs, qsirecon_config)
except Exception as e:
    # Report and re-raise: later cells depend on `qsirecon_result`.
    print(f"\n✗ QSIRecon failed: {e}")
    print("Pipeline aborted.")
    raise
else:
    elapsed = time.monotonic() - start_time

    # NOTE(review): the summary cell reads result.get("success"); confirm that
    # run_qsirecon raises on failure rather than returning success=False,
    # otherwise this message can be printed for an unsuccessful run.
    print(f"\n✓ QSIRecon completed successfully in {elapsed/60:.1f} minutes")
    print(f"  Output directory: {qsirecon_result['expected_outputs'].qsirecon_dir}")
    print(f"  HTML report: {qsirecon_result['expected_outputs'].html_report}")
    print(f"  Exit code: {qsirecon_result['exit_code']}")

### Step 4: Parcellation and Regional Analysis

In [None]:
print("\n" + "=" * 60)
print("STEP 4: Parcellation and Regional Analysis (QSIParc)")
print("=" * 60)

# Chain from step 3: parcellate the QSIRecon output directory.
qsiparc_inputs = QSIParcInputs(
    qsirecon_dir=qsirecon_result["expected_outputs"].qsirecon_dir,
    participant=participant,
    output_dir=derivatives_dir / "qsiparc",
)

# A monotonic clock is unaffected by system clock adjustments that may
# happen while the step is running, so the elapsed time stays meaningful.
start_time = time.monotonic()
try:
    # Only the tool invocation is guarded, so that a problem while *reporting*
    # the result is not mislabelled as a QSIParc failure.
    qsiparc_result = run_qsiparc(qsiparc_inputs, qsiparc_config)
except Exception as e:
    # Report and re-raise: later cells depend on `qsiparc_result`.
    print(f"\nQSIParc failed: {e}")
    print("Pipeline aborted.")
    raise
else:
    elapsed = time.monotonic() - start_time

    # NOTE(review): the summary cell reads result.get("success"); confirm that
    # run_qsiparc raises on failure rather than returning success=False,
    # otherwise this message can be printed for an unsuccessful run.
    print(f"\nQSIParc completed successfully in {elapsed/60:.1f} minutes")
    print(f"  Output directory: {qsiparc_result['expected_outputs'].output_dir}")
    # 'output_files' is treated as optional; a missing key is reported as 0 files.
    print(f"  Output files: {len(qsiparc_result.get('output_files', []))}")

## Pipeline Summary

In [None]:
print("\n" + "=" * 60, "PIPELINE SUMMARY", "=" * 60, sep="\n")

# (display name, result dict) pairs, listed in execution order
results = [
    ("HeudiConv", heudiconv_result),
    ("QSIPrep", qsiprep_result),
    ("QSIRecon", qsirecon_result),
    ("QSIParc", qsiparc_result),
]

# Sum of the per-step durations; a step without "duration_seconds" counts as 0
total_duration = sum(step.get("duration_seconds", 0) for _, step in results)

print(f"\nParticipant: {participant}")
print(f"Total processing time: {total_duration/3600:.1f} hours\n")

# One line per step: status flag, minutes spent, and share of the total
print("Step-by-step breakdown:")
for name, result in results:
    duration = result.get("duration_seconds", 0)
    success = result.get("success", False)

    if success:
        status = "OK"
    else:
        status = "FAIL"

    # Avoid dividing by zero when no step reported a duration
    if total_duration:
        pct = duration / total_duration * 100
    else:
        pct = 0

    print(f"  [{status}] {name:12s}: {duration/60:6.1f} minutes ({pct:5.1f}%)")

# Where each stage wrote its data
print("\nOutput locations:")
print(f"  BIDS dataset: {heudiconv_result['expected_outputs'].bids_dir}")
print(f"  QSIPrep: {qsiprep_result['expected_outputs'].qsiprep_dir}")
print(f"  QSIRecon: {qsirecon_result['expected_outputs'].qsirecon_dir}")
print(f"  QSIParc: {qsiparc_result['expected_outputs'].output_dir}")

# HTML reports exposed by the QSIPrep and QSIRecon results
print("\nQuality control reports:")
print(f"  QSIPrep: {qsiprep_result['expected_outputs'].html_report}")
print(f"  QSIRecon: {qsirecon_result['expected_outputs'].html_report}")

## Save Pipeline Record

In [None]:
# Gather everything worth keeping about this run into a single dict:
# identification, timing, the raw per-step results, and the output paths.
pipeline_record = dict(
    participant=participant,
    pipeline_version="2.0",
    execution_date=datetime.now().isoformat(),
    total_duration_seconds=total_duration,
    total_duration_hours=total_duration / 3600,
    steps=dict(
        heudiconv=heudiconv_result,
        qsiprep=qsiprep_result,
        qsirecon=qsirecon_result,
        qsiparc=qsiparc_result,
    ),
    outputs=dict(
        bids_dir=str(heudiconv_result["expected_outputs"].bids_dir),
        qsiprep_dir=str(qsiprep_result["expected_outputs"].qsiprep_dir),
        qsirecon_dir=str(qsirecon_result["expected_outputs"].qsirecon_dir),
        qsiparc_dir=str(qsiparc_result["expected_outputs"].output_dir),
    ),
)

# Make sure the destination folder exists before writing
record_dir = Path("/data/records/pipeline")
record_dir.mkdir(parents=True, exist_ok=True)

record_file = record_dir.joinpath(f"{participant}_pipeline.json")

# default=str stringifies any value json cannot encode natively
with record_file.open("w") as f:
    json.dump(pipeline_record, f, indent=2, default=str)

print(f"Pipeline record saved to: {record_file}")

## Batch Processing Multiple Participants

Process multiple participants through the full pipeline:

In [None]:
def run_full_pipeline(participant, configs):
    """Run the full pipeline for a single participant.

    The four steps run in order (HeudiConv, QSIPrep, QSIRecon, QSIParc), each
    one consuming the output directory reported by the previous step. Any
    exception raised along the way is caught and recorded in the returned
    dict instead of propagating, so a batch loop can carry on with the next
    participant.

    Parameters
    ----------
    participant : str
        Participant ID (without 'sub-' prefix).
    configs : dict
        Dict with 'heudiconv', 'qsiprep', 'qsirecon', 'qsiparc' config objects.

    Returns
    -------
    dict
        Results from all steps that completed, keyed by step name
        ('heudiconv', 'qsiprep', 'qsirecon', 'qsiparc'), plus:

        - 'success' (bool): True when all four steps finished.
        - 'pipeline_duration' (float): elapsed seconds for the whole run.
        - 'error' (str): exception message; only present on failure.
        - 'failed_step' (str): name of the step that raised; only present
          on failure.
    """
    results = {}
    current_step = None
    # Monotonic clock: unaffected by system clock adjustments during the run.
    pipeline_start = time.monotonic()

    try:
        # Step 1: HeudiConv
        current_step = "heudiconv"
        print("  [1/4] Running HeudiConv...")
        heudiconv_inputs = HeudiconvInputs(
            dicom_dir=dicom_dir,
            participant=participant,
            heuristic=heuristic_file,
            output_dir=bids_dir,
        )
        results["heudiconv"] = run_heudiconv(heudiconv_inputs, configs["heudiconv"])

        # NOTE(review): unlike the single-participant cells, steps 2-4 below do
        # not pass output_dir/work_dir -- presumably the input classes supply
        # defaults; confirm they resolve to the intended directories.

        # Step 2: QSIPrep
        current_step = "qsiprep"
        print("  [2/4] Running QSIPrep...")
        qsiprep_inputs = QSIPrepInputs(
            bids_dir=results["heudiconv"]["expected_outputs"].bids_dir,
            participant=participant,
        )
        results["qsiprep"] = run_qsiprep(qsiprep_inputs, configs["qsiprep"])

        # Step 3: QSIRecon
        current_step = "qsirecon"
        print("  [3/4] Running QSIRecon...")
        qsirecon_inputs = QSIReconInputs(
            qsiprep_dir=results["qsiprep"]["expected_outputs"].qsiprep_dir,
            participant=participant,
            recon_spec=recon_spec,
        )
        results["qsirecon"] = run_qsirecon(qsirecon_inputs, configs["qsirecon"])

        # Step 4: QSIParc
        current_step = "qsiparc"
        print("  [4/4] Running QSIParc...")
        qsiparc_inputs = QSIParcInputs(
            qsirecon_dir=results["qsirecon"]["expected_outputs"].qsirecon_dir,
            participant=participant,
        )
        results["qsiparc"] = run_qsiparc(qsiparc_inputs, configs["qsiparc"])

    except Exception as e:
        # Deliberate best-effort boundary: record the failure (and where it
        # happened) rather than aborting the caller's batch loop.
        results["success"] = False
        results["error"] = str(e)
        results["failed_step"] = current_step
    else:
        results["success"] = True
    finally:
        # Recorded on both paths so callers can always total the time spent.
        results["pipeline_duration"] = time.monotonic() - pipeline_start

    return results


# Process multiple participants
participants = ["01", "02", "03"]

# One config object per pipeline step, shared by every participant
configs = {
    "heudiconv": heudiconv_config,
    "qsiprep": qsiprep_config,
    "qsirecon": qsirecon_config,
    "qsiparc": qsiparc_config,
}

batch_results = []
failed_participants = []

print(f"\nProcessing {len(participants)} participants through full pipeline...\n")

# The loop variable is deliberately NOT called `participant`: that name holds
# the single-participant ID set in the Configuration cell, and overwriting it
# here would make re-running the summary/record cells report the wrong subject.
for i, participant_id in enumerate(participants, 1):
    print(f"[{i}/{len(participants)}] Processing participant {participant_id}")

    # run_full_pipeline records failures in its result instead of raising,
    # so one bad participant does not stop the batch.
    result = run_full_pipeline(participant_id, configs)
    batch_results.append(result)

    if result["success"]:
        duration = result["pipeline_duration"]
        print(f"  Completed in {duration/3600:.1f} hours\n")
    else:
        failed_participants.append(participant_id)
        print(f"  Failed: {result['error']}\n")

# Batch summary
successful = sum(1 for r in batch_results if r["success"])
total_time = sum(r["pipeline_duration"] for r in batch_results)

print("=" * 60)
print("BATCH PROCESSING SUMMARY")
print("=" * 60)
print(f"Total participants: {len(batch_results)}")
print(f"  Successful: {successful}")
print(f"  Failed: {len(batch_results) - successful}")
if failed_participants:
    # Name the failures so they can be re-run without scrolling the log.
    print(f"  Failed participants: {', '.join(failed_participants)}")
print(f"Total processing time: {total_time/3600:.1f} hours")
# Guard the average against an empty participant list
if batch_results:
    print(f"Average time per participant: {total_time/len(batch_results)/3600:.1f} hours")

## Next Steps

After completing the full pipeline:

1. **Quality Control**: Review the HTML reports produced by QSIPrep and QSIRecon
2. **Data Validation**: Check that all expected outputs exist
3. **Statistical Analysis**: Analyze connectivity matrices and regional metrics
4. **Visualization**: Create figures for publication
5. **Database Integration**: Save execution records to your brain bank database

## Tips for Production

- **Workflow Manager**: Use Airflow, Prefect, or Nextflow for scheduling
- **Error Handling**: Implement retry logic and notifications
- **Resource Management**: Monitor disk space and memory usage
- **Parallel Processing**: Run independent participants in parallel
- **Checkpointing**: Save results after each step to enable restarts
- **Logging**: Store all execution records for audit trail
- **Version Control**: Pin Docker image versions for reproducibility