# Transfer PlexPipe settings files

This notebook provides a workflow for transferring PlexPipe settings files (the pipeline configuration file and the ROI info file) between Globus endpoints. This is particularly useful when the ROI Definition phase is conducted on a local workstation, while the subsequent high-throughput analysis is performed on a remote HPC cluster.

Note: Prior to initiating the transfer, ensure that all file paths within the configuration are updated to reflect the remote filesystem hierarchy.

In [1]:
%load_ext autoreload
%autoreload 2

from pathlib import Path
from globus_sdk import TransferData
from plex_pipe import load_config
from plex_pipe.utils import GlobusConfig, globus_dir_exists, create_globus_tc

## Read in configs

In [2]:
# Resolve the example pipeline config, located under the directory two levels
# above the current working directory, and parse it into `config`.
# `local_config_path` is kept because the transfer cells below reuse it.
local_config_path = (
    Path.cwd().parents[1] / "examples" / "example_pipeline_config_remote.yaml"
)
config = load_config(local_config_path)



In [3]:
# Local folder that holds the analysis outputs on this machine
local_analysis_dir = Path.cwd().parents[1] / "examples" / "output" / "sample_analysis"

# Analysis folder as written in the config, rendered as text with every
# backslash turned into a forward slash so it can be used as a remote path
analysis_dir_text = str(config.analysis_dir)
remote_analysis_dir = analysis_dir_text.replace("\\", "/")

# display the resulting remote path
remote_analysis_dir

'/analysis_dir/sample_analysis'

In [None]:
# Location of the YAML file with the Globus settings, resolved relative to the
# directory two levels above the current working directory
globus_config_path = (
    Path.cwd().parents[1] / "examples" / "example_pipeline_config_globus.yaml"
)

In [5]:
# Read the Globus settings, selecting which YAML entries name the source and
# destination collections, then build the transfer client from the stored
# client id and transfer tokens.
gc = GlobusConfig.from_yaml(
    globus_config_path,
    source_key="cbi_collection_id",
    dest_key="r_collection_id",
)
tc = create_globus_tc(gc.client_id, gc.transfer_tokens)

## Create remote analysis directory

In [6]:
# Make sure the analysis directory is present on the destination collection:
# look it up first and only issue a mkdir when the lookup reports it missing.
directory_exists = globus_dir_exists(
    tc, gc.destination.collection_id, remote_analysis_dir
)

if directory_exists:
    print(f"Directory {remote_analysis_dir} already exists in the destination collection.")
else:
    # remote_analysis_dir is already a string, so it is passed through as-is
    tc.operation_mkdir(gc.destination.collection_id, remote_analysis_dir)
    print(f"Directory {remote_analysis_dir} created in the destination collection.")

Directory /analysis_dir/sample_analysis already exists in the destination collection.


## Transfer files

In [7]:
# Build the transfer map: for each file, the path on the source collection and
# the target path inside the remote analysis directory (file names are kept).

# --- pipeline config file ---
config_file_name = local_config_path.name
local_config_path_globus = gc.source.local_to_globus(local_config_path)
remote_config_path_globus = f"{remote_analysis_dir}/{config_file_name}"
print(f"Config file will be transferred from {local_config_path_globus} to {remote_config_path_globus}")

# --- ROI info file (looked up by name inside the local analysis directory) ---
rois_file_name = config.roi_info_file_path.name
local_rois_info_path_globus = gc.source.local_to_globus(local_analysis_dir / rois_file_name)
remote_rois_info_path_globus = f"{remote_analysis_dir}/{rois_file_name}"
print(f"ROI info file will be transferred from {local_rois_info_path_globus} to {remote_rois_info_path_globus}")

Config file will be transferred from /D/plex-pipe/examples/example_pipeline_config_remote.yaml to /analysis_dir/sample_analysis/example_pipeline_config_remote.yaml
ROI info file will be transferred from /D/plex-pipe/examples/output/sample_analysis/rois.pkl to /analysis_dir/sample_analysis/rois.pkl


In [8]:
# Describe the transfer between the source and destination collections.
# sync_level="checksum" asks Globus to compare checksums when deciding
# whether a file needs to be copied.
transfer_label = f"Transfer settings for {Path(local_config_path).name}"
td = TransferData(
    source_endpoint=gc.source.collection_id,
    destination_endpoint=gc.destination.collection_id,
    label=transfer_label,
    sync_level="checksum",
)

# Queue the (source, destination) pairs: config file first, then ROI info file
transfer_items = [
    (local_config_path_globus, remote_config_path_globus),
    (local_rois_info_path_globus, remote_rois_info_path_globus),
]
for source_path, destination_path in transfer_items:
    td.add_item(source_path, destination_path)

# Hand the request to Globus; the response carries the task id used for monitoring
transfer_result = tc.submit_transfer(td)

In [10]:
# monitor transfer
task_id = transfer_result["task_id"]

# Wait for the task to finish instead of taking a single snapshot right after
# submission (which would almost always report 'ACTIVE' on a fresh Run All).
# task_wait polls every `polling_interval` seconds and returns False if the
# task is still running once `timeout` seconds have elapsed.
task_finished = tc.task_wait(task_id, timeout=300, polling_interval=5)
if not task_finished:
    print("Transfer is still in progress; re-run this cell to refresh the status.")

# Retrieve current task details
task = tc.get_task(task_id)

# NOTE: the transfer is submitted with sync_level="checksum", so files whose
# checksum already matches at the destination are skipped rather than copied;
# a SUCCEEDED task can therefore legitimately report 0 files transferred.
print(f"Task ID: {task_id}")
print(f"Status: {task['status']}")  # Possible: 'ACTIVE', 'INACTIVE', 'SUCCEEDED', or 'FAILED'
print(f"Files Transferred: {task['files_transferred']}")

Task ID: 5d6b02d4-0dcc-11f1-987c-025da3873719
Status: SUCCEEDED
Files Transferred: 0
