In [None]:
import sys
import os
import re
import subprocess
import warnings
import copy
from pathlib import Path


In [None]:
%load_ext autoreload
%autoreload 2
import bnpm

## General Strategy
```bash
#!/bin/bash
# Log in (if not already logged in)
globus login

# Start transfer
globus transfer -r <SOURCE_ENDPOINT_ID>:"/path/to/source_folder" <DESTINATION_ENDPOINT_ID>:"/path/to/destination_folder" --label "HMS-RC to FAS RC Holyoke Transfer"

# Optionally, display tasks
globus task list
```

## Prepare globus CLI
- activate an environment and install globus with pip: `pip install globus-cli`
- log in with `globus login`

## Identify Your Endpoints

You need the unique endpoint IDs for both storage systems. You can search for them in CLI using:
```bash
globus endpoint search "HMS-RC"
globus endpoint search "FAS RC Holyoke"
```
Review the search results and note the endpoint IDs (they typically look like a long UUID). If you already have these IDs from your institution’s documentation, you can use them directly.

## Set your endpoints
Note that when you perform an action on an endpoint, you'll need to authenticate your access to it. Just follow the instructions printed by the CLI. You will probably be required to authenticate via a call like the following (replace `{endpoint}` with the endpoint ID):
`globus session consent 'urn:globus:auth:scope:transfer.api.globus.org:all[*https://auth.globus.org/scopes/{endpoint}/data_access]'`

In [None]:
## Per-cluster settings, keyed by a short site name:
##  - endpoint: Globus endpoint UUID used in `globus` CLI commands
##  - hostname / username: login details used for the SSH/SFTP connections
params = dict(
    HMS=dict(
        endpoint="b0718922-7031-11e9-b7f8-0a37f382de32",
        hostname="transfer.rc.hms.harvard.edu",
        username="rh183",
    ),
    FAS=dict(
        endpoint="1156ed9e-6984-11ea-af52-0201714f6eab",
        hostname="rc.fas.harvard.edu",
        username="rhakim",
    ),
)

In [None]:
# Uncomment to grant data-access consent for each endpoint.
# NOTE: call_CLI is defined in the HELPERS cell below, so run that cell before uncommenting these lines.
# call_CLI("globus session consent 'urn:globus:auth:scope:transfer.api.globus.org:all[*https://auth.globus.org/scopes/b0718922-7031-11e9-b7f8-0a37f382de32/data_access]'")
# call_CLI("globus session consent 'urn:globus:auth:scope:transfer.api.globus.org:all[*https://auth.globus.org/scopes/1156ed9e-6984-11ea-af52-0201714f6eab/data_access]'")

In [None]:
## HELPERS

def call_CLI(command):
    """
    Run a shell command, echo it with its stdout and stderr, and return stdout.

    Args:
        command (str):
            Command line to execute. It is passed to the shell (``shell=True``),
            so only pass trusted text: quoting and expansion are handled by the shell.

    Returns:
        (str):
            Standard output of the command, decoded as UTF-8. Bytes that are
            not valid UTF-8 are replaced instead of raising an error.
    """
    print(command)
    ## subprocess.run waits for the command to finish and collects both streams
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    ## errors='replace': a stray non-UTF-8 byte in the output must not abort the cell
    output = result.stdout.decode('utf-8', errors='replace')
    error = result.stderr.decode('utf-8', errors='replace')
    print(output)
    print(error)
    if result.returncode != 0:
        ## Make failures visible even when the command wrote nothing to stderr
        warnings.warn(f"Command exited with status {result.returncode}: {command}")
    return output

def ls(endpoint, path):
    """Run `globus ls` on `path` of the given endpoint and return whatever call_CLI returns."""
    ## The path is wrapped in double quotes inside the command line
    target = f'{endpoint}:"{path}"'
    return call_CLI(f'globus ls {target}')

In [None]:
## initialize ssh_transfer
def make_ssh_sftp(
    hostname,
    username,
):
    """
    Open an SSH session to one of the known clusters and build an SFTP
    interface on top of it.

    Args:
        hostname (str):
            Either 'rc.fas.harvard.edu' (connected via ``fasrc_connect``) or
            'transfer.rc.hms.harvard.edu' (connected via ``o2_connect`` after
            prompting for a password).
        username (str):
            Login name on that host.

    Returns:
        (dict):
            "ssh": the ``bnpm.server.ssh_interface`` object.
            "sftp": a ``bnpm.server.sftp_interface`` built from the SSH client.

    Raises:
        ValueError: If ``hostname`` is not one of the two supported hosts.
    """
    host_fas = 'rc.fas.harvard.edu'
    host_hms = 'transfer.rc.hms.harvard.edu'

    ## Fail fast with a clear message. Previously an unknown host only emitted a
    ##  warning and then crashed further down on the never-assigned ssh_t.
    if hostname not in (host_fas, host_hms):
        raise ValueError(f"Unknown hostname: {hostname}. Expected one of: {host_fas}, {host_hms}")

    print(f"Connecting to {hostname} as {username}")
    if hostname == host_fas:
        ssh_t = bnpm.server.ssh_interface()
        ssh_t.fasrc_connect(username=username)
    else:
        ## Only the HMS login prompts for a password in this function
        import getpass

        use_localSshKey = False
        ## NOTE(review): with use_localSshKey=True, pw is None and is still passed
        ##  to pw_decode below -- confirm that path works before enabling it.
        pw = bnpm.server.pw_encode(getpass.getpass(prompt='Password for HMS: ')) if (use_localSshKey==False) else None
        # path_sshKey = '/home/rich/.ssh/id_rsa' if use_localSshKey else None

        ssh_t = bnpm.server.ssh_interface(
            nbytes_toReceive=20000,
            recv_timeout=1,
            verbose=True,
        )
        ssh_t.o2_connect(
            hostname=hostname,
            username=username,
            password=bnpm.server.pw_decode(pw),
            # key_filename=path_sshKey,
            look_for_keys=False,
            passcode_method=1,
            verbose=0,
            skip_passcode=False,
        )

    sftp_t = bnpm.server.sftp_interface(ssh_client=ssh_t.client)

    return {
        "ssh": ssh_t,
        "sftp": sftp_t,
    }

Make a dict called `conn` that holds the SSH and SFTP connections for each of the servers in `params`.

In [None]:
## One {"ssh": ..., "sftp": ...} entry per cluster, connecting to FAS first and then HMS
conn = dict(
    (
        site,
        make_ssh_sftp(
            hostname=params[site]["hostname"],
            username=params[site]["username"],
        ),
    )
    for site in ("FAS", "HMS")
)

## Find paths to transfer

In [None]:
## Search the HMS side for the files to transfer.
## Commented-out values are alternative settings kept for quick switching.
## NOTE(review): reMatch / reMatch_in_path are presumably regular expressions applied
##  to the entry name and to its path respectively -- confirm against bnpm.
paths_found_1 = conn['HMS']['sftp'].search_recursive(
    ## Where to look
    dir_outer='/n/files/Neurobio/MICROSCOPE/Rich/data/2pRAM/facerhythm_stroke_biomarker_exp/camera/',
    # dir_outer='/n/holylabs/LABS/bsabatini_lab/Users/rhakim/data/2pRAM/facerhythm_stroke_biomarker_exp/camera/20250303/PS46',
    depth=5,
    ## What to match
    reMatch='video1.*avi|timestamp1.*csv',
    # reMatch='video1.*avi',
    # reMatch=r'PointTracker\.h5',
    # reMatch='PS46',
    reMatch_in_path='PS47',
    # reMatch_in_path='20250319.*PS46',
    ## What to return and how much to report
    find_files=True,
    find_folders=False,
    verbose=True,
)

In [None]:
## Tally how many bytes sit behind every path found on HMS
sizes_source = {}
for path in paths_found_1:
    if not conn['HMS']['sftp'].isdir_remote(path):
        ## Single file: take the size from its properties
        props = conn['HMS']['sftp'].get_fileProperties(path)
        sizes_source[path] = props['size']
    else:
        ## Directory: add up the sizes of everything listed beneath it
        props = conn['HMS']['sftp'].list_fileSizes_recursive(path)
        sizes_source[path] = sum(props.values())

## Report the grand total first, then the per-path breakdown (sizes in GB, 1 GB = 1e9 bytes)
size_total = sum(sizes_source.values())
print(f"Total size: {size_total/1e9:.2f} GB")
print("\n")
print(f"Sizes of individual elements:")
for path, size in sizes_source.items():
    print(f"{size/1e9:.2f} GB: {path}")

In [None]:
## Use every path found on HMS as a transfer source.
## The deep copy keeps paths_found_1 untouched if paths_source is edited later.
paths_source = copy.deepcopy(paths_found_1)

In [None]:
## Search the FAS side for files that match the same name pattern.
## Commented-out values are alternative settings kept for quick switching.
paths_found_2 = conn['FAS']['sftp'].search_recursive(
    ## Where to look
    dir_outer='/n/netscratch/bsabatini_lab/Lab/rhakim/data/2pRAM/facerhythm_stroke_biomarker_exp/camera/cam1',
    # dir_outer='/n/files/Neurobio/MICROSCOPE/Rich/data/2pRAM/facerhythm_stroke_biomarker_exp/camera/',
    # dir_outer='/n/holylabs/LABS/bsabatini_lab/Users/rhakim/data/2pRAM/facerhythm_stroke_biomarker_exp/camera/',
    # dir_outer='/n/netscratch/bsabatini_lab/Lab/rhakim/data/2pRAM/facerhythm_stroke_biomarker_exp/camera/',
    # dir_outer='/n/netscratch/bsabatini_lab/Lab/rhakim/',
    depth=8,
    ## What to match
    reMatch='video1.*avi|timestamp1.*csv',
    # reMatch='video1.*avi',
    # reMatch='PS46',
    reMatch_in_path='',
    ## What to return and how much to report
    find_files=True,
    find_folders=False,
    verbose=True,
)

In [None]:
## Display the matches returned by the FAS search
paths_found_2

In [None]:
## Root directory on FAS under which the destination paths are built
## (a single directory, despite the plural name)
paths_dest = "/n/netscratch/bsabatini_lab/Lab/rhakim/data/2pRAM/facerhythm_stroke_biomarker_exp/camera/cam1"

## Transfer

In [None]:
## Map every source path to its destination path.
## Destination layout: <paths_dest>/<parent folder>/<grandparent folder>/<file name>,
##  i.e. the two folder levels directly above the file are swapped relative to the source.
## Skipped: sources containing 'PS46', and sources whose parent folder name
##  contains 'L_' or 'calibration'.
# Earlier variants:
# paths_source_dest = {source: str(Path(paths_dest) / Path(source).parts[-1] / Path(source).parts[-2]) for source in paths_source}
# paths_source_dest = {source: str(Path(paths_dest) / Path(source).parts[-2] / Path(source).parts[-3] / Path(source).parts[-1]) for source in paths_source}
paths_source_dest = {}
for source in paths_source:
    if 'PS46' in source:
        continue
    parts = Path(source).parts
    if ('L_' in parts[-2]) or ('calibration' in parts[-2]):
        continue
    paths_source_dest[source] = str(Path(paths_dest) / parts[-2] / parts[-3] / parts[-1])
paths_source_dest

In [None]:
## Decide which sources still need to be copied by comparing byte counts with
##  whatever already exists at the destination on FAS.
paths_toTransfer = {}
for source, dest in paths_source_dest.items():
    sftp_dest = conn['FAS']['sftp']

    if sftp_dest.isdir_remote(dest):
        ## Destination is a directory: total size of everything beneath it
        props = sftp_dest.list_fileSizes_recursive(dest)
        size_dest = sum(props.values())
    else:
        props = sftp_dest.get_fileProperties(dest)
        if props is None:
            ## No properties returned: treat the destination as missing and queue it
            # print(f"File does not exist. {source} -> {dest}")
            paths_toTransfer[source] = dest
            continue
        size_dest = props['size']

    size_source = sizes_source[source]

    if size_source == size_dest:
        ## Same size on both sides: nothing to do
        # print(f"File already exists and is the same size. {source} -> {dest}")
        continue

    if size_source > size_dest:
        ## Destination is smaller than the source: queue it again
        print(f"File exists but is smaller. {source} -> {dest}")
        paths_toTransfer[source] = dest
    elif size_source < size_dest:
        ## Destination is larger than the source: only report it, do not overwrite
        print(f"WARNING!!!!! File exists but is larger. {source} -> {dest}")
    else:
        print(f"Something went wrong. {source} -> {dest}")

In [None]:
## Preview the pending source -> destination pairs before launching anything
for source, dest in paths_toTransfer.items():
    print(source, "->", dest)
n_pending = len(paths_toTransfer)
print(f"Number of files to transfer: {n_pending}")

In [None]:
# globus transfer -r <SOURCE_ENDPOINT_ID>:"/path/to/source_folder" <DESTINATION_ENDPOINT_ID>:"/path/to/destination_folder" --label "HMS-RC to FAS RC Holyoke Transfer"

## Submit one Globus transfer task (HMS -> FAS) per pending source/destination pair.
## Each task gets its own numbered label so it can be told apart in `globus task list`.
for ii, (source, dest) in enumerate(paths_toTransfer.items()):
    if conn['HMS']['sftp'].isdir_remote(source):
        ## Directories need the recursive flag
        print(f"Initiating transfer:\nContents from: {source}\nInto:          {dest}")
        flag_folder = "-r"
    else:
        ## Fixed: the header used the invalid escape "\F" instead of a newline before "From:"
        print(f"Initiating transfer:\nFrom: {source}\nTo: {dest}")
        flag_folder = ""

    command = f'globus transfer {flag_folder} {params["HMS"]["endpoint"]}:"{source}" {params["FAS"]["endpoint"]}:"{dest}" --label "HMS-RC to FAS RC Holyoke Transfer_{ii}"'
    # print(command)  ## dry run: print instead of calling
    call_CLI(command)
    # break  ## uncomment to submit only the first transfer

In [None]:
## Search FAS for PointTracker.h5 files under one face_rhythm run directory.
## Commented-out values are alternative settings kept for quick switching.
test = conn['FAS']['sftp'].search_recursive(
    # dir_outer='/n/files/Neurobio/MICROSCOPE/Rich/data/2pRAM/facerhythm_stroke_biomarker_exp/camera/',
    # dir_outer='/n/holylabs/LABS/bsabatini_lab/Users/rhakim/data/2pRAM/facerhythm_stroke_biomarker_exp/camera/',
    # dir_outer='/n/netscratch/bsabatini_lab/Lab/rhakim/data/2pRAM/facerhythm_stroke_biomarker_exp/camera/',
    dir_outer='/n/holylabs/LABS/bsabatini_lab/Users/rhakim/analysis/face_rhythm/PS46/run_20250325',
    # reMatch='video1.*avi',
    # reMatch='PS46',
    ## Raw string: "\." is not a valid escape in a normal string literal
    ##  (SyntaxWarning on recent Python versions); the pattern itself is unchanged.
    reMatch=r'PointTracker\.h5',
    # reMatch_in_path='',
    depth=14,
    find_folders=False,
    find_files=True,
    verbose=1,
)

## Helpers: monitor and cancel Globus tasks

In [None]:
## Check status with CLI: `globus task list`
## Show only tasks whose status is ACTIVE

call_CLI('globus task list --filter-status ACTIVE')

In [None]:
## Check status with CLI: `globus task list`
## Show only tasks whose status is FAILED

call_CLI('globus task list --filter-status FAILED')

In [None]:
## Stop a specific task
## NOTE: the task ID below is hard-coded; replace it with the ID of the task
##  you want to cancel (see `globus task list`) before running this cell.
# call_CLI('globus task cancel <task_id>')
call_CLI('globus task cancel 8c5f04ff-04e4-11f0-b64c-0e283342ad7b')

In [None]:
## Stop all active jobs
## WARNING: this runs `globus task cancel --all`, which is not limited to the
##  transfers started from this notebook. Do not run it as part of "Run All".
call_CLI('globus task cancel --all')