In [9]:

import sys
import os
import luigi
import sciluigi as sl
import logging
import yaml
import paramiko
import time

# Put the parent directory on the import path (the `src` imports below
# presumably rely on this when the notebook runs from a sub-folder).
sys.path.append('..')

# Keep everyone quiet: the root logger and every chatty third-party logger
# only let WARNING and above through.
logging.getLogger().setLevel(logging.WARNING)
for _quiet_logger in (
    'SMB',
    'napari',
    'matplotlib',
    'in_n_out',
    'numcodecs',
    'numba',
    'luigi',
    'numexpr',
    'luigi-interface',
    'sciluigi-interface',
    'cellpose',
    'paramiko',
):
    logging.getLogger(_quiet_logger).setLevel(logging.WARNING)

from src import Receipt, NASConnection
from src.steps import get_task

### How to set up config keys
1. **Make your keys:** 
    `ssh-keygen -t rsa -b 4096 -C "formanj@keck.engr.colostate.edu"`  
    When asked where to save the key, enter `keck`. This will create the `keck` (private key) and `keck.pub` (public key) files, which are your keys.

2. **Move pub key to cluster:**  
    `ssh formanj@keck.engr.colostate.edu`  
    `mkdir -p ~/.ssh`  
    `chmod 700 ~/.ssh` gives the owner read, write, and execute permission  
    `vim ~/.ssh/authorized_keys`  
    Copy and paste `keck.pub` content into this file  
    `chmod 600 ~/.ssh/authorized_keys` gives the owner read and write permission

3. **Connect to cluster:**  
    `ssh -i C:\Users\formanj\keck formanj@keck.engr.colostate.edu`

In [10]:
keyfile = r'C:\Users\formanj\keck' # put the path to your private key here (the `keck` file passed to `ssh -i`, not `keck.pub`)

In [11]:
from luigi.contrib.ssh import RemoteTarget
import subprocess

class AngelFISHLuigiTask(luigi.Task):
    """Run one pipeline step on the cluster as a SLURM job and wait for it.

    ``run`` opens an SSH session using the credentials stored in the YAML file
    at ``config_path``, changes into ``remote_path``, submits ``run_step.sh``
    through ``sbatch`` and then polls ``squeue`` until the job has left the
    queue.

    Parameters
    ----------
    receipt_path : path of the receipt; only its base name is passed to
        ``run_step.sh``.
    remote_path : directory on the cluster to ``cd`` into before submitting.
    step_name : second argument handed to ``run_step.sh``.
    output_path : remote file used as the done-flag of this step.
    config_path : YAML file providing ``user.username``, ``user.password``
        and ``user.remote_address``.
    public_key : key file forwarded to ``RemoteTarget`` as ``key_file``.
    """
    receipt_path = luigi.Parameter()
    remote_path = luigi.Parameter()
    step_name = luigi.Parameter()
    output_path = luigi.Parameter()
    config_path = luigi.Parameter()
    public_key = luigi.Parameter()

    # Polling settings used by run(). Plain class attributes (not Luigi
    # parameters) so the task signature stays unchanged.
    POLL_INTERVAL_SECONDS = 10
    MAX_WAIT_SECONDS = 3600  # 1 hour timeout

    def out_doneflag(self):
        """Return the remote target located at ``output_path``."""
        # NOTE(review): host and username are hard-coded here, whereas run()
        # reads them from the config file — consider unifying the two.
        return RemoteTarget(self.output_path, host='keck.engr.colostate.edu', username='formanj', key_file=self.public_key)

    def output(self):
        """Expose the done-flag as the task output.

        Every step subclass in this notebook defines exactly this method;
        providing it on the base class also covers subclasses that are created
        without a body of their own.
        """
        return self.out_doneflag()

    def run(self):
        """Submit the step through ``sbatch`` and block until it leaves the queue.

        Raises
        ------
        RuntimeError
            If ``sbatch`` wrote anything to stderr or returned no job ID.
        TimeoutError
            If the job is still queued or running after ``MAX_WAIT_SECONDS``.
        """
        # Load the configuration (the context manager closes the file handle)
        with open(str(self.config_path)) as config_file:
            conf = yaml.safe_load(config_file)
        usr = str(conf['user']['username'])
        pwd = str(conf['user']['password'])
        remote_address = str(conf['user']['remote_address'])
        port = 22

        # Create SSH client
        ssh = paramiko.SSHClient()
        # NOTE(review): AutoAddPolicy accepts unknown host keys without
        # verification; confirm this is acceptable for this cluster.
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        try:
            ssh.connect(remote_address, port, usr, pwd)

            job_id = self._submit_job(ssh)
            print(f"Submitted SLURM job with ID: {job_id}")

            self._wait_for_job(ssh, job_id)
            print(f"[{job_id}] Job complete.")
        finally:
            # Always release the connection, also when submission fails or
            # the wait times out.
            ssh.close()

    def _submit_job(self, ssh_client):
        """Submit ``run_step.sh`` via ``sbatch --parsable`` and return the job ID."""
        remote_receipt_path = os.path.basename(self.receipt_path)

        sbatch_command = f'sbatch --parsable run_step.sh {remote_receipt_path} {self.step_name}'
        combined_command = f'cd {self.remote_path}; {sbatch_command}'

        stdin, stdout, stderr = ssh_client.exec_command(combined_command)
        job_submission_output = stdout.read().decode().strip()
        job_submission_error = stderr.read().decode()

        if job_submission_error:
            raise RuntimeError(f"SLURM job submission failed: {job_submission_error}")

        # The ID is taken from the part of the output before the first ';'.
        job_id = job_submission_output.split(';')[0].strip()
        if not job_id:
            # Without an ID the squeue polling below would be meaningless.
            raise RuntimeError("SLURM job submission failed: sbatch returned no job ID.")
        return job_id

    @staticmethod
    def _is_job_active(ssh_client, job_id):
        """Return True while ``squeue -j <job_id> -h`` prints anything."""
        check_command = f'squeue -j {job_id} -h'
        stdin, stdout, stderr = ssh_client.exec_command(check_command)
        result = stdout.read().decode()
        return bool(result.strip())  # job is active if output is non-empty

    def _wait_for_job(self, ssh_client, job_id):
        """Poll ``squeue`` until the job is gone or the wall-clock timeout expires."""
        wait_time = self.POLL_INTERVAL_SECONDS
        max_wait = self.MAX_WAIT_SECONDS
        # A monotonic deadline also counts the time spent inside the squeue calls.
        deadline = time.monotonic() + max_wait

        while self._is_job_active(ssh_client, job_id):
            if time.monotonic() >= deadline:
                raise TimeoutError(f"SLURM job {job_id} did not complete within {max_wait} seconds.")
            print(f"[{job_id}] Still running... waiting {wait_time}s")
            time.sleep(wait_time)


In [None]:
class Upload_Task(luigi.Task):
    """Copy the local receipt file into ``remote_path`` on the cluster via SFTP.

    Parameters
    ----------
    output_path : remote path wrapped by ``out_doneflag``.
    receipt_path : local receipt file to upload.
    remote_path : destination directory on the cluster.
    config_path : YAML file providing ``user.username``, ``user.password``
        and ``user.remote_address``.
    public_key : key file forwarded to ``RemoteTarget`` as ``key_file``.
    """
    output_path = luigi.Parameter()
    receipt_path = luigi.Parameter()
    remote_path = luigi.Parameter()
    config_path = luigi.Parameter()
    public_key = luigi.Parameter()

    # NOTE(review): this class defines no output(); confirm that uploading the
    # receipt again on every run is the intended behaviour.

    def out_doneflag(self):
        """Return the remote target located at ``output_path``."""
        return RemoteTarget(self.output_path, host='keck.engr.colostate.edu', username='formanj', key_file=self.public_key)

    def run(self):
        """Upload the receipt, closing the SFTP session and SSH connection afterwards."""
        # The context manager closes the config file handle.
        with open(self.config_path) as config_file:
            conf = yaml.safe_load(config_file)
        usr = str(conf['user']['username'])
        pwd = str(conf['user']['password'])
        remote_address = str(conf['user']['remote_address'])
        port = 22

        # Remote path to file with all directories; forward slashes because
        # os.path.join may produce backslashes on the local machine.
        remote_receipt_path = os.path.join(self.remote_path, os.path.basename(self.receipt_path))
        remote_receipt_path = remote_receipt_path.replace('\\', '/')

        # Create SSH client
        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        try:
            ssh.connect(remote_address, port, usr, pwd)

            # Transfer the file
            sftp = ssh.open_sftp()
            try:
                sftp.put(self.receipt_path, remote_receipt_path)
            finally:
                sftp.close()
        finally:
            # The connection is released even when the transfer fails.
            ssh.close()

In [13]:
class AngelFISHWorkflow(sl.WorkflowTask):
    """Sequential workflow: upload the receipt, then one cluster task per step.

    The steps are taken, in order, from the receipt's ``step_order`` entry and
    each one is linked to the task created just before it.
    """
    receipt_path = luigi.Parameter()
    cluster_path = luigi.Parameter()
    config_path = luigi.Parameter()
    public_key = luigi.Parameter()

    def workflow(self):
        """Create the upload task followed by the chained step tasks; return them as a list."""
        def to_posix(some_path):
            # Remote paths need forward slashes regardless of the local OS.
            return some_path.replace('\\', '/')

        tasks = []
        self.cluster_path = to_posix(self.cluster_path)
        receipt_filename = to_posix(os.path.basename(self.receipt_path))
        remote_receipt_path = to_posix(os.path.join(self.cluster_path, receipt_filename))

        # Step 0: copy the receipt onto the cluster.
        upload = self.new_task(
            'upload_task',
            Upload_Task,
            output_path=remote_receipt_path,
            receipt_path=self.receipt_path,
            remote_path=self.cluster_path,
            config_path=self.config_path,
            public_key=self.public_key,
        )
        tasks.append(upload)

        # Derive <parent of cluster_path>/database/<name>/<analysis_name>/status.
        receipt = Receipt(path=self.receipt_path)
        step_order = receipt['step_order']
        name = os.path.basename(receipt['arguments']['nas_location'])
        database_loc = os.path.join(os.path.dirname(self.cluster_path), 'database')
        remote_local_location = to_posix(os.path.join(database_loc, name))
        remote_analysis_dir = os.path.join(remote_local_location, receipt['arguments']['analysis_name'])
        remote_status_dir = os.path.join(remote_analysis_dir, 'status')

        # NOTE(review): the task classes derive from luigi.Task rather than
        # sl.Task — confirm that `in_upstream` is actually honoured as a
        # dependency.
        upstream = upload
        for step_name in step_order:
            # A dedicated subclass per step, named after the step.
            step_cls = type(step_name, (AngelFISHLuigiTask,), {})
            flag_path = to_posix(os.path.join(remote_status_dir, f'step_{step_name}.txt'))

            current = self.new_task(
                step_name,
                step_cls,
                receipt_path=remote_receipt_path,
                step_name=step_name,
                output_path=flag_path,
                remote_path=self.cluster_path,
                config_path=self.config_path,
                public_key=self.public_key,
            )

            # Chain onto whichever task was created last.
            current.in_upstream = upstream.out_doneflag
            upstream = current
            tasks.append(current)

        return tasks

### Scheduler
1. **Luigi scheduler:**  
    Run `luigid` on the command line.  
    Open http://localhost:8082 to view the scheduler dashboard.  
    Pass `local_scheduler=False` to `luigi.build` to use this scheduler.  
2. **Local scheduler:**  
    A new scheduler is started for each run.  
    Pass `local_scheduler=True` to `luigi.build` to use it.




In [14]:
# luigi.build(AngelFISHWorkflow(
#                             receipt_path=r'C:\Users\formanj\GitHub\AngelFISH\examples\new_pipeline.json',
#                             cluster_path = '/home/formanj/Github/AngelFISH/cluster',
#                             config_path = r'C:\Users\formanj\GitHub\AngelFISH\config_cluster.yml',
#                             public_key = keyfile
#                             ).workflow(), local_scheduler=False)

## Under development

In [33]:
class DownloadDataTask(AngelFISHLuigiTask):
    """Task class used by the parallel workflow for its ``download_data`` step."""

    def output(self):
        """Return the remote done-flag target as this task's output."""
        done_flag = self.out_doneflag()
        return done_flag

class SegmentNuc(AngelFISHLuigiTask):
    """Task class used by the parallel workflow for its ``segment_nuc`` step."""

    def output(self):
        """Return the remote done-flag target as this task's output."""
        done_flag = self.out_doneflag()
        return done_flag

class SegmentCyto(AngelFISHLuigiTask):
    """Task class used by the parallel workflow for its ``segment_cyto`` step."""

    def output(self):
        """Return the remote done-flag target as this task's output."""
        done_flag = self.out_doneflag()
        return done_flag

class MatchCytoAndNuc(AngelFISHLuigiTask):
    """Task class used by the parallel workflow for its ``match_cyto_and_nuc`` step."""

    def output(self):
        """Return the remote done-flag target as this task's output."""
        done_flag = self.out_doneflag()
        return done_flag

class DetectDUSP1(AngelFISHLuigiTask):
    """Task class used by the parallel workflow for its ``detect_dusp1`` step."""

    def output(self):
        """Return the remote done-flag target as this task's output."""
        done_flag = self.out_doneflag()
        return done_flag

class MeasureCellProps(AngelFISHLuigiTask):
    """Task class used by the parallel workflow for its ``measure_cell_props`` step."""

    def output(self):
        """Return the remote done-flag target as this task's output."""
        done_flag = self.out_doneflag()
        return done_flag

class ReturnData(AngelFISHLuigiTask):
    """Task class used by the parallel workflow for its ``return_data`` step."""

    def output(self):
        """Return the remote done-flag target as this task's output."""
        done_flag = self.out_doneflag()
        return done_flag

class ParAngelFISHWorkflow(sl.WorkflowTask):
    """Workflow with a fixed step graph in which the two segmentation steps share one upstream.

    Graph built by ``workflow``::

        upload_task -> download_data -> segment_nuc  \\
                                     -> segment_cyto -> match_cyto_and_nuc
        match_cyto_and_nuc -> detect_dusp1 -> measure_cell_props -> return_data

    Parameters
    ----------
    receipt_path : local receipt file.
    cluster_path : directory on the cluster the steps are submitted from.
    config_path : YAML file handed on to every task.
    public_key : key file handed on to every task.
    """
    receipt_path = luigi.Parameter()
    cluster_path = luigi.Parameter()
    config_path = luigi.Parameter()
    public_key = luigi.Parameter()

    def _new_step(self, step_name, task_cls, upstream, remote_receipt_path, remote_status_dir):
        """Create the task for ``step_name`` and attach ``upstream`` to it as ``in_upstream``."""
        path = os.path.join(remote_status_dir, f'step_{step_name}.txt').replace('\\', '/')
        step_task = self.new_task(
            step_name,
            task_cls,
            receipt_path=remote_receipt_path,
            step_name=step_name,
            output_path=path,
            remote_path=self.cluster_path,
            config_path=self.config_path,
            public_key=self.public_key
        )
        step_task.in_upstream = upstream
        return step_task

    def workflow(self):
        """Create the upload task and the seven step tasks; return them in creation order."""
        # upload
        task_refs = []
        self.cluster_path = self.cluster_path.replace('\\', '/')
        receipt_filename = os.path.basename(self.receipt_path).replace('\\', '/')
        remote_receipt_path = os.path.join(self.cluster_path, receipt_filename).replace('\\', '/')

        upload_task = self.new_task(
            'upload_task',
            Upload_Task,
            output_path=remote_receipt_path,
            receipt_path=self.receipt_path,
            remote_path=self.cluster_path,
            config_path=self.config_path,
            public_key=self.public_key
        )
        task_refs.append(upload_task)

        # Derive <parent of cluster_path>/database/<name>/<analysis_name>/status.
        receipt = Receipt(path=self.receipt_path)
        name = os.path.basename(receipt['arguments']['nas_location'])
        database_loc = os.path.dirname(self.cluster_path)
        database_loc = os.path.join(database_loc, 'database')
        remote_local_location = os.path.join(database_loc, name).replace('\\', '/')
        remote_analysis_dir = os.path.join(remote_local_location, receipt['arguments']['analysis_name'])
        remote_status_dir = os.path.join(remote_analysis_dir, 'status')

        def add_step(step_name, task_cls, upstream):
            # Build one step, record it, and hand it back for wiring.
            step_task = self._new_step(step_name, task_cls, upstream,
                                       remote_receipt_path, remote_status_dir)
            task_refs.append(step_task)
            return step_task

        # NOTE(review): the step classes derive from luigi.Task rather than
        # sl.Task — confirm that `in_upstream` is actually honoured as a
        # dependency, otherwise the steps may be scheduled in any order.
        download_data_task = add_step('download_data', DownloadDataTask,
                                      upload_task.out_doneflag)

        # Both segmentation steps hang off the download step.
        segment_nuc_task = add_step('segment_nuc', SegmentNuc,
                                    download_data_task.out_doneflag)
        segment_cyto_task = add_step('segment_cyto', SegmentCyto,
                                     download_data_task.out_doneflag)

        # Matching is given both segmentations as upstream.
        match_cyto_and_nuc_task = add_step(
            'match_cyto_and_nuc', MatchCytoAndNuc,
            [segment_nuc_task.out_doneflag, segment_cyto_task.out_doneflag])

        detect_dusp1_task = add_step('detect_dusp1', DetectDUSP1,
                                     match_cyto_and_nuc_task.out_doneflag)
        measure_cell_props_task = add_step('measure_cell_props', MeasureCellProps,
                                           detect_dusp1_task.out_doneflag)
        add_step('return_data', ReturnData, measure_cell_props_task.out_doneflag)

        return task_refs


In [34]:
# Instantiate the parallel workflow, then hand the tasks it creates to the
# central scheduler (local_scheduler=False) using four workers.
par_workflow = ParAngelFISHWorkflow(
    receipt_path=r'C:\Users\formanj\GitHub\AngelFISH\examples\new_pipeline.json',
    cluster_path='/home/formanj/Github/AngelFISH/cluster',
    config_path=r'C:\Users\formanj\GitHub\AngelFISH\config_cluster.yml',
    public_key=keyfile,
)
luigi.build(par_workflow.workflow(), local_scheduler=False, workers=4)

  is_complete = task.complete()


False