In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

import nest_asyncio


# Put the parent directory first on the import path so packages that live
# one level above this notebook can be imported from it.
sys.path.insert(0, os.path.abspath('..'))
# Patch asyncio to allow nested event-loop use; the notebook kernel already
# runs a loop of its own.
nest_asyncio.apply()

In [2]:
from asyncio import run, sleep

from asyncssh import connect
from decouple import config


# Connection settings for the HPC host, looked up by name through
# python-decouple's `config` instead of being hardcoded in the notebook.
HPC_USER = config('HPC_USER')
HPC_HOST = config('HPC_HOST')
# Passphrase that the client classes below use to decrypt the SSH private key.
HPC_PASSPHRASE = config('HPC_PASSPHRASE')

In [None]:
from contextlib import AsyncExitStack
from pathlib import Path
from typing import AsyncGenerator

from asyncssh import connect, read_private_key


class SCPClient:
    """Uploads a byte stream to a file on a remote host.

    Despite the name, the transfer goes through an SFTP session opened on top
    of an SSH connection. The private key is read and decrypted once, when the
    client is constructed; a new connection is opened for every copy.
    """

    def __init__(
        self,
        host: str,
        user: str,
        passphrase: str,
        private_key_path: str = '/.ssh/id_ed25519',
    ):
        """Store the connection settings and load the private key.

        Args:
            host: Host name or address of the SSH server.
            user: User name to authenticate as.
            passphrase: Passphrase used to decrypt the private key file.
            private_key_path: Location of the private key file. Defaults to
                the path that used to be hard-coded here, so existing callers
                are unaffected.
        """
        self.host = host
        self.user = user
        self.private_key = read_private_key(private_key_path, passphrase=passphrase)

    async def secure_copy(
        self,
        source_stream: AsyncGenerator[bytes, None],
        target_path: Path,
    ) -> None:
        """Write every chunk yielded by ``source_stream`` to ``target_path``.

        Args:
            source_stream: Async generator producing the file content in order.
            target_path: Destination path on the remote host. It is opened in
                ``'wb'`` mode, so an existing file is overwritten.
        """
        # The exit stack unwinds in reverse order: remote file first, then the
        # SFTP session, then the SSH connection - also when a write fails.
        async with AsyncExitStack() as stack:
            conn = await stack.enter_async_context(
                connect(self.host, username=self.user, client_keys=[self.private_key])
            )
            sftp = await stack.enter_async_context(conn.start_sftp_client())
            file = await stack.enter_async_context(sftp.open(str(target_path), 'wb'))

            async for chunk in source_stream:
                await file.write(chunk)

In [None]:
import logging

from asyncssh import ConnectionLost, Error, connect, read_private_key
from backoff import expo, on_exception


class PBSProClient:
    """Runs PBS Pro queue commands (``qstat``, ``qsub``, ``qdel``) over SSH.

    The private key is read and decrypted once, when the client is
    constructed; every command opens a fresh SSH connection.
    """

    def __init__(
        self,
        host: str,
        user: str,
        passphrase: str,
        private_key_path: str = '/.ssh/id_ed25519',
    ):
        """Store the connection settings and load the private key.

        Args:
            host: Host name or address of the SSH server.
            user: User name to authenticate as.
            passphrase: Passphrase used to decrypt the private key file.
            private_key_path: Location of the private key file. Defaults to
                the path that used to be hard-coded here, so existing callers
                are unaffected.
        """
        self.host = host
        self.user = user
        self.private_key = read_private_key(private_key_path, passphrase=passphrase)

    @on_exception(expo, (OSError, ConnectionLost), max_tries=4)
    async def _run(self, args: list) -> str:
        """Run one command on the remote host and return its stripped stdout.

        Args:
            args: The command in argv form - program name first, then one
                list element per argument.

        Returns:
            The command's standard output with surrounding whitespace removed.

        Raises:
            Exception: If the command finishes with a non-zero exit status.
                The full command, stdout and stderr are logged first.

        ``OSError`` and ``ConnectionLost`` are retried with exponential
        backoff, up to 4 tries in total; any other error propagates directly.
        """
        import shlex

        # asyncssh expects the remote command as ONE string; spreading the
        # list into positional arguments fails as soon as there is more than
        # one element. Quote each element so it reaches the remote shell as a
        # single word, even when it contains spaces or shell metacharacters.
        command = ' '.join(shlex.quote(str(arg)) for arg in args)

        async with await connect(
            self.host, username=self.user, client_keys=[self.private_key]
        ) as connection:
            # check=False: the exit status is inspected below so the failure
            # can be logged together with the command output.
            result = await connection.run(command, check=False)

            if result.exit_status != 0:
                logging.error(
                    f'SSH process with command {result.command},\n'
                    f'completed with status {result.exit_status},\n'
                    f'STDOUT: {result.stdout},\n'
                    f'STDERR: {result.stderr}'
                )
                raise Exception(
                    f'SSH process completed with status {result.exit_status}'
                )

            return result.stdout.strip()

    async def queue_state(self):
        """Run ``qstat`` and return its raw, unparsed text output."""
        stdout = await self._run(['qstat'])
        # TODO parse

        return stdout

    async def queue_submit(self, pbs_filename: str) -> str:
        """Run ``qsub`` on ``pbs_filename`` and return its raw output.

        The output is returned unparsed; presumably it is the identifier of
        the newly created job - TODO confirm and parse.
        """
        return await self._run(['qsub', pbs_filename])

    async def queue_delete(self, job_id: int):
        """Run ``qdel`` for ``job_id``.

        ``job_id`` is converted with ``str()`` before it is sent, so a full
        identifier string is accepted as well as a bare number.
        """
        await self._run(['qdel', str(job_id)])

    async def queue_hold(self):
        """Placeholder for ``qhold``; not implemented yet."""

    async def queue_release(self):
        """Placeholder for ``qrls``; not implemented yet."""

    async def trace_job(self):
        """Placeholder for ``tracejob``; not implemented yet."""

In [5]:
# Manual check against the live cluster: build a client from the configured
# credentials and fetch the raw `qstat` listing.
pbs_pro_client = PBSProClient(HPC_HOST, HPC_USER, HPC_PASSPHRASE)
# Top-level `await` relies on the notebook running cells inside an event loop.
res = await pbs_pro_client.queue_state()

In [6]:
# Show the qstat table exactly as returned (plain text, not parsed yet).
print(res)

Job id                 Name             User              Time Use S Queue
---------------------  ---------------- ----------------  -------- - -----
458873[].x3000c0s25b0* gauss7cCNb       mbakija                  0 B cpu             
458874[].x3000c0s25b0* gauss7cCNc       mbakija                  0 B cpu             
459504.x3000c0s25b0n0  rna_multimer_ka* rpenic            691:33:* R gpu             
460319.x3000c0s25b0n0  rel.pbs          ikovac            49157:3* R cpu             
461941.x3000c0s25b0n0  gcd              lvrban            2288:26* R bigmem          
461966.x3000c0s25b0n0  rna_multimer_ka* rpenic            588:36:* R gpu             
461973.x3000c0s25b0n0  rel.pbs          ikovac            22964:0* R cpu             
462005.x3000c0s25b0n0  phR1-lacuo       ibatisti          4201:07* R cpu             
462015.x3000c0s25b0n0  rel.pbs          ikovac            50095:1* R cpu             
462057.x3000c0s25b0n0  rel.pbs          ikovac            22006:1* R cpu    

In [None]:
# Lines in the `qstat -f` sample appear to be limited to 80 characters
# (79 visible + newline); longer values continue on a tab-indented line.
# This measures one line from that sample that is cut at the limit.
len('    Error_Path = x3000c0s27b0n0.hsn.hpc.srce.hr:/lustre/home/lpanic/hello_world')

79

In [3]:
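# Sample output of `qstat -f 465317`, kept verbatim as a parsing fixture;
# long values wrap onto continuation lines that start with a tab.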
queue_status = """
Job Id: 465317.x3000c0s25b0n0.hsn.hpc.srce.hr
    Job_Name = hello
    Job_Owner = lpanic@x3000c0s27b0n0.hsn.hpc.srce.hr
    resources_used.cpupercent = 0
    resources_used.cput = 00:00:00
    resources_used.mem = 0b
    resources_used.ncpus = 1
    resources_used.vmem = 0kb
    resources_used.walltime = 00:00:00
    job_state = R
    queue = cpu-single
    server = x3000c0s25b0n0.hsn.hpc.srce.hr
    Checkpoint = u
    ctime = Fri Mar 28 10:45:48 2025
    Error_Path = x3000c0s27b0n0.hsn.hpc.srce.hr:/lustre/home/lpanic/hello_world
	/hello.e465317
    exec_host = x8000c1s4b0n1/1
    exec_vnode = (x8000c1s4b0n1:mem=1843200kb:ncpus=1:ngpus=0)
    Hold_Types = n
    Join_Path = n
    Keep_Files = oed
    Mail_Points = a
    mtime = Fri Mar 28 10:45:53 2025
    Output_Path = x3000c0s27b0n0.hsn.hpc.srce.hr:/lustre/home/lpanic/hello_worl
	d/hello.o465317
    Priority = 0
    qtime = Fri Mar 28 10:45:48 2025
    Rerunable = True
    Resource_List.mem = 1800mb
    Resource_List.ncpus = 1
    Resource_List.ngpus = 0
    Resource_List.nodect = 1
    Resource_List.place = pack
    Resource_List.select = 1:mem=1800mb:ncpus=1:ngpus=0
    Resource_List.walltime = 48:00:00
    stime = Fri Mar 28 10:45:48 2025
    session_id = 3900810
    jobdir = /lustre/home/lpanic
    substate = 42
    Variable_List = PBS_O_HOME=/lustre/home/lpanic,PBS_O_LANG=en_US.UTF-8,
	PBS_O_LOGNAME=lpanic,
	PBS_O_PATH=/lustre/home/lpanic/.local/bin:/lustre/home/lpanic/bin:/opt
	/clmgr/sbin:/opt/clmgr/bin:/opt/sgi/sbin:/opt/sgi/bin:/usr/share/Module
	s/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/c3/bin:/op
	t/pbs/bin:/sbin:/bin,PBS_O_MAIL=/var/spool/mail/lpanic,
	PBS_O_SHELL=/bin/bash,PBS_O_HOST=x3000c0s27b0n0.hsn.hpc.srce.hr,
	PBS_O_WORKDIR=/lustre/home/lpanic/hello_world,PBS_O_SYSTEM=Linux,
	PBS_O_QUEUE=RouteQ
         = Job run at Fri Mar 28 at 10:45 on (x8000c1s4b0n1:mem=1843200kb:nc
	pus=1:ngpus=0)
    etime = Fri Mar 28 10:45:48 2025
    run_count = 1
    eligible_time = 00:00:05
    Submit_arguments = -koed hello.sh
    project = _pbs_project_default
    Submit_Host = x3000c0s27b0n0.hsn.hpc.srce.hr
"""

In [12]:
from math_rag.infrastructure.models.pbs import PBSProJob


# Build a PBSProJob from the sample `qstat -f` text; the parsing itself lives
# in `from_queue_status` inside math_rag and is not shown in this notebook.
job = PBSProJob.from_queue_status(queue_status)

In [None]:
# ssh-keygen