Skip to content

Commit

Permalink
Update the SlurmJob class
Browse files Browse the repository at this point in the history
  • Loading branch information
TorecLuik committed Apr 18, 2024
1 parent bf2d2af commit 5577904
Showing 1 changed file with 53 additions and 2 deletions.
55 changes: 53 additions & 2 deletions biomero/slurm_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,46 @@


class SlurmJob:
"""Represents a job submitted to a Slurm cluster.
This class encapsulates information and methods related to managing a job
submitted to a Slurm cluster. It provides functionality to monitor the
job's state, wait for completion, and perform cleanup operations.
Attributes:
job_id (int): The ID of the Slurm job.
submit_result (Result): The result of submitting the job.
ok (bool): Indicates whether the job submission was successful.
job_state (str): The current state of the Slurm job.
error_message (str): The error message, if any.
Args:
submit_result (Result): The result of submitting the job.
job_id (int): The Slurm job ID.
Example:
# Submit some job with the SlurmClient
submit_result, job_id = slurmClient.run_workflow(
workflow_name, workflow_version, input_data, email, time, **kwargs)
# Create a SlurmJob instance
slurmJob = SlurmJob(submit_result, job_id)
if not slurmJob.ok:
logger.warning(f"Error with job: {slurmJob.get_error()}")
else:
try:
slurmJob.wait_for_completion(slurmClient, conn)
if not slurmJob.completed():
raise Exception(f"Job is not completed: {slurmJob}")
else:
slurmJob.cleanup(slurmClient)
except Exception as e:
logger.error(f" ERROR WITH JOB: {e}")
raise e
"""

def __init__(self,
submit_result: Result,
job_id: int):
Expand All @@ -48,6 +88,7 @@ def __init__(self,
self.submit_result = submit_result
self.ok = self.submit_result.ok
self.job_state = None
self.error_message = self.submit_result.stderr if hasattr(self.submit_result, 'stderr') else ''

def wait_for_completion(self, slurmClient, omeroConn) -> str:
"""
Expand All @@ -74,6 +115,7 @@ def wait_for_completion(self, slurmClient, omeroConn) -> str:
logger.warning(
f"Error checking job status:{poll_result.stderr}")
self.job_state = "FAILED"
self.error_message = poll_result.stderr
self.job_state = job_status_dict[self.job_id]
# wait for 10 seconds before checking again
omeroConn.keepAlive() # keep the OMERO connection alive
Expand Down Expand Up @@ -102,8 +144,17 @@ def completed(self):
Returns:
bool: True if the job has completed; False otherwise.
"""
return self.job_state == "COMPLETED"
return self.job_state == "COMPLETED" or self.job_state == "COMPLETED+"

def get_error(self) -> str:
    """
    Return the error text currently recorded for this Slurm job.

    The value is whatever is stored in ``self.error_message``. The
    constructor seeds it with the submit result's ``stderr`` (or an empty
    string when that attribute is missing), and ``wait_for_completion``
    replaces it with the poll result's ``stderr`` when a status check
    fails.

    Returns:
        str: The recorded error message.
    """
    recorded_error = self.error_message
    return recorded_error

def __str__(self):
"""
Return a string representation of the SlurmJob instance.
Expand All @@ -127,7 +178,7 @@ class SlurmClient(Connection):
mentions the added ones:
The easiest way to set this client up is by using a slurm-config.ini
and the from-config() method.
and the from_config() method.
Attributes:
slurm_data_path (str): The path to the directory containing the
Expand Down

0 comments on commit 5577904

Please sign in to comment.