Skip to content

Commit

Permalink
Merge pull request #51 from HECBioSim/issue24
Browse files Browse the repository at this point in the history
Issue #24
  • Loading branch information
jimboid committed Jun 7, 2017
2 parents f171375 + 96ddff1 commit 99b2724
Show file tree
Hide file tree
Showing 7 changed files with 222 additions and 38 deletions.
9 changes: 8 additions & 1 deletion longbow/corelibs/configuration.py
Expand Up @@ -54,8 +54,9 @@
"""

import logging
import re
import os
import re
import time
from random import randint

import longbow.corelibs.exceptions as exceptions
Expand Down Expand Up @@ -85,6 +86,7 @@
"polling-frequency": "300",
"port": "22",
"queue": "",
"recoveryfile": "",
"remoteworkdir": "",
"resource": "",
"replicates": "1",
Expand Down Expand Up @@ -443,6 +445,11 @@ def _processconfigsfinalinit(jobs):
LOG.debug("Job '%s' will be run in the '%s' directory on the remote "
"resource.", job, jobs[job]["destdir"])

# Create a recovery file.
jobs[job]["recoveryfile"] = (
os.path.join(os.path.expanduser('~/.longbow'), "recovery-" +
time.strftime("%Y%m%d-%H%M%S")))


def _processconfigsparams(jobs, parameters, jobdata, hostdata):
"""A private method to assimilate all parameters into jobs dict."""
Expand Down
16 changes: 9 additions & 7 deletions longbow/corelibs/scheduling.py
Expand Up @@ -62,8 +62,6 @@

LOG = logging.getLogger("longbow.corelibs.scheduling")
QUEUEINFO = {}
JOBFILE = os.path.join(os.path.expanduser('~/.longbow'), "recovery-" +
time.strftime("%Y%m%d-%H%M%S"))


def checkenv(jobs, hostconf):
Expand Down Expand Up @@ -208,6 +206,7 @@ def monitor(jobs):
allfinished = False
lastpolltime = 0
laststagetime = 0
recoveryfile = jobs[list(jobs.keys())[0]]["recoveryfile"]
saverecoveryfile = True
recoveryfileerror = False

Expand Down Expand Up @@ -241,13 +240,14 @@ def monitor(jobs):

# Save out the recovery files.
if (os.path.isdir(os.path.expanduser('~/.longbow')) and
saverecoveryfile is True and recoveryfileerror is False):
saverecoveryfile is True and recoveryfileerror is False and
recoveryfile != ""):

saverecoveryfile = False

try:

configuration.saveini(JOBFILE, jobs)
configuration.saveini(recoveryfile, jobs)

except (OSError, IOError):

Expand Down Expand Up @@ -412,13 +412,15 @@ def submit(jobs):
job["queue-max"] = QUEUEINFO[job["resource"]]["queue-max"]

# Save out the recovery files.
if os.path.isdir(os.path.expanduser('~/.longbow')):
if (os.path.isdir(os.path.expanduser('~/.longbow')) and
job["recoveryfile"] != ""):

try:

LOG.info("Recovery file will be placed at path '%s'", JOBFILE)
LOG.info("Recovery file will be placed at path '%s'",
job["recoveryfile"])

configuration.saveini(JOBFILE, jobs)
configuration.saveini(job["recoveryfile"], jobs)

except (OSError, IOError):

Expand Down
8 changes: 8 additions & 0 deletions longbow/corelibs/staging.py
Expand Up @@ -46,6 +46,7 @@
"""

import logging
import os

import longbow.corelibs.exceptions as exceptions
import longbow.corelibs.shellwrappers as shellwrappers
Expand Down Expand Up @@ -205,4 +206,11 @@ def cleanup(jobs):

pass

if (jobs[list(jobs.keys())[0]]["recoveryfile"] != "" and
os.path.isfile(jobs[list(jobs.keys())[0]]["recoveryfile"])):

LOG.info("Removing the recovery file.")

os.remove(jobs[list(jobs.keys())[0]]["recoveryfile"])

LOG.info("Cleaning up complete.")
Expand Up @@ -112,6 +112,7 @@ def test_processconfigsresource1():
"polling-frequency": "300",
"port": "22",
"queue": "",
"recoveryfile": "",
"remoteworkdir": "",
"resource": "host1",
"replicates": "1",
Expand Down Expand Up @@ -215,6 +216,7 @@ def test_processconfigsresource2():
"sge-peoverride": "false",
"port": "22",
"queue": "",
"recoveryfile": "",
"remoteworkdir": "",
"resource": "host2",
"replicates": "1",
Expand Down Expand Up @@ -319,6 +321,7 @@ def test_processconfigsresource3():
"sge-peoverride": "false",
"port": "22",
"queue": "",
"recoveryfile": "",
"remoteworkdir": "",
"resource": "host1",
"replicates": "1",
Expand Down Expand Up @@ -422,6 +425,7 @@ def test_processconfigsresource4():
"sge-peoverride": "false",
"port": "22",
"queue": "",
"recoveryfile": "",
"remoteworkdir": "",
"resource": "host3",
"replicates": "1",
Expand Down
54 changes: 36 additions & 18 deletions tests/unit/corelibs_scheduling/test_monitor.py
Expand Up @@ -70,7 +70,8 @@ def test_monitor_testpollfrequency(mock_init, mock_poll, mock_wait):
"resource": "hpc1",
"queue-max": "0",
"queue-slots": "0",
"laststatus": "Running"
"laststatus": "Running",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
}
}
QUEUEINFO["hpc1"] = {}
Expand Down Expand Up @@ -111,7 +112,8 @@ def test_monitor_teststagefreq(mock_init, mock_poll, mock_wait, mock_down):
"resource": "hpc1",
"queue-max": "0",
"queue-slots": "0",
"laststatus": "Running"
"laststatus": "Running",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
}
}
QUEUEINFO["hpc1"] = {}
Expand Down Expand Up @@ -154,19 +156,22 @@ def test_monitor_complete1(mock_init, mock_poll, mock_wait, mock_down,
"resource": "hpc1",
"queue-max": "0",
"queue-slots": "0",
"laststatus": "Finished"
"laststatus": "Finished",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
},
"jobtwo": {
"resource": "hpc1",
"queue-max": "0",
"queue-slots": "0",
"laststatus": "Complete"
"laststatus": "Complete",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
},
"jobthree": {
"resource": "hpc1",
"queue-max": "0",
"queue-slots": "0",
"laststatus": "Submit Error"
"laststatus": "Submit Error",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
}
}

Expand Down Expand Up @@ -205,31 +210,36 @@ def test_monitor_complete2(mock_init, mock_poll, mock_wait, mock_down,
"resource": "hpc1",
"queue-max": "0",
"queue-slots": "0",
"laststatus": "Finished"
"laststatus": "Finished",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
},
"jobtwo": {
"resource": "hpc1",
"queue-max": "0",
"queue-slots": "0",
"laststatus": "Complete"
"laststatus": "Complete",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
},
"jobthree": {
"resource": "hpc1",
"queue-max": "0",
"queue-slots": "0",
"laststatus": "Submit Error"
"laststatus": "Submit Error",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
},
"jobfour": {
"resource": "hpc1",
"queue-max": "0",
"queue-slots": "0",
"laststatus": "Queued"
"laststatus": "Queued",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
},
"jobfive": {
"resource": "hpc1",
"queue-max": "0",
"queue-slots": "0",
"laststatus": "Running"
"laststatus": "Running",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
}
}

Expand Down Expand Up @@ -272,31 +282,36 @@ def test_monitor_run1(mock_init, mock_poll, mock_wait, mock_down,
"resource": "hpc1",
"queue-max": "0",
"queue-slots": "0",
"laststatus": "Running"
"laststatus": "Running",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
},
"jobtwo": {
"resource": "hpc1",
"queue-max": "0",
"queue-slots": "0",
"laststatus": "Running"
"laststatus": "Running",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
},
"jobthree": {
"resource": "hpc1",
"queue-max": "0",
"queue-slots": "0",
"laststatus": "Queued"
"laststatus": "Queued",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
},
"jobfour": {
"resource": "hpc1",
"queue-max": "0",
"queue-slots": "0",
"laststatus": "Queued"
"laststatus": "Queued",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
},
"jobfive": {
"resource": "hpc1",
"queue-max": "0",
"queue-slots": "0",
"laststatus": "Queued"
"laststatus": "Queued",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
}
}

Expand Down Expand Up @@ -340,19 +355,22 @@ def test_monitor_except(mock_init, mock_poll, mock_wait, mock_down,
"resource": "hpc1",
"queue-max": "0",
"queue-slots": "0",
"laststatus": "Finished"
"laststatus": "Finished",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
},
"jobtwo": {
"resource": "hpc1",
"queue-max": "0",
"queue-slots": "0",
"laststatus": "Complete"
"laststatus": "Complete",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
},
"jobthree": {
"resource": "hpc1",
"queue-max": "0",
"queue-slots": "0",
"laststatus": "Submit Error"
"laststatus": "Submit Error",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
}
}
QUEUEINFO["hpc1"] = {}
Expand Down
58 changes: 56 additions & 2 deletions tests/unit/corelibs_scheduling/test_submit.py
Expand Up @@ -136,6 +136,58 @@ def test_submit_multiplediff(mock_isdir, mock_lsf, mock_pbs, mock_slurm):
"For a single job this method should only be called once"


@mock.patch('longbow.corelibs.configuration.saveini')
@mock.patch('longbow.schedulers.lsf.submit')
@mock.patch('os.path.isdir')
def test_submit_filewrite(mock_isdir, mock_submit, mock_savini):

    """
    Check that submit() saves the recovery file exactly once when the
    directory check succeeds and the job carries a recovery file path.
    """

    # The scheduler submit is stubbed out and the isdir check always passes.
    mock_submit.return_value = None
    mock_isdir.return_value = True

    jobparams = {
        "resource": "test-machine",
        "scheduler": "LSF",
        "jobid": "test456",
        "recoveryfile": "recovery-YYMMDD-HHMMSS"
    }

    submit({"job-one": jobparams})

    timessaved = mock_savini.call_count

    assert timessaved == 1


@mock.patch('longbow.corelibs.configuration.saveini')
@mock.patch('longbow.schedulers.lsf.submit')
@mock.patch('os.path.isdir')
def test_submit_fileuninit(mock_isdir, mock_submit, mock_savini):

    """
    Check that submit() does not write a recovery file when the job's
    recovery file path is uninitialised (an empty string).
    """

    # The scheduler submit is stubbed out and the isdir check always passes,
    # so only the empty recovery file path can prevent the save.
    mock_submit.return_value = None
    mock_isdir.return_value = True

    jobparams = {
        "resource": "test-machine",
        "scheduler": "LSF",
        "jobid": "test456",
        "recoveryfile": ""
    }

    submit({"job-one": jobparams})

    timessaved = mock_savini.call_count

    assert timessaved == 0


@mock.patch('longbow.corelibs.configuration.saveini')
@mock.patch('longbow.schedulers.lsf.submit')
@mock.patch('os.path.isdir')
Expand All @@ -149,7 +201,8 @@ def test_submit_fileexcept1(mock_isdir, mock_submit, mock_savini):
"job-one": {
"resource": "test-machine",
"scheduler": "LSF",
"jobid": "test456"
"jobid": "test456",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
}
}

Expand All @@ -173,7 +226,8 @@ def test_submit_fileexcept2(mock_isdir, mock_submit, mock_savini):
"job-one": {
"resource": "test-machine",
"scheduler": "LSF",
"jobid": "test456"
"jobid": "test456",
"recoveryfile": "recovery-YYMMDD-HHMMSS"
}
}

Expand Down

0 comments on commit 99b2724

Please sign in to comment.