Skip to content
This repository was archived by the owner on Jan 28, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/campaign.rst
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ All methods are being used:
constraint tag is given to the sbatch options through the
*--constraint* option.

cluster
~~~~~~~
If value is "slurm", then the network ``nodes`` section is filled based on the
output of the ``sinfo`` command. A tag will also be added for every
(partition, feature) tuple formatted like this: ``{partition}_{feature}``.

ssh_config_file
~~~~~~~~~~~~~~~

Expand Down
23 changes: 23 additions & 0 deletions hpcbench/campaign.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
)
from . toolbox.env import expandvars
from . toolbox.functools_ext import listify
from . toolbox.slurm import SlurmCluster


def pip_installer_url(version=None):
Expand Down Expand Up @@ -223,12 +224,34 @@ def network(self):
"""
return self.campaign.network

@property
def slurm(self):
    """Whether the campaign network declares a SLURM cluster.

    :return: True if the network ``cluster`` key equals "slurm"
    """
    cluster_kind = self.campaign.network.get('cluster')
    return cluster_kind == 'slurm'

def expand(self):
    """Perform node expansion of network section.

    When the campaign declares a SLURM cluster (``cluster: slurm``),
    nodes and tags are first discovered from the live cluster, then the
    usual expansion of node expressions and tags takes place.
    """
    if self.slurm:
        self._introspect_slurm_cluster()
    self.network.nodes = NetworkConfig._expand_nodes(self.network.nodes)
    self._expand_tags()

def _introspect_slurm_cluster(self):
    """Fill the network ``nodes`` and ``tags`` from the SLURM cluster.

    One tag named ``{partition}_{feature}`` is created per
    (partition, active feature) pair, listing the matching nodes.
    Tags already present in the campaign take precedence over the
    discovered ones.
    """
    cluster = SlurmCluster()
    names = set()
    tag_nodes = {}
    for node in cluster.nodes:
        name = str(node)
        names.add(name)
        for feature in node.active_features:
            tag_nodes.setdefault(node.partition + '_' + feature, []).append(name)
    tags = {tag: dict(nodes=nodes) for tag, nodes in tag_nodes.items()}
    # user-defined tags override discovered ones on name clash
    tags.update(self.network.tags)
    self.network.nodes = list(names)
    self.network.tags = tags

@classmethod
def _expand_nodes(cls, nodes):
if isinstance(nodes, six.string_types):
Expand Down
1 change: 1 addition & 0 deletions hpcbench/toolbox/slurm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from . cluster import SlurmCluster # noqa
68 changes: 68 additions & 0 deletions hpcbench/toolbox/slurm/cluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import collections
import csv
import re
import subprocess

from cached_property import cached_property

from .. functools_ext import listify
from .. process import find_executable


# Path to the ``sinfo`` executable. required=False presumably makes the
# lookup non-fatal when SLURM is not installed — verify against
# toolbox.process.find_executable.
SINFO = find_executable('sinfo', required=False)


class SlurmCluster(object):
    """In-memory representation of a SLURM cluster, as reported by ``sinfo``.

    Nodes are grouped by partition; each node is a namedtuple whose fields
    are the sanitized ``sinfo`` column headers (e.g. ``active_features``).
    """

    def __init__(self, partitions=None):
        """
        :param partitions: optional dict mapping partition name to the
            list of its nodes. When not given, partitions are discovered
            by running ``sinfo``.
        """
        self.partitions = partitions or self.__class__.discover_partitions()

    @cached_property
    @listify()
    def nodes(self):
        """List of every node of the cluster, all partitions included."""
        for partition_nodes in self.partitions.values():
            for node in partition_nodes:
                yield node

    @classmethod
    def discover_partitions(cls):
        """Build the partition mapping from ``sinfo`` output.

        :return: dict mapping partition name to list of ``Node`` namedtuples
        """
        command = [SINFO, '--Node', '--format', '%all']
        output = subprocess.check_output(command)
        # check_output returns bytes on Python 3; csv needs text
        if isinstance(output, bytes):
            output = output.decode('utf-8')
        reader = csv.DictReader(output.splitlines(), delimiter='|')
        sanitizer_re = re.compile('[^0-9a-zA-Z]+')

        def sanitize(field):
            # normalize sinfo headers, e.g. "AVAIL FEATURES" -> "avail_features"
            return sanitizer_re.sub('_', field.strip()).lower()
        commasplit_fields = {'available_features', 'active_features'}
        # NOTE: was 'weigth', which never matched the sanitized header
        # 'weight', leaving that column unconverted.
        int_fields = {
            'sockets', 'cpus', 'prio_tier', 'threads', 'cores', 'nodes',
            'tmp_disk', 'weight', 'free_mem', 'prio_job_factor', 'memory'
        }
        float_fields = {'cpu_load'}
        reader.fieldnames = [sanitize(field) for field in reader.fieldnames]

        class Node(collections.namedtuple('Node', set(reader.fieldnames))):
            @property
            def name(self):
                return self.hostnames

            def __str__(self):
                return self.name

        partitions = dict()
        for row in reader:
            for key in row:
                row[key] = row[key].strip()
                conv_type = None
                if key in commasplit_fields:
                    # sinfo prints features as a comma-separated list,
                    # e.g. "rack1,skylake"; empty field means no features
                    row[key] = row[key].split(',') if row[key] else []
                elif key in int_fields:
                    conv_type = int
                elif key in float_fields:
                    conv_type = float
                if conv_type:
                    try:
                        row[key] = conv_type(row[key])
                    except ValueError:
                        # sinfo emits placeholders (e.g. "N/A") for
                        # unknown values; keep them as strings
                        pass
            partitions.setdefault(row['partition'], []).append(Node(**row))
        return partitions
23 changes: 22 additions & 1 deletion tests/test_slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
import mock
from mock import Mock

from hpcbench.campaign import ReportNode
from hpcbench.campaign import (
from_file,
ReportNode,
)
from hpcbench.driver import (
CampaignDriver,
SbatchDriver,
Expand Down Expand Up @@ -198,3 +201,21 @@ def test_per_tag_sbatch_args(self):
'uc1',
sbatch_str
)


class TestSlurmCluster(unittest.TestCase):
    """Check that a ``cluster: slurm`` campaign fills its network section
    from (mocked) ``sinfo`` output."""

    CAMPAIGN_FILE = osp.join(osp.dirname(__file__), 'test_slurm_cluster.yaml')
    SINFO_OUTPUT_FILE = osp.join(osp.dirname(__file__),
                                 'toolbox', 'sinfo-mock.txt')

    @mock.patch('subprocess.check_output')
    def test_campaign_network(self, check_output_mock):
        with open(self.SINFO_OUTPUT_FILE) as sinfo:
            check_output_mock.return_value = sinfo.read()
        campaign = from_file(self.CAMPAIGN_FILE)
        self.assertEqual(len(campaign.network.nodes), 36)
        expected_tags = {
            'uc1', 'uc2',
            'partition_1_rack1', 'partition_2_rack1', 'partition_3_rack1',
        }
        self.assertEqual(expected_tags, set(campaign.network.tags))
7 changes: 7 additions & 0 deletions tests/test_slurm_cluster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
network:
cluster: slurm
tags:
uc1:
nodes: [n1, n2]
uc2:
constraint: skylake
Loading