Skip to content

Commit

Permalink
fix(ssh): change cpu_load_indicator calculation as it was wrong
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinBelthle committed Aug 28, 2023
1 parent 31f3d6a commit 96d4ec5
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 30 deletions.
12 changes: 0 additions & 12 deletions antarest/launcher/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,18 +737,6 @@ def get_load(self) -> LauncherLoadDTO:
launcher_status="SUCCESS",
)

def get_versions(self) -> Dict[str, List[str]]:
    """
    Collect the solver versions declared for each configured launcher.

    Reads ``self.config.launcher``: when a ``local`` launcher is set, its
    ``binaries`` keys are listed under ``"local"``; when a ``slurm`` launcher
    is set, its ``antares_versions_on_remote_server`` value is stored under
    ``"slurm"``.

    Returns:
        A dictionary mapping the launcher name to its list of versions.
        Launchers that are not configured are absent from the result.
    """
    version_dict: Dict[str, List[str]] = {}
    launcher_config = self.config.launcher

    if launcher_config.local:
        version_dict["local"] = list(launcher_config.local.binaries.keys())

    if launcher_config.slurm:
        version_dict[
            "slurm"
        ] = launcher_config.slurm.antares_versions_on_remote_server

    # The collected mapping must be handed back to the caller; without this
    # the method would implicitly return ``None``.
    return version_dict

def get_solver_versions(self, solver: str) -> List[str]:
"""
Fetch the list of solver versions from the configuration.
Expand Down
31 changes: 17 additions & 14 deletions antarest/launcher/ssh_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,20 +50,23 @@ def execute_command(ssh_config: SSHConfigDTO, args: List[str]) -> Any:

def parse_cpu_used(sinfo_output: str) -> float:
    """
    Compute the percentage of used CPUs from a ``/``-separated ``sinfo`` field.

    Only the first two ``/``-separated fields are read: the first is taken as
    the count of used CPUs and the second as the count of inactive CPUs.
    Any further fields are ignored.

    Args:
        sinfo_output: Raw output of the ``sinfo`` command, e.g. ``"3/28/1/32"``.

    Returns:
        ``100 * used / (used + inactive)``, or ``0.0`` when both counts are
        zero (nothing is usable, so nothing is considered used).

    Raises:
        ValueError: If one of the first two fields is not an integer.
        IndexError: If the output holds fewer than two fields.
    """
    cpu_info_splitted = sinfo_output.split("/")
    cpu_used_count = int(cpu_info_splitted[0])
    cpu_inactive_count = int(cpu_info_splitted[1])
    cpu_usable_count = cpu_used_count + cpu_inactive_count
    if cpu_usable_count == 0:
        # Avoid a ZeroDivisionError when the partition reports no CPU at all.
        return 0.0
    return 100 * cpu_used_count / cpu_usable_count


def parse_cpu_load(sinfo_output: str) -> float:
    """
    Compute the overall CPU load percentage from per-node ``sinfo`` lines.

    Each line is expected to hold two whitespace-separated values: the load
    of the node followed by its CPU count. Lines containing ``N/A`` are
    ignored entirely (neither their load nor their CPU count is summed).
    Blank or incomplete lines are ignored as well.

    Args:
        sinfo_output: Raw multi-line output of the ``sinfo`` command,
            e.g. ``"0.01 24 \\n9.94 24 "``.

    Returns:
        ``100 * sum(load) / sum(cpus)``, capped at ``100.0``. Returns ``0.0``
        when no usable line is found.

    Raises:
        ValueError: If a retained line holds a value that is not a number.
    """
    cpus_used = 0.0
    cpus_available = 0.0
    for line in sinfo_output.splitlines():
        values = line.split()
        # Skip blank/truncated lines (nothing to parse) and nodes whose load
        # is not reported by Slurm.
        if len(values) < 2 or "N/A" in values:
            continue
        cpus_used += float(values[0])
        cpus_available += float(values[1])
    # ``max(..., 1)`` protects against a division by zero when every node
    # has been skipped.
    ratio = cpus_used / max(cpus_available, 1)
    # The load can exceed the CPU count on an overloaded node: saturate.
    return 100 * min(1.0, ratio)


def calculates_slurm_load(
Expand All @@ -74,7 +77,7 @@ def calculates_slurm_load(
ssh_config,
["sinfo", "--partition", partition, "-O", "NodeAIOT", "--noheader"],
)
allocated_cpus = 100 * parse_cpu_used(sinfo_cpus_used)
allocated_cpus = parse_cpu_used(sinfo_cpus_used)
# cluster load
sinfo_cpus_load = execute_command(
ssh_config,
Expand All @@ -84,11 +87,11 @@ def calculates_slurm_load(
partition,
"-N",
"-O",
"CPUsLoad",
"CPUsLoad,CPUs",
"--noheader",
],
)
cluster_load = min(100.0, parse_cpu_load(sinfo_cpus_load))
cluster_load = parse_cpu_load(sinfo_cpus_load)
# queued jobs
queued_jobs = int(
execute_command(
Expand Down
9 changes: 5 additions & 4 deletions tests/launcher/test_ssh_client.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import math
from unittest.mock import Mock

import pytest
Expand All @@ -17,10 +18,10 @@ def test_parse_cpu_used():

@pytest.mark.unit_test
def test_parse_cpu_load():
    """
    Check that `parse_cpu_load` returns the summed load over the summed CPU
    count, as a percentage, for the nodes that report a load.
    """
    # Four nodes with 24 CPUs each; the third one reports "N/A" and is
    # therefore excluded from both sums, leaving 3 * 24 CPUs.
    sinfo_output = "0.01 24 \n0.01 24 \nN/A 24 \n9.94 24 "
    # Floating-point sums are compared with a tolerance, not with `==`.
    assert math.isclose(
        parse_cpu_load(sinfo_output),
        100 * (0.01 + 0.01 + 9.94) / (24 + 24 + 24),
    )


Expand Down

0 comments on commit 96d4ec5

Please sign in to comment.