Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions bot/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,10 @@ EESSI_SOFTWARE_SUBDIR_OVERRIDE=${EESSI_SOFTWARE_SUBDIR_OVERRIDE:-${CPU_TARGET}}
export EESSI_SOFTWARE_SUBDIR_OVERRIDE
echo "bot/test.sh: EESSI_SOFTWARE_SUBDIR_OVERRIDE='${EESSI_SOFTWARE_SUBDIR_OVERRIDE}'"

# determine accelerator target (if any) from .architecture in ${JOB_CFG_FILE}
export EESSI_ACCELERATOR_TARGET=$(cfg_get_value "architecture" "accelerator")
echo "bot/test.sh: EESSI_ACCELERATOR_TARGET='${EESSI_ACCELERATOR_TARGET}'"

# get EESSI_OS_TYPE from .architecture.os_type in ${JOB_CFG_FILE} (default: linux)
EESSI_OS_TYPE=$(cfg_get_value "architecture" "os_type")
export EESSI_OS_TYPE=${EESSI_OS_TYPE:-linux}
Expand Down Expand Up @@ -219,6 +223,9 @@ declare -a TEST_SUITE_ARGS=()
if [[ ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} =~ .*/generic$ ]]; then
TEST_SUITE_ARGS+=("--generic")
fi
if [[ ${SHARED_FS_PATH} ]]; then
TEST_SUITE_ARGS+=("--shared-fs-path" "${SHARED_FS_PATH}")
fi
# [[ ! -z ${BUILD_LOGS_DIR} ]] && TEST_SUITE_ARGS+=("--build-logs-dir" "${BUILD_LOGS_DIR}")
# [[ ! -z ${SHARED_FS_PATH} ]] && TEST_SUITE_ARGS+=("--shared-fs-path" "${SHARED_FS_PATH}")

Expand Down
57 changes: 0 additions & 57 deletions reframe_config_bot.py.tmpl

This file was deleted.

83 changes: 40 additions & 43 deletions test_suite.sh
Original file line number Diff line number Diff line change
Expand Up @@ -132,56 +132,42 @@ else
fi

# Configure ReFrame, see https://www.eessi.io/docs/test-suite/installation-configuration
export RFM_CONFIG_FILES=$TOPDIR/reframe_config_bot.py
export RFM_CONFIG_FILE_TEMPLATE=$TOPDIR/reframe_config_bot.py.tmpl
# RFM_CONFIG_FILES _has_ to be set by the site hosting the bot, so that it knows where to find the ReFrame
# config file that matches the bot config. See https://gitlab.com/eessi/support/-/issues/114#note_2293660921
if [ -z "$RFM_CONFIG_FILES" ]; then
if [ -z "${shared_fs_path}" ]; then
fatal_error "Environment variable 'shared_fs_path' was expected, but was not set"
fi
# Try to find a config file at $shared_fs_path/reframe_config.py
export RFM_CONFIG_FILES="${shared_fs_path}/reframe_config.py"
if [ ! -f "${RFM_CONFIG_FILES}" ]; then
# If we haven't found the ReFrame config, print an informative error
err_msg="Please put a ReFrame configuration file in ${shared_fs_path}/reframe_config.py"
err_msg="${err_msg} or set RFM_CONFIG_FILES in the environment of this bot instance to point to a valid"
err_msg="${err_msg} ReFrame configuration file that matches the bot config."
err_msg="${err_msg} For more information, see https://gitlab.com/eessi/support/-/issues/114#note_2293660921"
fatal_error "${err_msg}"
fi
fi
export RFM_CHECK_SEARCH_PATH=$TESTSUITEPREFIX/eessi/testsuite/tests
export RFM_CHECK_SEARCH_RECURSIVE=1
export RFM_PREFIX=$PWD/reframe_runs

echo "Configured reframe with the following environment variables:"
env | grep "RFM_"

# The /sys inside the container is not the same as the /sys of the host
# We want to extract the memory limit from the cgroup on the host (which is typically set by SLURM).
# Thus, bot/test.sh bind-mounts the host's /sys/fs/cgroup into /hostsys/fs/cgroup
# and that's the prefix we use to extract the memory limit from
cgroup_v1_mem_limit="/hostsys/fs/cgroup/memory/$(</proc/self/cpuset)/memory.limit_in_bytes"
cgroup_v2_mem_limit="/hostsys/fs/cgroup/$(</proc/self/cpuset)/memory.max"
if [ -f "$cgroup_v1_mem_limit" ]; then
echo "Getting memory limit from file $cgroup_v1_mem_limit"
cgroup_mem_bytes=$(cat "$cgroup_v1_mem_limit")
elif [ -f "$cgroup_v2_mem_limit" ]; then
echo "Getting memory limit from file $cgroup_v2_mem_limit"
cgroup_mem_bytes=$(cat "$cgroup_v2_mem_limit")
if [ "$cgroup_mem_bytes" = 'max' ]; then
# In cgroupsv2, the memory.max file may contain 'max', meaning the group can use the full system memory
# Here, we get the system memory from /proc/meminfo. Units are supposedly always in kb, but lets match them too
cgroup_mem_kilobytes=$(grep -oP 'MemTotal:\s+\K\d+(?=\s+kB)' /proc/meminfo)
if [[ $? -ne 0 ]] || [[ -z "$cgroup_mem_kilobytes" ]]; then
fatal_error "Failed to get memory limit from /proc/meminfo"
fi
cgroup_mem_bytes=$(("$cgroup_mem_kilobytes"*1024))
fi
else
fatal_error "Both files ${cgroup_v1_mem_limit} and ${cgroup_v2_mem_limit} couldn't be found. Failed to get the memory limit from the current cgroup"
# Get the correct partition name
REFRAME_PARTITION_NAME=${EESSI_SOFTWARE_SUBDIR//\//_}
if [ ! -z "$EESSI_ACCELERATOR_TARGET" ]; then
REFRAME_PARTITION_NAME=${REFRAME_PARTITION_NAME}_${EESSI_ACCELERATOR_TARGET//\//_}
fi
if [[ $? -eq 0 ]]; then
# Convert to MiB
cgroup_mem_mib=$(("$cgroup_mem_bytes"/(1024*1024)))
else
fatal_error "Failed to get the memory limit in bytes from the current cgroup"
fi
echo "Detected available memory: ${cgroup_mem_mib} MiB"
echo "Constructed partition name based on EESSI_SOFTWARE_SUBDIR and EESSI_ACCELERATOR_TARGET: ${REFRAME_PARTITION_NAME}"

cp ${RFM_CONFIG_FILE_TEMPLATE} ${RFM_CONFIG_FILES}
echo "Replacing memory limit in the ReFrame config file with the detected CGROUP memory limit: ${cgroup_mem_mib} MiB"
sed -i "s/__MEM_PER_NODE__/${cgroup_mem_mib}/g" $RFM_CONFIG_FILES
RFM_PARTITION="${SLURM_JOB_PARTITION}"
echo "Replacing partition name in the template ReFrame config file: ${RFM_PARTITION}"
sed -i "s/__RFM_PARTITION__/${RFM_PARTITION}/g" $RFM_CONFIG_FILES
# Set the reframe system name, including partition
export RFM_SYSTEM="BotBuildTests:${REFRAME_PARTITION_NAME}"

echo "Configured reframe with the following environment variables:"
env | grep "RFM_"

# Make debugging easier by printing the final config file:
echo "Final config file (after replacements):"
echo "ReFrame config file used:"
cat "${RFM_CONFIG_FILES}"

# Workaround for https://github.com/EESSI/software-layer/pull/467#issuecomment-1973341966
Expand Down Expand Up @@ -217,7 +203,18 @@ else
fatal_error "Failed to extract names of tests to run: ${REFRAME_NAME_ARGS}"
exit ${test_selection_exit_code}
fi
export REFRAME_ARGS="--tag CI --tag 1_node --nocolor ${REFRAME_NAME_ARGS}"
# Allow people deploying the bot to overrwide this
if [ -z "$REFRAME_SCALE_TAG" ]; then
REFRAME_SCALE_TAG="--tag 1_node"
fi
if [ -z "$REFRAME_CI_TAG" ]; then
REFRAME_CI_TAG="--tag CI"
fi
# Allow bot-deployers to add additional args through the environment
if [ -z "$REFRAME_ADDITIONAL_ARGS" ]; then
REFRAME_ADDITIONAL_ARGS=""
fi
export REFRAME_ARGS="${REFRAME_CI_TAG} ${REFRAME_SCALE_TAG} ${REFRAME_ADDITIONAL_ARGS} --nocolor ${REFRAME_NAME_ARGS}"

# List the tests we want to run
echo "Listing tests: reframe ${REFRAME_ARGS} --list"
Expand Down