-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #141 from HDFGroup/chogan/auto_cluster
Enable emulated multi-node testing in CI
- Loading branch information
Showing
14 changed files
with
288 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# Image for one node of an emulated multi-node test cluster: Ubuntu 20.04 with
# an SSH server, MPICH, libfabric and a passwordless-sudo 'mpirun' user.
FROM ubuntu:20.04

ENV USER=mpirun

ENV DEBIAN_FRONTEND=noninteractive \
    HOME=/home/${USER}

# sudo and apt-utils are installed in their own apt-get invocation before the
# SSH server, compilers and MPI stack. Package lists and temp files are removed
# in the same layer so they do not end up in the image.
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends \
      sudo \
      apt-utils \
    && apt-get install -y --no-install-recommends \
      openssh-server \
      gcc \
      g++ \
      libfabric-dev \
      mpich \
      binutils \
    && apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Runtime directory for sshd; -p tolerates the directory already existing.
RUN mkdir -p /var/run/sshd
# Double quotes are required here: the shell must expand ${USER}. With single
# quotes the root password would be the literal text '${USER}'.
RUN echo "root:${USER}" | chpasswd
# NOTE(review): this only rewrites a line reading exactly
# 'PermitRootLogin without-password'. Confirm the stock sshd_config of this
# base image contains that line; otherwise this substitution is a no-op.
RUN sed -i 's/PermitRootLogin without-password/PermitRootLogin yes/' /etc/ssh/sshd_config

# SSH login fix. Otherwise user is kicked off after login
RUN sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd

ENV NOTVISIBLE="in users profile"
RUN echo "export VISIBLE=now" >> /etc/profile

# ------------------------------------------------------------
# Add an 'mpirun' user
# ------------------------------------------------------------

# UID/GID of the container user. Pass the invoking host user's ids with
# --build-arg so files on bind-mounted host directories stay writable.
# The defaults only keep a plain `docker build` from failing.
ARG USER_ID=1000
ARG GROUP_ID=1000

RUN addgroup --gid ${GROUP_ID} ${USER}
RUN adduser --disabled-password --gecos "" --uid ${USER_ID} --gid ${GROUP_ID} ${USER} && \
    echo "${USER} ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers

# ------------------------------------------------------------
# Set up the SSH client configuration for the 'mpirun' user.
# No keys are baked into the image; this only creates ~/.ssh and
# disables host key checking.
# ------------------------------------------------------------

ENV SSHDIR=${HOME}/.ssh/

RUN mkdir -p ${SSHDIR}
RUN echo "StrictHostKeyChecking no" > ${SSHDIR}/config

# Restrict the files inside ~/.ssh to their owner, then hand the directory
# over to the container user.
RUN chmod -R 600 ${SSHDIR}* && \
    chown -R ${USER}:${USER} ${SSHDIR}

# Run the SSH daemon in the foreground as the container's main process.
CMD ["/usr/sbin/sshd", "-D"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash

# Stop the emulated cluster.
# cluster_utils.sh is looked up by bare name, so run this from the directory
# that contains it.
source cluster_utils.sh
hermes_cluster_down
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash

# Run the cluster test. An optional first argument is forwarded unchanged to
# hermes_cluster_test.
# cluster_utils.sh is looked up by bare name, so run this from the directory
# that contains it.
source cluster_utils.sh
hermes_cluster_test ${1:-}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash

# Build the images and start the emulated cluster.
# cluster_utils.sh is looked up by bare name, so run this from the directory
# that contains it.
source cluster_utils.sh
hermes_cluster_up
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
#!/bin/bash

# Shared settings and helpers for the emulated cluster. This file is meant to
# be sourced; note that the shell options below then apply to the caller too.
set -x -e

# Node names are taken from the hostname entries of docker-compose.yml in the
# current working directory. They are also used as compose service names.
node_names=($(awk '/hostname:/ {print $2}' docker-compose.yml))
docker_user=mpirun
docker_home=/home/${docker_user}
# Location of the generated multi-node configuration inside each container.
cluster_conf=${docker_home}/hermes.conf
# Directory containing this file. The inner command substitution is quoted so
# a checkout path containing whitespace is handled correctly.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
hermes_build_dir=${script_dir}/../../build
# Container names are composed as <project>_<service>_<index>.
# NOTE(review): this assumes docker-compose uses the directory name verbatim as
# its project name; confirm for directory names with unusual characters.
project_name="$(basename "${script_dir}")"
host1=${project_name}_${node_names[0]}_1
host2=${project_name}_${node_names[1]}_1
|
||
|
||
# Build images and start a cluster
#
# Arguments:
#   $1 - number of containers to start for the second service (default: 1)
#
# NOTE(review): only ${host1} and ${host2} receive a configuration file and
# ssh keys, even when $1 is greater than 1.
function hermes_cluster_up() {
  local num_workers=${1:-1}
  local conf_path=${script_dir}/../../test/data/hermes.conf
  local n h

  # Build the images, passing our user id and group id so the container user can
  # modify the .gcda coverage files
  for n in "${node_names[@]}"; do
    docker-compose build \
      --build-arg GROUP_ID="$(id -g)" \
      --build-arg USER_ID="$(id -u)" \
      "${n}"
  done

  docker-compose up -d \
    --scale "${node_names[0]}=1" \
    --scale "${node_names[1]}=${num_workers}" \
    --no-recreate

  for h in "${host1}" "${host2}"; do
    # Change the default hermes.conf file to accommodate multiple nodes and
    # store it at ${cluster_conf} on each node.
    # 1. Replace "./" mount_points and swap_mount with ${docker_home}
    # 2. Change rpc_server_base_name to 'node'
    # 3. Change rpc_num_threads to 4
    # 4. Change rpc_host_number_range to {1, 2}
    # The variables are expanded on the host; the paths are single-quoted for
    # the shell that runs inside the container.
    docker exec --user "${docker_user}" -w "${hermes_build_dir}" "${h}" \
      bash -c "sed -e 's|\"\./\"|\"${docker_home}\"|g' \
                   -e 's|\"localhost\"|\"node\"|' \
                   -e 's|rpc_num_threads = 1|rpc_num_threads = 4|' \
                   -e 's|{0, 0}|{1, 2}|' '${conf_path}' > '${cluster_conf}'"

    # Copy ssh keys to ${docker_home}/.ssh. The source paths use the host's
    # ${HOME}, which docker-compose.yml mounts into the container at the same
    # location. The public key doubles as the authorized_keys file.
    docker exec "${h}" cp "${HOME}/.ssh/id_rsa" "${docker_home}/.ssh/id_rsa"
    docker exec "${h}" cp "${HOME}/.ssh/id_rsa.pub" "${docker_home}/.ssh/id_rsa.pub"
    docker exec "${h}" cp "${HOME}/.ssh/id_rsa.pub" "${docker_home}/.ssh/authorized_keys"
    docker exec "${h}" chown -R "${docker_user}:${docker_user}" "${docker_home}/.ssh"
  done
}
|
||
# Launch the end-to-end test with mpirun from the first service's container,
# spanning both cluster hosts.
#
# Arguments:
#   $1 - optional extra option for `docker-compose exec` (for example "-T");
#        omitted from the command line when empty
hermes_cluster_test() {
  local tty_flag=${1:-}
  local host_list=${host1},${host2}

  # Options for `docker-compose exec`, in the order they are passed.
  local -a exec_opts=(
    ${tty_flag}
    -e GLOG_vmodule=rpc_thallium=1
    -e LSAN_OPTIONS=suppressions=../test/data/asan.supp
    --user ${docker_user}
    -w ${hermes_build_dir}
  )

  docker-compose exec "${exec_opts[@]}" ${node_names[0]} \
    mpirun -n 4 -ppn 2 -hosts ${host_list} bin/end_to_end_test ${cluster_conf}
}
|
||
# Tear the cluster down via docker-compose.
hermes_cluster_down() {
  docker-compose down
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Two services built from the Dockerfile in this directory and joined by a
# bridge network. cluster_utils.sh reads the node names from the hostname
# entries below, so keep each one on its own line and equal to its service name.
version: "3"

networks:
  net:
    driver: bridge

services:
  node1:
    hostname: node1
    build: .
    networks: [net]
    links: [node2]
    # The host home directory is mounted at the same path inside the container.
    volumes:
      - $HOME:$HOME

  node2:
    hostname: node2
    build: .
    networks: [net]
    # The host home directory is mounted at the same path inside the container.
    volumes:
      - $HOME:$HOME
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#!/bin/bash

# CI driver: create ssh keys, start a two node cluster, run the Hermes tests on
# it, and shut the cluster down again.

set -x -e

. cluster_utils.sh

# Shut the cluster down whenever this script exits. Because of `set -e`, a
# failing build or test ends the script early; the trap keeps the containers
# from being left running in that case.
trap hermes_cluster_down EXIT

# Create ssh keys for the cluster to use.
# WARNING: the piped 'y' answers ssh-keygen's overwrite prompt, so an existing
# ~/.ssh/id_rsa is replaced.
echo -e 'y\n' | ssh-keygen -q -t rsa -N "" -f ~/.ssh/id_rsa

# Start a two node cluster
hermes_cluster_up

# Run the Hermes tests on the cluster (without allocating a tty)
hermes_cluster_test "-T"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.