Skip to content

Commit 58f8b31

Browse files
authored
Merge pull request #218 from ScaleComputing/shutdown-tagged-vms
Shutdown tagged vms
2 parents 5147089 + 242a395 commit 58f8b31

File tree

15 files changed

+837
-19
lines changed

15 files changed

+837
-19
lines changed
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
---
2+
# Example invocation:
3+
# ansible-playbook -e '{"vm_shutdown_tags": ["gpu-passthrough", "usb-passthrough"]}' examples/shutdown_restart_tagged_vms.yml
4+
- name: Shut down and start back HyperCore VMs with specific tags
5+
hosts: localhost
6+
connection: local
7+
gather_facts: false
8+
vars:
9+
vm_shutdown_tags:
10+
- gpu-passthrough
11+
- usb-passthrough
12+
13+
tasks:
14+
# ------------------------------------------------------
15+
- name: Store initial/desired VMs state
16+
scale_computing.hypercore.vm_info:
17+
register: vm_desired_state
18+
19+
# ------------------------------------------------------
20+
- name: List tagged VMs power-state before shutdown
21+
block: &list-tagged-vms
22+
- name: List all VMs
23+
scale_computing.hypercore.vm_info:
24+
register: vm_info_result
25+
26+
- name: Find VMs with matching tag
27+
ansible.builtin.shell: |
28+
#!/usr/bin/env python
29+
import json
30+
import sys
31+
data_str = sys.stdin.read()
32+
data = json.loads(data_str)
33+
all_vms = data["all_vms"]
34+
vm_shutdown_tags = data["vm_shutdown_tags"]
35+
tagged_vms = []
36+
for vm in all_vms:
37+
for tag in vm["tags"]:
38+
if tag in vm_shutdown_tags:
39+
tagged_vms.append(vm)
40+
break
41+
print(json.dumps(tagged_vms))
42+
args:
43+
# /usr/bin/python3 - fedora
44+
# /usr/local/bin/python - python:3.10-slim-buster docker image
45+
executable: python3
46+
stdin: "{{ stdin_data | to_json }}"
47+
vars:
48+
stdin_data:
49+
all_vms: "{{ vm_info_result.records }}"
50+
vm_shutdown_tags: "{{ vm_shutdown_tags }}"
51+
changed_when: false
52+
register: tagged_vms_result
53+
54+
- name: Show tagged VMs
55+
ansible.builtin.debug:
56+
msg: |
57+
vm_names={{ tagged_vms_result.stdout | from_json | map(attribute='vm_name') }}
58+
power_state={{ tagged_vms_result.stdout | from_json | map(attribute='power_state') }}
59+
60+
# ------------------------------------------------------
61+
- name: Shutdown running VMs with specific tags
62+
ansible.builtin.include_role:
63+
name: scale_computing.hypercore.version_update_single_node
64+
tasks_from: shutdown_vms
65+
vars:
66+
scale_computing_hypercore_shutdown_vms: "{{ vm_desired_state }}"
67+
scale_computing_hypercore_shutdown_tags: "{{ vm_shutdown_tags }}"
68+
69+
- name: List tagged VMs power-state after shutdown
70+
block: *list-tagged-vms
71+
72+
# ------------------------------------------------------
73+
- name: Start back VMs with specific tags
74+
ansible.builtin.include_role:
75+
name: scale_computing.hypercore.version_update_single_node
76+
tasks_from: restart_vms.yml
77+
vars:
78+
scale_computing_hypercore_restart_vms: "{{ vm_desired_state }}"
79+
80+
- name: List tagged VMs power-state after restart
81+
block: *list-tagged-vms
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
---
2+
scale_computing_hypercore_shutdown_wait_time: 300
3+
scale_computing_hypercore_shutdown_tags: []

roles/version_update_single_node/meta/argument_specs.yml

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,44 @@ argument_specs:
1313
- If multi-node system was detected, no update will be applied.
1414
required: true
1515
type: str
16+
scale_computing_hypercore_shutdown_wait_time: &scale_computing_hypercore_shutdown_wait_time
17+
description:
18+
- How much time (in seconds) VMs have to gracefully shutdown.
19+
- After wait time expires a force shutdown is issued. Force shutdown can corrupt VM disk data.
20+
default: 300
21+
type: int
22+
23+
shutdown_vms:
24+
short_description: Shutdown running VMs before upgrade
25+
description:
26+
- Taskfile shutdown_vms is used to shutdown running VMs.
27+
- Input is a list of VMs, as returned by M(scale_computing.hypercore.vm_info) module.
28+
The VMs listed as `running` in the list are then shutdown.
29+
options:
30+
scale_computing_hypercore_shutdown_wait_time: *scale_computing_hypercore_shutdown_wait_time
31+
scale_computing_hypercore_shutdown_vms:
32+
description:
33+
- VM list as returned by M(scale_computing.hypercore.vm_info) module.
34+
required: true
35+
type: dict
36+
scale_computing_hypercore_shutdown_tags:
37+
description:
38+
- A VM is shut down only if it has at least one tag from this list.
39+
- If the tag list is empty, then every running VM from the VM list is shut down.
40+
required: false
41+
type: list
42+
elements: str
43+
default: []
44+
45+
restart_vms:
46+
short_description: Start VMs that were running before upgrade
47+
description:
48+
- Taskfile restart_vms is used to start VMs that were running before upgrade.
49+
- Input is a list of VMs, as returned by M(scale_computing.hypercore.vm_info) module.
50+
The VMs listed as `running` in the list are then started.
51+
options:
52+
scale_computing_hypercore_restart_vms:
53+
description:
54+
- VM list as returned by M(scale_computing.hypercore.vm_info) module.
55+
required: true
56+
type: dict

roles/version_update_single_node/tasks/main.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656
- name: Shutdown all running VMs
5757
include_tasks: shutdown_vms.yml
5858
vars:
59-
vms: "{{ vm_info }}"
59+
scale_computing_hypercore_shutdown_vms: "{{ vm_info }}"
6060
when: scale_computing_hypercore_shutdown_vms.records != []  # skip the include when the VM list is empty
6161

6262
# ----------------- UPDATE --------------------
@@ -78,7 +78,7 @@
7878
- name: Restart previously running VMs
7979
include_tasks: restart_vms.yml
8080
vars:
81-
vms: "{{ vm_info }}"
81+
scale_computing_hypercore_restart_vms: "{{ vm_info }}"
8282
when: scale_computing_hypercore_restart_vms.records != []  # skip the include when the VM list is empty
8383

8484
- name: Check if updating to desired version failed

roles/version_update_single_node/tasks/restart_vms.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
vm_name: "{{ item.vm_name }}"
55
power_state: start
66
when: item.power_state == 'started'
7-
loop: "{{ vms.records }}"
7+
loop: "{{ scale_computing_hypercore_restart_vms.records }}"
88
register: vm_start_result
99

1010
- name: Show restart results

roles/version_update_single_node/tasks/shutdown_vms.yml

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,29 @@
33
ansible.builtin.debug:
44
msg: "{{ item.vm_name }}"
55
when: item.power_state == 'started'
6-
loop: "{{ vms.records }}"
6+
loop: "{{ scale_computing_hypercore_shutdown_vms.records }}"
77
register: running_vms
88

99
- name: Shutdown running VMs
1010
scale_computing.hypercore.vm_params:
1111
vm_name: "{{ item.vm_name }}"
1212
power_state: shutdown
13-
when: item.power_state == 'started'
14-
loop: "{{ vms.records }}"
13+
when:
14+
- item.power_state == 'started'
15+
- (scale_computing_hypercore_shutdown_tags == []) or (scale_computing_hypercore_shutdown_tags | intersect(item.tags))
16+
loop: "{{ scale_computing_hypercore_shutdown_vms.records }}"
1517
register: vm_shutdown_result
1618
ignore_errors: true # a VM that fails to shut down gracefully raises an error; ignore it here and fall through to the forced stop later in this task file
1719

20+
- name: Set fact version_update_all_vms_stopped to initial false
21+
ansible.builtin.set_fact:
22+
version_update_all_vms_stopped: false
23+
1824
# Wait up to scale_computing_hypercore_shutdown_wait_time seconds (default 300), one loop iteration per 10 sec
1925
- name: Wait until VMs shutdown
2026
include_tasks: wait_vm_shutdown.yml
21-
loop: "{{ range(0, 30) | list }}"
22-
when: version_update_all_vms_stopped | default(true)
27+
loop: "{{ range(0, (scale_computing_hypercore_shutdown_wait_time / 10.0) | round(0, 'ceil') | int) | list }}"
28+
when: not version_update_all_vms_stopped
2329

2430
- name: Show shutdown results
2531
ansible.builtin.debug:
@@ -29,7 +35,9 @@
2935
scale_computing.hypercore.vm_params:
3036
vm_name: "{{ item.item.vm_name }}"
3137
power_state: stop
32-
when: item.item.power_state == 'started'
38+
when:
39+
- item.item.power_state == 'started'
40+
- (scale_computing_hypercore_shutdown_tags == []) or (scale_computing_hypercore_shutdown_tags | intersect(item.item.tags))
3341
loop: "{{ vm_shutdown_result.results }}"
3442
register: vm_stop_result
3543

roles/version_update_single_node/tasks/wait_vm_shutdown.yml

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,19 @@
2525
# CRASHED="crashed",
2626
# Do not include 'shutdown' - it means "shutting_down"
2727
# States paused, blocked - might be safe to include, might not. Do not include yet.
28-
- name: Set fact version_update_all_vms_stopped
28+
- name: Set fact version_update_all_vms_stopped to initial true
2929
ansible.builtin.set_fact:
30-
version_update_all_vms_stopped: |
31-
{{
32-
(
33-
version_update_vms.records | map(attribute='power_state') | unique) |
34-
ansible.builtin.difference(['stopped', 'crashed']
35-
) == []
36-
}}
30+
version_update_all_vms_stopped: true
31+
32+
# We wait for VMs to shutdown, but only if they are included in the scale_computing_hypercore_shutdown_vms list.
33+
- name: Reset version_update_all_vms_stopped if any VM is still running
34+
ansible.builtin.set_fact:
35+
version_update_all_vms_stopped: false
36+
when:
37+
- (version_update_vms.records | selectattr("vm_name", "equalto", item.vm_name) | list).0.power_state not in ['stopped', 'crashed']
38+
- (scale_computing_hypercore_shutdown_tags == []) or (scale_computing_hypercore_shutdown_tags | intersect(item.tags))
39+
loop: "{{ scale_computing_hypercore_shutdown_vms.records }}"
40+
register: vm_shutdown_result
3741

3842
- name: Are all VMs stopped?
3943
ansible.builtin.debug:

tests/integration/integration_config.yml.j2

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,9 @@ sc_config:
6060
config_url_test: https://login.microsoftonline.com/76d4c62a-a9ca-4dc2-9187-e2cc4d9abe7f/v2.0/.well-known/openid-configuration
6161
cluster_shutdown:
6262
magic_allow_string: "oh-no-no" # use "allow-cluster-shutdown-test" to allow this integration test
63-
version_update:
64-
magic_allow_string: "oh-no-no" # use "allow-version-update-test" to allow this integration test
63+
# version_update:
64+
# magic_allow_string: "oh-no-no" # use "allow-version-update-test" to allow this integration test
65+
# vm_shutdown_restart_allow_string: "oh-no-no" # use "allow-vm-shutdown-restart-test" to test VM shutdown/restart
6566
syslog_server:
6667
host: 10.5.11.222
6768

@@ -85,6 +86,9 @@ sc_config:
8586
smtp:
8687
<<: *base_smtp
8788
from_address: PUB5@scalecomputing.com
89+
version_update:
90+
magic_allow_string: "oh-no-no"
91+
vm_shutdown_restart_allow_string: "oh-no-no"
8892
# Under features are described properties that affect expected integration test output.
8993
features:
9094
version_update:
@@ -125,6 +129,9 @@ sc_config:
125129
smtp:
126130
<<: *base_smtp
127131
from_address: VSNS200@scalecomputing.com
132+
version_update:
133+
magic_allow_string: "oh-no-no"
134+
vm_shutdown_restart_allow_string: "allow-vm-shutdown-restart-test"
128135
syslog_server:
129136
host: 10.5.11.222
130137
features:
@@ -157,6 +164,9 @@ sc_config:
157164
smtp:
158165
<<: *base_smtp
159166
from_address: VSNS201@scalecomputing.com
167+
version_update:
168+
magic_allow_string: "oh-no-no"
169+
vm_shutdown_restart_allow_string: "allow-vm-shutdown-restart-test"
160170
syslog_server:
161171
host: 10.5.11.222
162172
features:
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# version_update/shutdown_vms.yml default behaviour - shutdown all running VMs
2+
3+
# ------------------------------------------------------------------------------------------------------------------
4+
# before shutdown
5+
- name: Check current power state for test VMs
6+
scale_computing.hypercore.vm_info:
7+
vm_name: "{{ vm_name }}"
8+
register: vm_info_results
9+
loop: "{{ vm_names }}"
10+
loop_control:
11+
loop_var: vm_name
12+
13+
- name: Show power_state before shutdown
14+
ansible.builtin.debug:
15+
var: vm_info_results.results | map(attribute='records.0.power_state')
16+
17+
- name: Check VMs were initially running
18+
ansible.builtin.assert:
19+
that:
20+
- vm_info_results.results | map(attribute="records.0.vm_name") == vm_names
21+
- vm_info_results.results | map(attribute="records.0.power_state") == ["started", "started", "started"]
22+
23+
# ------------------------------------------------------------------------------------------------------------------
24+
# do shutdown
25+
- name: List all VMs
26+
scale_computing.hypercore.vm_info:
27+
register: vm_info_a
28+
29+
- name: Shutdown all running VMs
30+
ansible.builtin.include_role:
31+
name: scale_computing.hypercore.version_update_single_node
32+
tasks_from: shutdown_vms.yml
33+
vars:
34+
scale_computing_hypercore_shutdown_vms: "{{ vm_info_a }}"
35+
36+
# ------------------------------------------------------------------------------------------------------------------
37+
# after shutdown
38+
- name: List all VMs
39+
scale_computing.hypercore.vm_info:
40+
register: vm_info_b
41+
42+
- name: Show power_state after shutdown
43+
ansible.builtin.debug:
44+
var: vm_info_b.records | map(attribute="power_state")
45+
46+
- name: Check all VMs are shutdown
47+
ansible.builtin.assert:
48+
that:
49+
- vm_info_b.records | map(attribute="power_state") | difference(["stopped"]) == []
50+
51+
# ------------------------------------------------------------------------------------------------------------------
52+
# do restart
53+
- name: Restart VMs
54+
ansible.builtin.include_role:
55+
name: scale_computing.hypercore.version_update_single_node
56+
tasks_from: restart_vms.yml
57+
vars:
58+
scale_computing_hypercore_restart_vms: "{{ vm_info_a }}"
59+
60+
# ------------------------------------------------------------------------------------------------------------------
61+
# after restart
62+
- name: List all VMs
63+
scale_computing.hypercore.vm_info:
64+
register: vm_info_c
65+
66+
- name: Show power_state after restart - all VMs
67+
ansible.builtin.debug:
68+
var: vm_info_c.records | map(attribute="power_state")
69+
70+
- name: Check current power state for test VMs
71+
scale_computing.hypercore.vm_info:
72+
vm_name: "{{ vm_name }}"
73+
register: vm_info_results_b
74+
loop: "{{ vm_names }}"
75+
loop_control:
76+
loop_var: vm_name
77+
78+
- name: Show power_state after restart - test VMs
79+
ansible.builtin.debug:
80+
var: vm_info_results_b.results | map(attribute="records.0.power_state")
81+
82+
- name: Check test VMs are running again
83+
ansible.builtin.assert:
84+
that:
85+
- vm_info_results_b.results | map(attribute="records.0.vm_name") == vm_names
86+
- vm_info_results_b.results | map(attribute="records.0.power_state") == ["started", "started", "started"]

0 commit comments

Comments
 (0)