Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DONT MERGE] Destroy VM and "retry on failure" policy changes #126

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
59 changes: 26 additions & 33 deletions actions/destroy_vm.meta.yaml 100644 → 100755
@@ -1,36 +1,29 @@
---
name: "destroy_vm"
runner_type: "action-chain"
description: "Destroys a VM and removes it from Route53"
enabled: true
entry_point: "workflows/destroy_vm.yaml"
parameters:
environment:
type: "string"
description: "Environment to deploy to"
default: "staging"
dns_zone:
type: "string"
description: "Route53 DNS Zone to add host to"
default: "uswest2.stackstorm.net"
name: destroy_vm
runner_type: mistral-v2
description: Destroys a VM and removes it from Route53
enabled: true
entry_point: workflows/destroy_vm.yaml
parameters:
hostname:
type: "string"
description: "Short hostname"
required: true
notification_channel:
type: "string"
description: "used by rule with actiontrigger"
immutable: true
default: "#opstown"
notification:
type: "string"
description: "used by rule with actiontrigger"
immutable: true
default: "slack"
type: string
description: Short hostname
required: true
dns_zone:
type: string
description: Route53 DNS zone where the VM is deployed
default: uswest2.stackstorm.net
skip_notify:
default:
- get_instance_dns
- get_instances
- id
- destroy_vm
- delete_cname
immutable: true
default:
- get_instance_dns
- get_instances
- get_volumes
- destroy_instance
- delete_cname
- delete_volumes
- notify_success
- notify_multiple_instances_failure
- notify_destroy_instance_failure
- notify_delete_cname_failure
- notify_delete_volumes_failure
176 changes: 118 additions & 58 deletions actions/workflows/destroy_vm.yaml 100644 → 100755
@@ -1,58 +1,118 @@
---
chain:
-
name: "get_instance_dns"
ref: "linux.dig"
params:
hostname: "{{hostname}}.{{dns_zone}}"
count: 1
on-success: "get_instances"
notify:
on-failure:
routes:
- slack
message: "destroy_vm failure::get_instance_dns failure."
-
name: "get_instances"
ref: "aws.ec2_get_only_instances"
params: {}
on-success: "id"
notify:
on-failure:
routes:
- slack
message: "destroy_vm failure::AWS call to get instances list failed."
-
name: "id"
ref: "core.local"
params:
cmd: "echo '{% for i in get_instances.result -%}{% if (i.private_dns_name + '.') == get_instance_dns.result[0] -%}{{i.id}}{%- endif %}{%- endfor %}'"
on-success: "destroy_vm"
notify:
on-failure:
routes:
- slack
message: "destroy_vm failure::Did not finding matching instance."
-
name: "destroy_vm"
ref: "aws.ec2_terminate_instances"
params:
instance_ids: "{{id.stdout}}"
on-success: "delete_cname"
notify:
on-failure:
routes:
- slack
message: "destroy_vm failure::Terminate instance failed."
-
name: "delete_cname"
ref: "aws.r53_zone_delete_cname"
params:
zone: "{{dns_zone}}"
name: "{{hostname}}.{{dns_zone}}"
notify:
on-failure:
routes:
- slack
message: "destroy_vm failure::Deleting CNAME failed."
default: "get_instance_dns"
# Workflow definition, DSL version 2.0.
version: '2.0'

# Resolves <hostname>.<dns_zone>, finds the matching instance, terminates it,
# deletes its CNAME and any volumes that would outlive it, and reports the
# outcome to Slack.
st2cd.destroy_vm:
description: Destroy VM and associated resources
type: direct
input:
- hostname
- dns_zone
tasks:
# Resolve <hostname>.<dns_zone> with linux.dig. Continue only when at least
# one record came back; an empty result ends the run through noop.
get_instance_dns:
action: linux.dig
input:
hostname: <% $.hostname %>.<% $.dns_zone %>
count: 1
on-success:
- get_instances: <% len(task(get_instance_dns).result.result) > 0 %>
- noop: <% len(task(get_instance_dns).result.result) <= 0 %>
# List instances and publish, as "instances", the ids of those whose private
# DNS name (plus trailing dot) or private IP equals the first dig result.
# Exactly one match proceeds, several matches are reported as a failure,
# and no match ends the run through noop.
get_instances:
action: aws.ec2_get_only_instances
publish:
instances: <%
task(get_instances).result.result.where(
$.private_dns_name + '.' = task(get_instance_dns).result.result[0] or
$.private_ip_address = task(get_instance_dns).result.result[0]
).select($.id)
%>
on-success:
- get_volumes: <% len($.instances) = 1 %>
- notify_multiple_instances_failure: <% len($.instances) > 1 %>
- noop: <% len($.instances) < 1 %>


# Read the instance's blockDeviceMapping attribute and publish, as "volumes",
# the ids of volumes whose delete_on_termination flag is false (the flag is
# treated as True when it is absent).
get_volumes:
action: aws.ec2_get_instance_attribute
input:
attribute: blockDeviceMapping
instance_id: <% $.instances[0] %>
publish:
volumes: <%
task(get_volumes).result.result.first().get(
blockDeviceMapping, {}).values().where(
not $.get(delete_on_termination, True)).select($.volume_id)
%>
# A failure here is reported with the "was not destroyed" message.
on-error:
- notify_destroy_instance_failure
on-success:
- destroy_instance
# Terminate the single matched instance.
destroy_instance:
action: aws.ec2_terminate_instances
input:
instance_ids: <% $.instances[0] %>
# Give the instance time to terminate and volume(s) to be detached.
wait-after: 60
on-error:
- notify_destroy_instance_failure
on-success:
- delete_cname
# Delete the <hostname>.<dns_zone> CNAME from the dns_zone zone.
delete_cname:
action: aws.r53_zone_delete_cname
input:
zone: <% $.dns_zone %>
name: <% $.hostname %>.<% $.dns_zone %>
on-error:
- notify_delete_cname_failure
on-success:
- delete_volumes
# Delete every volume id published by get_volumes, one action call per item.
delete_volumes:
with-items: volume in <% $.volumes %>
action: aws.ec2_delete_volume
input:
volume_id: <% $.volume %>
# The instance may still be terminating and volumes are attached.
# NOTE(review): delay is presumably in seconds - confirm against the
# Mistral DSL.
retry:
count: 10
delay: 60
on-error:
- notify_delete_volumes_failure
on-success:
- notify_success


# Report the successful run to the #opstown Slack channel.
notify_success:
action: slack.post_message
input:
channel: "#opstown"
message: "[SUCCEEDED] <% $.hostname %> was destroyed"

# Each failure notification below posts to Slack and then fails the workflow
# through "on-complete: fail".
notify_multiple_instances_failure:
action: slack.post_message
input:
channel: "#opstown"
message: "[FAILED] More than one instance for <% $.hostname %> were identified"
on-complete:
- fail

notify_destroy_instance_failure:
action: slack.post_message
input:
channel: "#opstown"
message: "[FAILED] <% $.hostname %> was not destroyed"
on-complete:
- fail

notify_delete_cname_failure:
action: slack.post_message
input:
channel: "#opstown"
message: "[FAILED] <% $.hostname %> was destroyed but CNAME was not deleted"
on-complete:
- fail

notify_delete_volumes_failure:
action: slack.post_message
input:
channel: "#opstown"
message: "[FAILED] <% $.hostname %> was destroyed but volumes were not deleted"
on-complete:
- fail
8 changes: 7 additions & 1 deletion actions/workflows/st2workroom_st2enterprise_test.yaml
Expand Up @@ -5,6 +5,12 @@
st2_username: "cibuild"
st2_password: "cibuild"
chain:
-
name: "destroy_vm_if_exists"
ref: "st2cd.destroy_vm"
params:
hostname: "{{hostname}}"
on-success: "create_vm_role"
-
name: "create_vm_role"
ref: "st2cd.create_vm_role"
Expand Down Expand Up @@ -152,4 +158,4 @@
ref: "st2cd.destroy_vm"
params:
hostname: "{{hostname}}"
default: "create_vm_role"
default: "destroy_vm_if_exists"
Expand Up @@ -4,6 +4,12 @@
st2_username: "cibuild"
st2_password: "cibuild"
chain:
-
name: "destroy_vm_if_exists"
ref: "st2cd.destroy_vm"
params:
hostname: "{{hostname}}"
on-success: "create_vm_role"
-
name: "create_vm_role"
ref: "st2cd.create_vm_role"
Expand Down Expand Up @@ -150,4 +156,4 @@
ref: "st2cd.destroy_vm"
params:
hostname: "{{hostname}}"
default: "create_vm_role"
default: "destroy_vm_if_exists"
8 changes: 7 additions & 1 deletion actions/workflows/st2workroom_test.yaml
Expand Up @@ -4,6 +4,12 @@
st2_username: "cibuild"
st2_password: "cibuild"
chain:
-
name: "destroy_vm_if_exists"
ref: "st2cd.destroy_vm"
params:
hostname: "{{hostname}}"
on-success: "create_vm_role"
-
name: "create_vm_role"
ref: "st2cd.create_vm_role"
Expand Down Expand Up @@ -137,4 +143,4 @@
ref: "st2cd.destroy_vm"
params:
hostname: "{{hostname}}"
default: "create_vm_role"
default: "destroy_vm_if_exists"
8 changes: 7 additions & 1 deletion actions/workflows/st2workroom_upgrade_test.yaml
Expand Up @@ -4,6 +4,12 @@
st2_username: "cibuild"
st2_password: "cibuild"
chain:
-
name: "destroy_vm_if_exists"
ref: "st2cd.destroy_vm"
params:
hostname: "{{hostname}}"
on-success: "create_vm_role"
-
name: "create_vm_role"
ref: "st2cd.create_vm_role"
Expand Down Expand Up @@ -204,4 +210,4 @@
ref: "st2cd.destroy_vm"
params:
hostname: "{{hostname}}"
default: "create_vm_role"
default: "destroy_vm_if_exists"
11 changes: 11 additions & 0 deletions policies/retry_st2workroom_st2enterprise_test_on_failure.yaml
@@ -0,0 +1,11 @@
---
# Retry policy applied to the st2cd.st2workroom_st2enterprise_test action.
name: st2workroom_st2enterprise_test.retry_on_failure
# Note: We retry this run on failure to try to avoid false positives
# which are caused by intermittent networking issues and similar.
description: Retry "st2workroom_st2enterprise_test" tests on failure up to 2 times.
enabled: true
resource_ref: st2cd.st2workroom_st2enterprise_test
policy_type: action.retry
parameters:
  retry_on: failure
  # Keep the count quoted in the description in sync with this value.
  max_retry_count: 2
@@ -0,0 +1,11 @@
---
# Retry policy applied to the
# st2cd.st2workroom_st2installer_st2enterprise_test action.
name: st2workroom_st2installer_st2enterprise_test.retry_on_failure
# Note: We retry this run on failure to try to avoid false positives
# which are caused by intermittent networking issues and similar.
description: Retry "st2workroom_st2installer_st2enterprise_test" tests on failure up to 2 times.
enabled: true
resource_ref: st2cd.st2workroom_st2installer_st2enterprise_test
policy_type: action.retry
parameters:
  retry_on: failure
  # Keep the count quoted in the description in sync with this value.
  max_retry_count: 2
11 changes: 11 additions & 0 deletions policies/retry_st2workroom_test_on_failure.yaml
@@ -0,0 +1,11 @@
---
# Retry policy applied to the st2cd.st2workroom_test action.
name: st2workroom_test.retry_on_failure
# Note: We retry this run on failure to try to avoid false positives
# which are caused by intermittent networking issues and similar.
description: Retry "st2workroom_test" tests on failure up to 2 times.
enabled: true
resource_ref: st2cd.st2workroom_test
policy_type: action.retry
parameters:
  retry_on: failure
  # Keep the count quoted in the description in sync with this value.
  max_retry_count: 2