Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/test_role_elasticsearch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ jobs:
release:
- 8
- 9
include:
- distro: debian13
scenario: elasticsearch_diagnostics
release: 9

steps:
- name: Check out code
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ __pycache__/
*.swp
*.tar.gz
venv/
tests/integration/
65 changes: 65 additions & 0 deletions molecule/elasticsearch_diagnostics/converge.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
---
# Play 1: install Elasticsearch through the collection roles so that the
# diagnostics play that follows starts from a working configuration.
- name: Install Elasticsearch with good config
  hosts: all
  vars:
    # Release is read from the ELASTIC_RELEASE environment variable; an unset
    # or empty value falls back to 9.
    elasticstack_release: "{{ lookup('env', 'ELASTIC_RELEASE') | default('9', true) | int }}"
    elasticstack_full_stack: false
    elasticstack_no_log: false
    elasticsearch_heap: "1"
  tasks:
    - name: Include Elastic repos role
      ansible.builtin.include_role:
        name: oddly.elasticstack.repos

    - name: Include Elasticsearch role
      ansible.builtin.include_role:
        name: oddly.elasticstack.elasticsearch

    - name: CI — set lenient disk watermarks
      ansible.builtin.include_tasks:
        file: ../shared/set_ci_watermarks.yml

# Play 2: break elasticsearch.yml on purpose, run the role's
# restart-and-verify task file, and assert that the resulting failure message
# carries the "Recent log output" diagnostics. The known-good config is put
# back afterwards regardless of how the assertion turned out.
- name: Test startup failure diagnostics
  hosts: all
  tasks:
    - name: Back up good config
      ansible.builtin.copy:
        src: /etc/elasticsearch/elasticsearch.yml
        dest: /etc/elasticsearch/elasticsearch.yml.good
        remote_src: true
        mode: "0644"

    - name: Test bad config produces fast failure with diagnostics
      block:
        - name: Inject bad setting into elasticsearch.yml
          ansible.builtin.lineinfile:
            path: /etc/elasticsearch/elasticsearch.yml
            line: "bogus.nonexistent.setting: true"

        # Resolve the task file through the role instead of hand-building a
        # filesystem path from ANSIBLE_COLLECTIONS_PATH: that variable may be
        # a colon-separated list (or unset with the collection installed
        # somewhere other than ~/.ansible/collections), which would yield a
        # non-existent path.
        # NOTE(review): include_role also loads the role's defaults/vars and
        # any meta dependencies — confirm that is acceptable here.
        - name: Attempt restart with bad config (should fail with diagnostics)
          ansible.builtin.include_role:
            name: oddly.elasticstack.elasticsearch
            tasks_from: restart_and_verify_elasticsearch.yml

        # Reaching this task means the restart unexpectedly succeeded; failing
        # here sends control to `rescue`, where the assertion rejects this
        # message because it lacks 'Recent log output'.
        - name: This should not be reached
          ansible.builtin.fail:
            msg: "restart_and_verify_elasticsearch.yml did not fail on bad config"

      rescue:
        - name: Assert failure includes diagnostic log output
          ansible.builtin.assert:
            that:
              - ansible_failed_result.msg is defined
              - "'Recent log output' in ansible_failed_result.msg"
            fail_msg: >-
              Expected failure message with 'Recent log output', got:
              {{ ansible_failed_result.msg | default('no message') }}

      # `always` rather than `rescue`: the good config is restored and the
      # service restarted even when the assertion above fails, so the host is
      # never left with the injected bogus setting.
      always:
        - name: Restore good config
          ansible.builtin.copy:
            src: /etc/elasticsearch/elasticsearch.yml.good
            dest: /etc/elasticsearch/elasticsearch.yml
            remote_src: true
            mode: "0644"

        - name: Restart Elasticsearch with restored config
          ansible.builtin.include_role:
            name: oddly.elasticstack.elasticsearch
            tasks_from: restart_and_verify_elasticsearch.yml
1 change: 1 addition & 0 deletions molecule/elasticsearch_diagnostics/create.yml
1 change: 1 addition & 0 deletions molecule/elasticsearch_diagnostics/destroy.yml
45 changes: 45 additions & 0 deletions molecule/elasticsearch_diagnostics/molecule.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
---
# Molecule configuration for the elasticsearch_diagnostics scenario.
prerun: false

# Collections listed in requirements.yml are installed via ansible-galaxy.
dependency:
  name: galaxy
  options:
    requirements-file: requirements.yml

driver:
  name: default

platforms:
  # Instance name and distro follow MOLECULE_DISTRO / ELASTIC_RELEASE and fall
  # back to debian11 and release 9 when those variables are unset.
  # NOTE(review): `distro` and `memory_mb` are presumably consumed by the
  # scenario's create playbook — confirm against create.yml.
  - name: "es-diag-${MOLECULE_DISTRO:-debian11}-r${ELASTIC_RELEASE:-9}"
    distro: "${MOLECULE_DISTRO:-debian11}"
    memory_mb: 3328
    groups:
      - elasticsearch

provisioner:
  name: ansible
  env:
    ANSIBLE_LOG_PATH: /var/log/ansible.log
  connection_options:
    ansible_connection: ssh
    ansible_user: root
    ansible_ssh_retries: 3
    # Folded into one line of ssh options: connections are proxied through
    # root@INCUS_HOST using the key named by MOLECULE_SSH_KEY.
    ansible_ssh_common_args: >-
      -o StrictHostKeyChecking=no
      -o "ProxyCommand=ssh -o StrictHostKeyChecking=no -o BatchMode=yes
      -i ${MOLECULE_SSH_KEY:-~/.ssh/molecule_id_ed25519}
      -W %h:%p root@${INCUS_HOST:-172.30.0.172}"
  inventory:
    group_vars:
      all:
        ansible_python_interpreter: /usr/bin/python3

scenario:
  # cleanup/destroy run both before create and after verify.
  test_sequence:
    - dependency
    - cleanup
    - destroy
    - syntax
    - create
    - prepare
    - converge
    - verify
    - cleanup
    - destroy

verifier:
  name: ansible
8 changes: 8 additions & 0 deletions molecule/elasticsearch_diagnostics/prepare.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
# Prepare play: hands over to the shared prepare task file, exposing the
# DISTRO_CACHE_URL environment variable to it as `distro_cache_url`.
- name: Prepare
  hosts: all
  vars:
    distro_cache_url: "{{ lookup('env', 'DISTRO_CACHE_URL') }}"
  tasks:
    - name: Common prepare tasks
      ansible.builtin.include_tasks:
        file: ../shared/prepare_common.yml
3 changes: 3 additions & 0 deletions molecule/elasticsearch_diagnostics/requirements.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
---
# Galaxy collections installed by the scenario's dependency step.
collections:
  - name: community.general
40 changes: 40 additions & 0 deletions molecule/elasticsearch_diagnostics/verify.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
---
# Verify play: after the converge play has restored the good config, confirm
# that the elasticsearch unit is running and that the cluster health API
# answers with a green or yellow status.
- name: Verify Elasticsearch is healthy after diagnostics test
  hosts: all
  tasks:
    - name: Check Elasticsearch service status
      ansible.builtin.service_facts:

    # The default() chain keeps a missing unit from raising an undefined
    # variable error, so fail_msg is what the operator actually sees.
    - name: Verify Elasticsearch is running
      ansible.builtin.assert:
        that:
          - >-
            (ansible_facts.services['elasticsearch.service'] | default({})).state
            | default('') == 'running'
        fail_msg: "Elasticsearch is not running after config restore"

    # pipefail makes the task fail when grep finds no matching line instead of
    # silently registering an empty password.
    - name: Fetch Elastic password
      ansible.builtin.shell: |
        set -o pipefail
        grep "PASSWORD elastic " /usr/share/elasticsearch/initial_passwords |
          awk '{ print $4 }'
      args:
        executable: /bin/bash
      register: elastic_pass
      changed_when: false

    # Retried for up to ~30s so that a transient non-200 response right after
    # the restart does not fail verification.
    - name: Verify Elasticsearch API is responsive
      ansible.builtin.uri:
        url: "https://localhost:9200/_cluster/health"
        method: GET
        validate_certs: false
        force_basic_auth: true
        user: elastic
        password: "{{ elastic_pass.stdout }}"
        status_code: 200
        return_content: true
      register: health
      until: health.status | default(0) == 200
      retries: 6
      delay: 5

    - name: Verify cluster health is green or yellow
      ansible.builtin.assert:
        that:
          - health.json.status in ['green', 'yellow']
        fail_msg: "Cluster health is {{ health.json.status }}"
18 changes: 9 additions & 9 deletions roles/beats/handlers/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,27 @@
# handlers file for beats
#
- name: Restart Filebeat
ansible.builtin.service:
name: filebeat
state: restarted
ansible.builtin.include_tasks: restart_and_verify_beat.yml
vars:
_beat_service_name: filebeat
when:
- not ansible_check_mode
- beats_filebeat | bool
- beats_filebeat_enable | bool

- name: Restart Auditbeat
ansible.builtin.service:
name: auditbeat
state: restarted
ansible.builtin.include_tasks: restart_and_verify_beat.yml
vars:
_beat_service_name: auditbeat
when:
- not ansible_check_mode
- beats_auditbeat | bool
- beats_auditbeat_enable | bool

- name: Restart Metricbeat
ansible.builtin.service:
name: metricbeat
state: restarted
ansible.builtin.include_tasks: restart_and_verify_beat.yml
vars:
_beat_service_name: metricbeat
when:
- not ansible_check_mode
- beats_metricbeat | bool
Expand Down
31 changes: 31 additions & 0 deletions roles/beats/tasks/restart_and_verify_beat.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
---

# Restart a single beat service and confirm systemd reports it as active.
# When the restart or the verification fails, the last 50 journal lines for
# the unit are attached to the failure message.
#
# Input: `_beat_service_name` — the systemd unit to restart, supplied by the
# including handler through `vars:`.
- name: "Restart and verify beat — {{ _beat_service_name }}"  # noqa: name[template]
  block:
    - name: "Restart beat service — {{ _beat_service_name }}"  # noqa: name[template]
      ansible.builtin.service:
        name: "{{ _beat_service_name }}"
        state: restarted

    # Called with only `name`, the systemd module changes nothing and returns
    # the unit's status, which is polled up to 5 times, 3 seconds apart.
    # Registered names carry the role prefix (leading underscore allowed) to
    # satisfy ansible-lint var-naming[no-role-prefix].
    - name: "Verify beat service is running — {{ _beat_service_name }}"  # noqa: name[template]
      ansible.builtin.systemd:
        name: "{{ _beat_service_name }}"
      register: _beats_service_state
      until: (_beats_service_state.status | default({})).ActiveState | default('') == 'active'
      retries: 5
      delay: 3

  rescue:
    # Best effort: a journalctl problem must not replace the real startup
    # failure, so this task never fails on its own. argv avoids any shell-like
    # word splitting of the unit name.
    - name: "Get recent journal output — {{ _beat_service_name }}"  # noqa: name[template]
      ansible.builtin.command:
        argv:
          - journalctl
          - -u
          - "{{ _beat_service_name }}"
          - --no-pager
          - -n
          - "50"
      register: _beats_journal
      changed_when: false
      failed_when: false

    - name: "Fail with startup diagnostics — {{ _beat_service_name }}"  # noqa: name[template]
      ansible.builtin.fail:
        msg: |
          {{ _beat_service_name }} failed to start.

          Recent log output:
          {{ _beats_journal.stdout | default('(journal output unavailable)', true) }}
5 changes: 1 addition & 4 deletions roles/elasticsearch/handlers/main.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
---
# handlers file for elasticsearch
- name: Restart Elasticsearch
ansible.builtin.service:
name: elasticsearch
state: restarted
daemon_reload: true
ansible.builtin.include_tasks: restart_and_verify_elasticsearch.yml
when:
- not ansible_check_mode
- elasticsearch_enable | bool
Expand Down
47 changes: 42 additions & 5 deletions roles/elasticsearch/handlers/restart_kibana.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,46 @@
delegate_to: "{{ item }}"
changed_when: false

- name: Restart Kibana
ansible.builtin.service:
name: kibana
state: restarted
delegate_to: "{{ item }}"
# Restart Kibana on the delegated host `item` and wait until its status
# endpoint answers. Skipped when the package facts for that host do not list
# kibana. On failure the last 50 journal lines are attached to the error.
- name: Restart and wait for Kibana
  when: "'kibana' in hostvars[item].ansible_facts.packages | default({})"
  block:
    - name: Restart Kibana service
      ansible.builtin.service:
        name: kibana
        state: restarted
      delegate_to: "{{ item }}"

    # Exit codes of the probe script:
    #   0 - HTTP 200 or 401 from /api/status (Kibana is serving requests)
    #   1 - unit is active but not answering yet -> keep polling
    #   2 - unit is not active -> give up immediately
    # `until` only ends the retry loop when its condition holds; a true
    # `failed_when` does not. The loop therefore stops on 0 *or* 2, and
    # `failed_when` turns every non-zero result into a failure, so a dead
    # service reaches `rescue` at once instead of after 60 x 5s of retries.
    # Registered names carry the role prefix to satisfy ansible-lint
    # var-naming[no-role-prefix].
    - name: Wait for Kibana HTTP readiness after restart
      ansible.builtin.shell:
        cmd: |
          if ! systemctl is-active --quiet kibana; then
            exit 2
          fi
          HTTP_CODE=$(curl -sk -o /dev/null -w '%{http_code}' http://localhost:5601/api/status 2>/dev/null) || true
          if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "401" ]; then
            exit 0
          fi
          exit 1
      delegate_to: "{{ item }}"
      register: _elasticsearch_kibana_handler_wait
      until: _elasticsearch_kibana_handler_wait.rc | default(1) in [0, 2]
      retries: 60
      delay: 5
      changed_when: false
      failed_when: _elasticsearch_kibana_handler_wait.rc | default(1) != 0

  rescue:
    # Best effort: never let a journalctl problem hide the real failure.
    - name: Get recent Kibana journal output
      ansible.builtin.command:
        argv:
          - journalctl
          - -u
          - kibana
          - --no-pager
          - -n
          - "50"
      register: _elasticsearch_kibana_handler_journal
      delegate_to: "{{ item }}"
      changed_when: false
      failed_when: false

    - name: Fail with Kibana startup diagnostics
      ansible.builtin.fail:
        msg: |
          Kibana failed to start after restart by elasticsearch handler.

          Recent log output:
          {{ _elasticsearch_kibana_handler_journal.stdout | default('(journal output unavailable)', true) }}
32 changes: 32 additions & 0 deletions roles/elasticsearch/tasks/restart_and_verify_elasticsearch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
---

# Restart Elasticsearch (reloading systemd units first) and confirm systemd
# reports the unit as active. When the restart or the verification fails, the
# task list fails with a message containing "Recent log output:" followed by
# the last 50 journal lines for the unit.
- name: Restart and verify Elasticsearch
  block:
    - name: Restart Elasticsearch service
      ansible.builtin.service:
        name: elasticsearch
        state: restarted
        daemon_reload: true

    # Called with only `name`, the systemd module changes nothing and returns
    # the unit's status, which is polled up to 5 times, 3 seconds apart. The
    # default() chain turns a missing status into "not active yet" rather than
    # a templating error.
    - name: Verify Elasticsearch is running
      ansible.builtin.systemd:
        name: elasticsearch
      register: _elasticsearch_service_state
      until: (_elasticsearch_service_state.status | default({})).ActiveState | default('') == 'active'
      retries: 5
      delay: 3

  rescue:
    # Best effort: a journalctl problem must not replace the real startup
    # failure, so this task never fails on its own.
    - name: Get recent Elasticsearch journal output
      ansible.builtin.command:
        argv:
          - journalctl
          - -u
          - elasticsearch
          - --no-pager
          - -n
          - "50"
      register: _elasticsearch_journal
      changed_when: false
      failed_when: false

    - name: Fail with Elasticsearch startup diagnostics
      ansible.builtin.fail:
        msg: |
          Elasticsearch failed to start.

          Recent log output:
          {{ _elasticsearch_journal.stdout | default('(journal output unavailable)', true) }}
Loading
Loading