Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,12 @@ __pycache__/
*.tar.gz
venv/
tests/integration/
!tests/integration/
tests/integration/*
!tests/integration/elasticsearch_cluster_settings_contract.yml
!tests/integration/elasticsearch_template_contract.yml
!tests/integration/elasticsearch_template_inventory.ini
!tests/integration/elasticsearch_upgrade_detection_contract.yml
!tests/integration/rolling_restart_contract.yml
!tests/integration/rolling_restart_inventory.ini
site/
79 changes: 79 additions & 0 deletions molecule/elasticsearch_default/side_effect.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
---
- name: Trigger config-change rolling restart
hosts: all
vars:
elasticstack_full_stack: false
elasticstack_release: "{{ lookup('env', 'ELASTIC_RELEASE') | default('9', true) | int }}"
elasticsearch_security: true
elasticsearch_http_protocol: https
elasticsearch_heap: "1"
elasticstack_no_log: false
elasticsearch_elastic_password: "TestPassword123!"
elasticsearch_jvm_custom_parameters:
- "-Des.config.restart.marker=true"
elasticsearch_config_restart_health_retries: 20
elasticsearch_config_restart_health_delay: 3
elasticsearch_cluster_settings:
action.destructive_requires_name: "true"
tasks:
- name: Reset shared role guard for config restart test
ansible.builtin.set_fact:
_elasticstack_role_imported: false

- name: Record ES PID before config change
ansible.builtin.command: pgrep -f 'org.elasticsearch.bootstrap.Elasticsearch'
register: _es_pid_before_config_restart
changed_when: false

- name: Include Elasticsearch with changed JVM config
ansible.builtin.include_role:
name: oddly.elasticstack.elasticsearch

- name: Record ES PID after config change
ansible.builtin.command: pgrep -f 'org.elasticsearch.bootstrap.Elasticsearch'
register: _es_pid_after_config_restart
changed_when: false

- name: Verify config change restarted Elasticsearch
ansible.builtin.assert:
that:
- _es_pid_before_config_restart.stdout != _es_pid_after_config_restart.stdout
fail_msg: >-
Elasticsearch was not restarted after config change on {{ inventory_hostname }}.
PID remained {{ _es_pid_after_config_restart.stdout }}.
success_msg: >-
Elasticsearch restarted after config change on {{ inventory_hostname }}.

- name: Build config restart rolling timeline # noqa: run-once[task]
ansible.builtin.set_fact:
_es_first_restart_host: "{{ groups['elasticsearch'][0] }}"
_es_second_restart_host: "{{ groups['elasticsearch'][1] }}"
_es_first_restart_started_usec: "{{ hostvars[groups['elasticsearch'][0]]._elasticsearch_config_restart_started_usec | int }}"
_es_first_restart_health_complete_usec: "{{ hostvars[groups['elasticsearch'][0]]._elasticsearch_config_restart_health_complete_usec | int }}"
_es_second_restart_started_usec: "{{ hostvars[groups['elasticsearch'][1]]._elasticsearch_config_restart_started_usec | int }}"
_es_second_restart_health_complete_usec: "{{ hostvars[groups['elasticsearch'][1]]._elasticsearch_config_restart_health_complete_usec | int }}"
run_once: true

- name: Verify config restart was rolling # noqa: run-once[task]
ansible.builtin.assert:
that:
- groups['elasticsearch'] | length == 2
- _es_first_restart_started_usec | int > 0
- _es_first_restart_health_complete_usec | int > 0
- _es_second_restart_started_usec | int > 0
- _es_second_restart_health_complete_usec | int > 0
- _es_first_restart_started_usec | int <= _es_first_restart_health_complete_usec | int
- _es_second_restart_started_usec | int <= _es_second_restart_health_complete_usec | int
- _es_first_restart_health_complete_usec | int <= _es_second_restart_started_usec | int
fail_msg: >-
Elasticsearch config restart did not wait for the first node's
post-restart health gate before starting the next node.
{{ _es_first_restart_host }} started={{ _es_first_restart_started_usec }}
health_complete={{ _es_first_restart_health_complete_usec }};
{{ _es_second_restart_host }} started={{ _es_second_restart_started_usec }}
health_complete={{ _es_second_restart_health_complete_usec }}.
success_msg: >-
Elasticsearch config restart was rolling and health-gated.
{{ _es_first_restart_host }} completed health before
{{ _es_second_restart_host }} started.
run_once: true
14 changes: 14 additions & 0 deletions roles/elasticsearch/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,20 @@ elasticsearch_conf_dir: "/etc/elasticsearch/"
elasticsearch_group: elasticsearch
# @var elasticsearch_api_host:description: Hostname or IP used for Elasticsearch API health checks
elasticsearch_api_host: localhost
# @var elasticsearch_config_restart_strategy:description: Strategy for config-triggered Elasticsearch restarts. Use rolling for multi-node clusters, direct for legacy all-host handler behavior
elasticsearch_config_restart_strategy: rolling
# @var elasticsearch_config_restart_flush:description: Flush indices before each node restart during config-triggered rolling restarts
elasticsearch_config_restart_flush: true
# @var elasticsearch_config_restart_wait_status:description: Minimum cluster health status before and after each config-triggered rolling restart. Use yellow or green
elasticsearch_config_restart_wait_status: green
# @var elasticsearch_config_restart_health_retries:description: Number of cluster health polling attempts during config-triggered rolling restarts
elasticsearch_config_restart_health_retries: 50
# @var elasticsearch_config_restart_health_delay:description: Delay in seconds between cluster health polling attempts during config-triggered rolling restarts
elasticsearch_config_restart_health_delay: 30
# @var elasticsearch_config_restart_node_retries:description: Number of attempts to confirm the restarted node has rejoined the cluster
elasticsearch_config_restart_node_retries: 200
# @var elasticsearch_config_restart_node_delay:description: Delay in seconds between restarted node join checks
elasticsearch_config_restart_node_delay: 3

# @var elasticsearch_jvm_custom_parameters:description: Additional JVM parameters appended to jvm.options.d. Use for GC tuning, debug flags, etc
# @var elasticsearch_jvm_custom_parameters:example: >
Expand Down
48 changes: 45 additions & 3 deletions roles/elasticsearch/handlers/main.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,54 @@
---
# handlers file for elasticsearch
- name: Restart Elasticsearch
- name: Mark Elasticsearch restart requested
ansible.builtin.set_fact:
_elasticsearch_restart_requested: true
listen: Restart Elasticsearch
when:
- not ansible_check_mode
- elasticsearch_enable | bool
- not (_elasticsearch_freshstart | default({'changed': false})).changed | bool
- not (_elasticsearch_freshstart_security | default({'changed': false})).changed | bool
- not _elasticsearch_rolling_upgrade_performed | default(false) | bool
Comment thread
coderabbitai[bot] marked this conversation as resolved.

- name: Restart Elasticsearch directly
ansible.builtin.include_tasks: restart_and_verify_elasticsearch.yml
listen: Restart Elasticsearch
when:
- not ansible_check_mode
- elasticsearch_enable | bool
- >
elasticsearch_config_restart_strategy == 'direct'
or groups[elasticstack_elasticsearch_group_name] | default([]) | length <= 1
Comment thread
coderabbitai[bot] marked this conversation as resolved.
- not (_elasticsearch_freshstart | default({'changed': false})).changed | bool
- not (_elasticsearch_freshstart_security | default({'changed': false})).changed | bool
- not _elasticsearch_rolling_upgrade_performed | default(false) | bool

- name: Clear direct Elasticsearch restart request
ansible.builtin.set_fact:
_elasticsearch_restart_requested: false
listen: Restart Elasticsearch
when:
- not ansible_check_mode
- elasticsearch_enable | bool
- >
elasticsearch_config_restart_strategy == 'direct'
or groups[elasticstack_elasticsearch_group_name] | default([]) | length <= 1
- not (_elasticsearch_freshstart | default({'changed': false})).changed | bool
- not (_elasticsearch_freshstart_security | default({'changed': false})).changed | bool
- not _elasticsearch_rolling_upgrade_performed | default(false) | bool

- name: Restart Elasticsearch rolling
ansible.builtin.include_tasks: restart_and_verify_elasticsearch_rolling.yml
listen: Restart Elasticsearch
run_once: true
when:
- not ansible_check_mode
- elasticsearch_enable | bool
- not _elasticsearch_freshstart.changed | bool
- not _elasticsearch_freshstart_security.changed | bool
- elasticsearch_config_restart_strategy == 'rolling'
- groups[elasticstack_elasticsearch_group_name] | default([]) | length > 1
- not (_elasticsearch_freshstart | default({'changed': false})).changed | bool
- not (_elasticsearch_freshstart_security | default({'changed': false})).changed | bool
- not _elasticsearch_rolling_upgrade_performed | default(false) | bool

- name: Restart kibana if available for elasticsearch certificates
Expand Down
58 changes: 58 additions & 0 deletions roles/elasticsearch/tasks/elasticsearch-cluster-settings.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
---

- name: elasticsearch-cluster-settings | Build effective cluster settings
ansible.builtin.set_fact:
_elasticsearch_effective_cluster_settings: >-
{{ (elasticsearch_logsdb | default(false) | bool)
| ternary({'cluster.logsdb.enabled': 'true'}, {})
| combine(elasticsearch_cluster_settings | default({})) }}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

- name: elasticsearch-cluster-settings | Apply persistent cluster settings # noqa: run-once[task]
when:
- _elasticsearch_effective_cluster_settings | length > 0
- elasticsearch_security | bool | ternary(elasticstack_password is defined and (elasticstack_password.stdout | default('') | length > 0), true)
- not ansible_check_mode
run_once: true
delegate_to: "{{ elasticstack_ca_host | default(inventory_hostname) }}"
block:
- name: elasticsearch-cluster-settings | Read current persistent cluster settings
ansible.builtin.uri:
url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings?flat_settings=true"
method: GET
user: "{{ 'elastic' if elasticsearch_security | bool else omit }}"
password: "{{ elasticstack_password.stdout if elasticsearch_security | bool else omit }}"
force_basic_auth: "{{ elasticsearch_security | bool }}"
validate_certs: "{{ elasticsearch_validate_api_certs }}"
return_content: true
register: _elasticsearch_current_cluster_settings
no_log: "{{ elasticstack_no_log }}"

- name: elasticsearch-cluster-settings | Check if settings already match
ansible.builtin.set_fact:
_elasticsearch_cluster_settings_changed: "{{ _needs_update | trim }}"
vars:
_current: "{{ _elasticsearch_current_cluster_settings.json.persistent }}"
_needs_update: >-
{% set ns = namespace(changed=false) %}
{% for key, value in _elasticsearch_effective_cluster_settings.items() %}
{% if _current.get(key) is none or _current[key] | string != value | string %}
{% set ns.changed = true %}
Comment on lines +35 to +39
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Boolean setting values can break idempotent diff detection.

On Line 38, string comparison can mis-detect changes when desired values are YAML booleans (True/False) but Elasticsearch returns lowercase strings (true/false), causing repeated PUTs.

Proposed fix
-        _needs_update: >-
-          {% set ns = namespace(changed=false) %}
-          {% for key, value in _elasticsearch_effective_cluster_settings.items() %}
-          {%   if _current.get(key) is none or _current[key] | string != value | string %}
-          {%     set ns.changed = true %}
-          {%   endif %}
-          {% endfor %}
-          {{ ns.changed }}
+        _needs_update: >-
+          {% set ns = namespace(changed=false) %}
+          {% for key, value in _elasticsearch_effective_cluster_settings.items() %}
+          {%   if value is boolean %}
+          {%     set expected = 'true' if value else 'false' %}
+          {%   else %}
+          {%     set expected = value | string %}
+          {%   endif %}
+          {%   if _current.get(key) is none or (_current[key] | string) != expected %}
+          {%     set ns.changed = true %}
+          {%   endif %}
+          {% endfor %}
+          {{ ns.changed }}

As per coding guidelines, "Review for Ansible best practices: idempotency (command/shell needs creates/removes guards), correct handler notifications, proper use of become/become_user, and platform conditionals (Debian vs RHEL)."

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@roles/elasticsearch/tasks/elasticsearch-cluster-settings.yml` around lines 35
- 39, The idempotency check in the Jinja loop (_needs_update) can mis-detect
boolean changes because it compares value | string to _current[key] | string
while Elasticsearch may return lowercase "true"/"false"; update the comparison
in the loop that references _elasticsearch_effective_cluster_settings and
_current so both sides are normalized (e.g., apply |string|lower to
_current[key] and value, or coerce both to boolean using |bool) before comparing
and keep the existing None check for _current.get(key); modify the conditional
that sets ns.changed to use this normalized comparison so boolean settings no
longer trigger repeated PUTs.

{% endif %}
{% endfor %}
{{ ns.changed }}

- name: elasticsearch-cluster-settings | Apply cluster settings
ansible.builtin.uri:
url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings"
method: PUT
body_format: json
body:
persistent: "{{ _elasticsearch_effective_cluster_settings }}"
user: "{{ 'elastic' if elasticsearch_security | bool else omit }}"
password: "{{ elasticstack_password.stdout if elasticsearch_security | bool else omit }}"
force_basic_auth: "{{ elasticsearch_security | bool }}"
validate_certs: "{{ elasticsearch_validate_api_certs }}"
status_code: 200
no_log: "{{ elasticstack_no_log }}"
when: _elasticsearch_cluster_settings_changed | bool
changed_when: true
36 changes: 36 additions & 0 deletions roles/elasticsearch/tasks/elasticsearch-upgrade-detection.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
---

# Detect whether a rolling upgrade is needed. Any version change — major,
# minor, or patch — should restart nodes one at a time with shard allocation
# management rather than restarting all nodes simultaneously.
#
# Pre-install detection covers cases where we know the target version:
# 1. Pinned version higher than installed (elasticstack_version: "9.2.0")
# 2. Major version change (elasticstack_release differs from installed)
#
# For "latest" mode, we can't know pre-install whether a newer version is
# available. The normal package tasks handle installation with state: latest,
# and if the package changed, a rolling restart is triggered post-install.
- name: elasticsearch-upgrade-detection | Detect if rolling upgrade is needed
ansible.builtin.set_fact:
_elasticsearch_needs_rolling_upgrade: >-
{{ ansible_facts.packages['elasticsearch'] is defined and
ansible_facts.packages['elasticsearch'][0].version is defined and
((elasticstack_version is defined and
elasticstack_version != 'latest' and
elasticstack_version is version(ansible_facts.packages['elasticsearch'][0].version, '>'))
or
(elasticstack_release | default(8) | int != (ansible_facts.packages['elasticsearch'][0].version.split('.')[0] | int))) }}

- name: elasticsearch-upgrade-detection | Check upgrade path requirement for ES 9.x
ansible.builtin.fail:
msg: |
UPGRADE PATH VIOLATION: Elasticsearch 9.x requires 8.19.x first.
Current version: {{ ansible_facts.packages['elasticsearch'][0].version }}
You must upgrade to 8.19.x before upgrading to 9.x.
See: https://www.elastic.co/docs/deploy-manage/upgrade/deployment-or-cluster
when:
- elasticstack_release | default(8) | int >= 9
- ansible_facts.packages['elasticsearch'] is defined
- ansible_facts.packages['elasticsearch'][0].version is version('8.0.0', '>=')
- ansible_facts.packages['elasticsearch'][0].version is version('8.19.0', '<')
92 changes: 3 additions & 89 deletions roles/elasticsearch/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,40 +4,8 @@
ansible.builtin.package_facts:
manager: auto

# Detect whether a rolling upgrade is needed. Any version change — major,
# minor, or patch — should restart nodes one at a time with shard allocation
# management rather than restarting all nodes simultaneously.
#
# Pre-install detection covers cases where we know the target version:
# 1. Pinned version higher than installed (elasticstack_version: "9.2.0")
# 2. Major version change (elasticstack_release differs from installed)
#
# For "latest" mode, we can't know pre-install whether a newer version is
# available. The normal package tasks handle installation with state: latest,
# and if the package changed, a rolling restart is triggered post-install.
- name: Detect if rolling upgrade is needed (pre-install)
ansible.builtin.set_fact:
_elasticsearch_needs_rolling_upgrade: >-
{{ ansible_facts.packages['elasticsearch'] is defined and
ansible_facts.packages['elasticsearch'][0].version is defined and
((elasticstack_version is defined and
elasticstack_version != 'latest' and
elasticstack_version is version(ansible_facts.packages['elasticsearch'][0].version, '>'))
or
(elasticstack_release | int != (ansible_facts.packages['elasticsearch'][0].version.split('.')[0] | int))) }}

- name: Check upgrade path requirement for ES 9.x
ansible.builtin.fail:
msg: |
UPGRADE PATH VIOLATION: Elasticsearch 9.x requires 8.19.x first.
Current version: {{ ansible_facts.packages['elasticsearch'][0].version }}
You must upgrade to 8.19.x before upgrading to 9.x.
See: https://www.elastic.co/docs/deploy-manage/upgrade/deployment-or-cluster
when:
- elasticstack_release | int >= 9
- ansible_facts.packages['elasticsearch'] is defined
- ansible_facts.packages['elasticsearch'][0].version is version('8.0.0', '>=')
- ansible_facts.packages['elasticsearch'][0].version is version('8.19.0', '<')
ansible.builtin.include_tasks: elasticsearch-upgrade-detection.yml

- name: Include global role
ansible.builtin.import_role:
Expand Down Expand Up @@ -655,59 +623,5 @@

# -- Persistent cluster settings via _cluster/settings API --

- name: Build effective cluster settings
ansible.builtin.set_fact:
_elasticsearch_effective_cluster_settings: >-
{{ (elasticsearch_logsdb | bool)
| ternary({'cluster.logsdb.enabled': 'true'}, {})
| combine(elasticsearch_cluster_settings | default({})) }}

- name: Apply persistent cluster settings # noqa: run-once[task]
when:
- _elasticsearch_effective_cluster_settings | length > 0
- elasticsearch_security | bool | ternary(elasticstack_password is defined and (elasticstack_password.stdout | default('') | length > 0), true)
- not ansible_check_mode
run_once: true
delegate_to: "{{ elasticstack_ca_host | default(inventory_hostname) }}"
block:
- name: Read current persistent cluster settings
ansible.builtin.uri:
url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings?flat_settings=true"
method: GET
user: "{{ 'elastic' if elasticsearch_security | bool else omit }}"
password: "{{ elasticstack_password.stdout if elasticsearch_security | bool else omit }}"
force_basic_auth: "{{ elasticsearch_security | bool }}"
validate_certs: "{{ elasticsearch_validate_api_certs }}"
return_content: true
register: _elasticsearch_current_cluster_settings
no_log: "{{ elasticstack_no_log }}"

- name: Check if settings already match
ansible.builtin.set_fact:
_elasticsearch_cluster_settings_changed: "{{ _needs_update | trim }}"
vars:
_current: "{{ _elasticsearch_current_cluster_settings.json.persistent }}"
_needs_update: >-
{% set ns = namespace(changed=false) %}
{% for key, value in _elasticsearch_effective_cluster_settings.items() %}
{% if _current.get(key) is none or _current[key] | string != value | string %}
{% set ns.changed = true %}
{% endif %}
{% endfor %}
{{ ns.changed }}

- name: Apply cluster settings
ansible.builtin.uri:
url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings"
method: PUT
body_format: json
body:
persistent: "{{ _elasticsearch_effective_cluster_settings }}"
user: "{{ 'elastic' if elasticsearch_security | bool else omit }}"
password: "{{ elasticstack_password.stdout if elasticsearch_security | bool else omit }}"
force_basic_auth: "{{ elasticsearch_security | bool }}"
validate_certs: "{{ elasticsearch_validate_api_certs }}"
status_code: 200
no_log: "{{ elasticstack_no_log }}"
when: _elasticsearch_cluster_settings_changed | bool
changed_when: true
- name: Apply persistent cluster settings
ansible.builtin.include_tasks: elasticsearch-cluster-settings.yml
Loading
Loading