From 85d73cd9f5455ab0a83f41eae0dbf959c8708d57 Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Fri, 17 Apr 2026 20:08:36 -0400
Subject: [PATCH 01/23] fix: prevent AttributeError race in autolock timer
 persistence

cancel() can null _end_time while _persist_to_store awaits async_load,
causing an AttributeError on isoformat(). Snapshot values into locals
before the first await so the persist uses captured state.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/helpers.py |  7 +++--
 tests/test_helpers.py                  | 40 ++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index 1314118e..a4645c83 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -190,10 +190,13 @@ async def _persist_to_store(self) -> None:
         """Write current timer state to the store."""
         if not self._store or not self._timer_id or not self._end_time:
             return
+        # Capture values before any await; cancel() may clear them mid-persist.
+        end_time = self._end_time
+        duration = self._duration
         data = await self._store.async_load() or {}
         data[self._timer_id] = {
-            "end_time": self._end_time.isoformat(),
-            "duration": self._duration,
+            "end_time": end_time.isoformat(),
+            "duration": duration,
         }
         await self._store.async_save(data)
 
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index bd7cece3..dde9be9f 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -802,6 +802,46 @@ async def mock_action(*args):
     await timer._persist_to_store()
 
 
+async def test_keymaster_timer_persist_survives_concurrent_cancel(hass, mock_store):
+    """Test _persist_to_store doesn't crash if cancel() clears _end_time mid-persist."""
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+    kmlock.autolock_min_day = 5
+
+    async def mock_action(*args):
+        pass
+
+    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store)
+
+    # Make async_load simulate a yield that lets cancel() sneak in
+    original_end_time = dt_util.utcnow() + timedelta(minutes=5)
+
+    async def load_and_cancel():
+        """Simulate cancel() running during the await in _persist_to_store."""
+        timer._end_time = None
+        timer._duration = None
+        return {}
+
+    mock_store.async_load = AsyncMock(side_effect=load_and_cancel)
+
+    # Manually set timer state as start() would
+    timer._end_time = original_end_time
+    timer._duration = 300
+
+    # Should NOT raise AttributeError
+    await timer._persist_to_store()
+
+    # Store should have been saved with the snapshotted values
+    mock_store.async_save.assert_called()
+    saved_data = mock_store.async_save.call_args[0][0]
+    assert "test_timer" in saved_data
+    assert saved_data["test_timer"]["duration"] == 300
+
+
 async def test_keymaster_timer_remove_from_store_missing_key(hass, mock_store):
     """Test _remove_from_store is a no-op when timer_id is not in the store."""
     timer = KeymasterTimer()

From e2e42f44958bbe2d44b95d70f02947f7fa40d522 Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Fri, 17 Apr 2026 20:44:13 -0400
Subject: [PATCH 02/23] fix: add re-check after await to prevent resurrecting
 canceled timers

After async_load yields, verify _end_time still matches the snapshot
before saving. If cancel() ran during the await, bail out instead of
re-adding the entry. Updates test to assert cancel wins the race.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/helpers.py |  3 +++
 tests/test_helpers.py                  | 11 ++++-------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index a4645c83..6ea21576 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -194,6 +194,9 @@ async def _persist_to_store(self) -> None:
         end_time = self._end_time
         duration = self._duration
         data = await self._store.async_load() or {}
+        # If cancel() ran during the await, don't resurrect a canceled entry.
+        if self._end_time is not end_time:
+            return
         data[self._timer_id] = {
             "end_time": end_time.isoformat(),
             "duration": duration,
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index dde9be9f..0f832abb 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -803,7 +803,7 @@ async def mock_action(*args):
 
 
 async def test_keymaster_timer_persist_survives_concurrent_cancel(hass, mock_store):
-    """Test _persist_to_store doesn't crash if cancel() clears _end_time mid-persist."""
+    """Test _persist_to_store bails out if cancel() clears _end_time mid-persist."""
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
         lock_name="test_lock",
@@ -832,14 +832,11 @@ async def load_and_cancel():
     timer._end_time = original_end_time
     timer._duration = 300
 
-    # Should NOT raise AttributeError
+    # Should NOT raise AttributeError, and should NOT resurrect the canceled entry
     await timer._persist_to_store()
 
-    # Store should have been saved with the snapshotted values
-    mock_store.async_save.assert_called()
-    saved_data = mock_store.async_save.call_args[0][0]
-    assert "test_timer" in saved_data
-    assert saved_data["test_timer"]["duration"] == 300
+    # cancel() won the race — persist should have bailed out without saving
+    mock_store.async_save.assert_not_called()
 
 
 async def test_keymaster_timer_remove_from_store_missing_key(hass, mock_store):

From 5beff2065735bcf732f7a38eb0eafceb1de0a317 Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Fri, 17 Apr 2026 21:02:19 -0400
Subject: [PATCH 03/23] fix: serialize store operations with asyncio.Lock

Replaces the snapshot+re-check approach with an asyncio.Lock shared
between _persist_to_store and _remove_from_store, fully preventing
interleaved store operations that could resurrect canceled timers.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/helpers.py | 35 ++++++++--------
 tests/test_helpers.py                  | 56 ++++++++++++++++++--------
 2 files changed, 59 insertions(+), 32 deletions(-)

diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index 6ea21576..5bc652f4 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import asyncio
 from collections.abc import Callable, MutableMapping
 from datetime import datetime as dt, timedelta
 import logging
@@ -82,6 +83,7 @@ def __init__(self) -> None:
         self._duration: int | None = None
         self._timer_id: str | None = None
         self._store: Store[dict[str, TimerStoreEntry]] | None = None
+        self._store_lock = asyncio.Lock()
 
     async def setup(
         self,
@@ -190,27 +192,28 @@ async def _persist_to_store(self) -> None:
         """Write current timer state to the store."""
         if not self._store or not self._timer_id or not self._end_time:
             return
-        # Capture values before any await; cancel() may clear them mid-persist.
-        end_time = self._end_time
-        duration = self._duration
-        data = await self._store.async_load() or {}
-        # If cancel() ran during the await, don't resurrect a canceled entry.
-        if self._end_time is not end_time:
-            return
-        data[self._timer_id] = {
-            "end_time": end_time.isoformat(),
-            "duration": duration,
-        }
-        await self._store.async_save(data)
+        async with self._store_lock:
+            # Capture values so a concurrent cancel() can't null them mid-persist.
+            end_time = self._end_time
+            duration = self._duration
+            if not end_time:
+                return
+            data = await self._store.async_load() or {}
+            data[self._timer_id] = {
+                "end_time": end_time.isoformat(),
+                "duration": duration,
+            }
+            await self._store.async_save(data)
 
     async def _remove_from_store(self) -> None:
         """Remove this timer's entry from the store."""
         if not self._store or not self._timer_id:
             return
-        data = await self._store.async_load() or {}
-        if self._timer_id in data:
-            del data[self._timer_id]
-            await self._store.async_save(data)
+        async with self._store_lock:
+            data = await self._store.async_load() or {}
+            if self._timer_id in data:
+                del data[self._timer_id]
+                await self._store.async_save(data)
 
     @property
     def is_running(self) -> bool:
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index 0f832abb..6d653e7c 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -802,8 +802,8 @@ async def mock_action(*args):
     await timer._persist_to_store()
 
 
-async def test_keymaster_timer_persist_survives_concurrent_cancel(hass, mock_store):
-    """Test _persist_to_store bails out if cancel() clears _end_time mid-persist."""
+async def test_keymaster_timer_persist_no_crash_when_end_time_cleared(hass, mock_store):
+    """Test _persist_to_store doesn't crash if _end_time is cleared before lock acquired."""
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
         lock_name="test_lock",
@@ -817,26 +817,50 @@ async def mock_action(*args):
 
     await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store)
 
-    # Make async_load simulate a yield that lets cancel() sneak in
-    original_end_time = dt_util.utcnow() + timedelta(minutes=5)
-
-    async def load_and_cancel():
-        """Simulate cancel() running during the await in _persist_to_store."""
+    # Set timer state then clear it (simulates cancel() winning the pre-guard check)
+    timer._end_time = dt_util.utcnow() + timedelta(minutes=5)
+    timer._duration = 300
+    # Acquire the lock first (simulates cancel holding it), then clear end_time
+    async with timer._store_lock:
         timer._end_time = None
         timer._duration = None
-        return {}
 
-    mock_store.async_load = AsyncMock(side_effect=load_and_cancel)
+    # Now persist runs — should bail out at the pre-guard, not crash
+    await timer._persist_to_store()
+    mock_store.async_save.assert_not_called()
 
-    # Manually set timer state as start() would
-    timer._end_time = original_end_time
-    timer._duration = 300
 
-    # Should NOT raise AttributeError, and should NOT resurrect the canceled entry
-    await timer._persist_to_store()
+async def test_keymaster_timer_store_lock_serializes_persist_and_remove(hass, mock_store):
+    """Test that _store_lock prevents persist and remove from interleaving."""
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+    kmlock.autolock_min_day = 5
 
-    # cancel() won the race — persist should have bailed out without saving
-    mock_store.async_save.assert_not_called()
+    async def mock_action(*args):
+        pass
+
+    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store)
+
+    with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
+        await timer.start()
+
+    # Verify persist saved the timer
+    mock_store.async_save.assert_called()
+    saved_data = mock_store.async_save.call_args[0][0]
+    assert "test_timer" in saved_data
+
+    # Now cancel — remove should clean up the entry
+    mock_store.async_load = AsyncMock(return_value=dict(saved_data))
+    mock_store.async_save.reset_mock()
+    await timer.cancel()
+
+    mock_store.async_save.assert_called()
+    final_data = mock_store.async_save.call_args[0][0]
+    assert "test_timer" not in final_data
 
 
 async def test_keymaster_timer_remove_from_store_missing_key(hass, mock_store):

From 38cdbc0c3aca9ef6055d4a80b6598fcde1112985 Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sat, 2 May 2026 18:02:43 -0400
Subject: [PATCH 04/23] test: replace weak race tests with true concurrent
 persist/cancel test

The previous two tests didn't actually exercise the race:
- one cleared _end_time before persist, exiting at the early guard
- the other ran start()/cancel() sequentially, so the lock was untested

Replaces both with a concurrent test using asyncio.Event to coordinate:
persist holds the lock while awaiting async_load, cancel is invoked
concurrently and must wait for the lock, then runs strictly after persist.
Verified to fail without the asyncio.Lock and pass with it.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 tests/test_helpers.py | 89 +++++++++++++++++++++++++------------------
 1 file changed, 52 insertions(+), 37 deletions(-)

diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index 6d653e7c..35efe733 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -1,5 +1,6 @@
 """Test keymaster helpers."""
 
+import asyncio
 from datetime import timedelta
 from unittest.mock import AsyncMock, MagicMock, patch
 
@@ -802,8 +803,15 @@ async def mock_action(*args):
     await timer._persist_to_store()
 
 
-async def test_keymaster_timer_persist_no_crash_when_end_time_cleared(hass, mock_store):
-    """Test _persist_to_store doesn't crash if _end_time is cleared before lock acquired."""
+async def test_keymaster_timer_concurrent_persist_and_cancel(hass, mock_store):
+    """Test concurrent _persist_to_store and cancel() don't crash and cancel wins.
+
+    Reproduces the original race: persist passes its initial guard, awaits
+    async_load, and during that yield cancel() runs. With the asyncio.Lock,
+    cancel must wait for persist to release the lock — so persist completes
+    cleanly (no AttributeError) and cancel runs strictly after, leaving the
+    final store state with the entry removed.
+    """
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
         lock_name="test_lock",
@@ -817,50 +825,57 @@ async def mock_action(*args):
 
     await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store)
 
-    # Set timer state then clear it (simulates cancel() winning the pre-guard check)
-    timer._end_time = dt_util.utcnow() + timedelta(minutes=5)
-    timer._duration = 300
-    # Acquire the lock first (simulates cancel holding it), then clear end_time
-    async with timer._store_lock:
-        timer._end_time = None
-        timer._duration = None
+    # Simulate real store: load returns whatever was last saved
+    store_state: dict = {}
+    saved_states: list[dict] = []
 
-    # Now persist runs — should bail out at the pre-guard, not crash
-    await timer._persist_to_store()
-    mock_store.async_save.assert_not_called()
+    async def record_save(data):
+        nonlocal store_state
+        store_state = dict(data)
+        saved_states.append(dict(data))
 
+    mock_store.async_save = AsyncMock(side_effect=record_save)
 
-async def test_keymaster_timer_store_lock_serializes_persist_and_remove(hass, mock_store):
-    """Test that _store_lock prevents persist and remove from interleaving."""
-    timer = KeymasterTimer()
-    kmlock = KeymasterLock(
-        lock_name="test_lock",
-        lock_entity_id="lock.test_lock",
-        keymaster_config_entry_id="test_entry",
-    )
-    kmlock.autolock_min_day = 5
+    # Block async_load on an event so persist gets stuck after acquiring the lock.
+    # Once the event is set, persist completes; cancel (queued behind the lock)
+    # then runs and removes the entry.
+    load_release = asyncio.Event()
+    persist_load_started = asyncio.Event()
+    load_call_count = 0
 
-    async def mock_action(*args):
-        pass
+    async def blocking_load():
+        nonlocal load_call_count
+        load_call_count += 1
+        if load_call_count == 1:
+            # First call is from persist — block until cancel has been invoked
+            persist_load_started.set()
+            await load_release.wait()
+        return dict(store_state)
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store)
+    mock_store.async_load = AsyncMock(side_effect=blocking_load)
 
-    with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
-        await timer.start()
+    # Prime timer state as start() would
+    timer._end_time = dt_util.utcnow() + timedelta(minutes=5)
+    timer._duration = 300
 
-    # Verify persist saved the timer
-    mock_store.async_save.assert_called()
-    saved_data = mock_store.async_save.call_args[0][0]
-    assert "test_timer" in saved_data
+    # Kick off persist; it will acquire the lock and block on async_load
+    persist_task = asyncio.create_task(timer._persist_to_store())
+    await persist_load_started.wait()
 
-    # Now cancel — remove should clean up the entry
-    mock_store.async_load = AsyncMock(return_value=dict(saved_data))
-    mock_store.async_save.reset_mock()
-    await timer.cancel()
+    # Now invoke cancel concurrently — it must wait for the lock
+    cancel_task = asyncio.create_task(timer.cancel())
+    # Yield to let cancel attempt to acquire the lock and block
+    await asyncio.sleep(0)
+    assert not cancel_task.done(), "cancel should be blocked waiting for the store lock"
 
-    mock_store.async_save.assert_called()
-    final_data = mock_store.async_save.call_args[0][0]
-    assert "test_timer" not in final_data
+    # Release persist; it should save, then cancel acquires lock and removes
+    load_release.set()
+    await asyncio.gather(persist_task, cancel_task)
+
+    # No crash, persist saved the entry, cancel then removed it
+    assert len(saved_states) == 2
+    assert "test_timer" in saved_states[0], "persist should have saved the entry"
+    assert "test_timer" not in saved_states[1], "cancel should have removed the entry"
 
 
 async def test_keymaster_timer_remove_from_store_missing_key(hass, mock_store):

From d84b73ec8e9051daa0f0d3bdcc8458468b07ca84 Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sat, 2 May 2026 18:12:14 -0400
Subject: [PATCH 05/23] fix: share timer store lock across all timers writing
 to same store
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The per-instance asyncio.Lock only protected one timer at a time. Since
all KeymasterTimer instances persist into the same _timer_store dict,
two timers could load the dict concurrently, mutate their own keys, and
the later async_save would clobber the earlier write — silently dropping
another timer's persisted entry.

Hoists the lock to the coordinator (one per _timer_store) and accepts
it via setup(store_lock=...). The per-instance default is kept as a
safe fallback for tests/standalone use.

Adds a regression test that fails without the shared lock and passes
with it.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/coordinator.py |  4 ++
 custom_components/keymaster/helpers.py     |  7 ++-
 tests/test_helpers.py                      | 63 ++++++++++++++++++++++
 3 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/custom_components/keymaster/coordinator.py b/custom_components/keymaster/coordinator.py
index 58769fe7..1507af3a 100644
--- a/custom_components/keymaster/coordinator.py
+++ b/custom_components/keymaster/coordinator.py
@@ -122,6 +122,9 @@ def __init__(self, hass: HomeAssistant) -> None:
         self._timer_store: Store[dict[str, TimerStoreEntry]] = Store(
             hass, TIMER_STORAGE_VERSION, TIMER_STORAGE_KEY
         )
+        # Shared across all KeymasterTimer instances writing to _timer_store so
+        # concurrent persists/removes can't drop each other's entries.
+        self._timer_store_lock = asyncio.Lock()
 
     async def initial_setup(self) -> None:
         """Trigger the initial async_setup."""
@@ -928,6 +931,7 @@ async def _setup_timer(self, kmlock: KeymasterLock) -> None:
                 call_action=functools.partial(self._timer_triggered, kmlock),
                 timer_id=f"{kmlock.keymaster_config_entry_id}_autolock",
                 store=self._timer_store,
+                store_lock=self._timer_store_lock,
             )
             if kmlock.autolock_timer.is_running:
                 self.async_set_updated_data(dict(self.kmlocks))
diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index 5bc652f4..fccdd487 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -83,7 +83,9 @@ def __init__(self) -> None:
         self._duration: int | None = None
         self._timer_id: str | None = None
         self._store: Store[dict[str, TimerStoreEntry]] | None = None
-        self._store_lock = asyncio.Lock()
+        # Replaced in setup() with a lock shared across all timers using the
+        # same store; the per-instance default is a safe fallback for tests.
+        self._store_lock: asyncio.Lock = asyncio.Lock()
 
     async def setup(
         self,
@@ -92,12 +94,15 @@ async def setup(
         call_action: Callable,
         timer_id: str,
         store: Store[dict[str, TimerStoreEntry]],
+        store_lock: asyncio.Lock | None = None,
     ) -> None:
         """Set up the timer and recover any persisted state."""
         self.hass = hass
         self._kmlock = kmlock
         self._call_action = call_action
         self._timer_id = timer_id
+        if store_lock is not None:
+            self._store_lock = store_lock
         self._store = store
 
         # Recover persisted timer
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index 35efe733..7bf15403 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -878,6 +878,69 @@ async def blocking_load():
     assert "test_timer" not in saved_states[1], "cancel should have removed the entry"
 
 
+async def test_keymaster_timer_shared_lock_prevents_cross_timer_update_loss(hass, mock_store):
+    """Test two timers sharing a store lock don't drop each other's persisted entries.
+
+    Without a shared lock, timer A and timer B can both load the store dict
+    concurrently, each mutate their own key, and the later async_save() will
+    overwrite the earlier write. The shared lock forces serialization so
+    both entries land in the final saved state.
+    """
+    shared_lock = asyncio.Lock()
+
+    # Simulate the real Store: load returns whatever was last saved
+    store_state: dict = {}
+    saved_states: list[dict] = []
+
+    async def record_save(data):
+        nonlocal store_state
+        store_state = dict(data)
+        saved_states.append(dict(data))
+
+    # Snapshot store_state on entry, then yield. With the shared lock, only
+    # one persist enters at a time, so each snapshot reflects the previous
+    # save. Without the lock, both persists snapshot the empty pre-write
+    # state and the second save clobbers the first.
+    async def snapshotting_load():
+        snapshot = dict(store_state)
+        await asyncio.sleep(0)
+        return snapshot
+
+    async def mock_action(*args):
+        pass
+
+    timer_a = KeymasterTimer()
+    timer_b = KeymasterTimer()
+    kmlock_a = KeymasterLock(
+        lock_name="lock_a", lock_entity_id="lock.a", keymaster_config_entry_id="entry_a"
+    )
+    kmlock_b = KeymasterLock(
+        lock_name="lock_b", lock_entity_id="lock.b", keymaster_config_entry_id="entry_b"
+    )
+    await timer_a.setup(
+        hass, kmlock_a, mock_action, timer_id="timer_a", store=mock_store, store_lock=shared_lock
+    )
+    await timer_b.setup(
+        hass, kmlock_b, mock_action, timer_id="timer_b", store=mock_store, store_lock=shared_lock
+    )
+
+    # Swap the mocks AFTER setup so the barrier only counts persist's loads
+    mock_store.async_save = AsyncMock(side_effect=record_save)
+    mock_store.async_load = AsyncMock(side_effect=snapshotting_load)
+
+    timer_a._end_time = dt_util.utcnow() + timedelta(minutes=5)
+    timer_a._duration = 300
+    timer_b._end_time = dt_util.utcnow() + timedelta(minutes=10)
+    timer_b._duration = 600
+
+    # Persist both concurrently — without a shared lock, one would overwrite the other
+    await asyncio.gather(timer_a._persist_to_store(), timer_b._persist_to_store())
+
+    # Final saved state must contain BOTH entries
+    assert "timer_a" in store_state
+    assert "timer_b" in store_state
+
+
 async def test_keymaster_timer_remove_from_store_missing_key(hass, mock_store):
     """Test _remove_from_store is a no-op when timer_id is not in the store."""
     timer = KeymasterTimer()

From 4f2e50e4f9a83abf74d915b5ffe1aa639a695acf Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sat, 2 May 2026 18:20:41 -0400
Subject: [PATCH 06/23] refactor: make timer store_lock required, remove
 per-instance fallback

The per-instance asyncio.Lock default existed only to keep tests passing
without threading a lock through every fixture. In production the
coordinator always supplies the shared lock, and a per-instance lock is
the bug we just fixed (cross-timer overwrite race). Make store_lock a
required setup() parameter so the type system enforces correct wiring,
and update existing tests to pass the lock explicitly.

Adds an assertion in test_setup_timer_passes_timer_id_and_store that
the coordinator wires its shared _timer_store_lock to setup(), so a
future refactor can't silently drop it and reintroduce the race.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/helpers.py | 15 +++---
 tests/test_coordinator.py              |  6 ++-
 tests/test_helpers.py                  | 74 ++++++++++++++------------
 3 files changed, 52 insertions(+), 43 deletions(-)

diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index fccdd487..84828426 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -83,9 +83,9 @@ def __init__(self) -> None:
         self._duration: int | None = None
         self._timer_id: str | None = None
         self._store: Store[dict[str, TimerStoreEntry]] | None = None
-        # Replaced in setup() with a lock shared across all timers using the
-        # same store; the per-instance default is a safe fallback for tests.
-        self._store_lock: asyncio.Lock = asyncio.Lock()
+        # Set in setup(); shared across all timers writing to the same store
+        # so concurrent persists/removes can't drop each other's entries.
+        self._store_lock: asyncio.Lock | None = None
 
     async def setup(
         self,
@@ -94,15 +94,14 @@ async def setup(
         call_action: Callable,
         timer_id: str,
         store: Store[dict[str, TimerStoreEntry]],
-        store_lock: asyncio.Lock | None = None,
+        store_lock: asyncio.Lock,
     ) -> None:
         """Set up the timer and recover any persisted state."""
         self.hass = hass
         self._kmlock = kmlock
         self._call_action = call_action
         self._timer_id = timer_id
-        if store_lock is not None:
-            self._store_lock = store_lock
+        self._store_lock = store_lock
         self._store = store
 
         # Recover persisted timer
@@ -195,7 +194,7 @@ async def _resume(self, end_time: dt, duration: int) -> None:
 
     async def _persist_to_store(self) -> None:
         """Write current timer state to the store."""
-        if not self._store or not self._timer_id or not self._end_time:
+        if not self._store or not self._timer_id or not self._end_time or not self._store_lock:
             return
         async with self._store_lock:
             # Capture values so a concurrent cancel() can't null them mid-persist.
@@ -212,7 +211,7 @@ async def _persist_to_store(self) -> None:
 
     async def _remove_from_store(self) -> None:
         """Remove this timer's entry from the store."""
-        if not self._store or not self._timer_id:
+        if not self._store or not self._timer_id or not self._store_lock:
             return
         async with self._store_lock:
             data = await self._store.async_load() or {}
diff --git a/tests/test_coordinator.py b/tests/test_coordinator.py
index 533f5a0b..2af981c7 100644
--- a/tests/test_coordinator.py
+++ b/tests/test_coordinator.py
@@ -1313,7 +1313,7 @@ def mock_coordinator(self, hass):
         return coordinator
 
     async def test_setup_timer_passes_timer_id_and_store(self, mock_coordinator):
-        """Test _setup_timer passes timer_id and store to timer.setup()."""
+        """Test _setup_timer passes timer_id, store, and shared store_lock to setup()."""
         kmlock = KeymasterLock(
             lock_name="test_lock",
             lock_entity_id="lock.test",
@@ -1334,6 +1334,10 @@ async def test_setup_timer_passes_timer_id_and_store(self, mock_coordinator):
         call_kwargs = mock_timer.setup.call_args.kwargs
         assert call_kwargs["timer_id"] == "test_entry_123_autolock"
         assert call_kwargs["store"] is mock_coordinator._timer_store
+        # The shared lock must be the coordinator's, not a per-timer one,
+        # otherwise concurrent persists from different timers can clobber
+        # each other's entries in the shared store dict.
+        assert call_kwargs["store_lock"] is mock_coordinator._timer_store_lock
 
     async def test_setup_timer_pushes_data_when_timer_resumed(self, mock_coordinator):
         """Test _setup_timer pushes data to entities when timer resumes from persistence."""
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index 7bf15403..70cdbc6e 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -194,6 +194,12 @@ def mock_store():
     return store
 
 
+@pytest.fixture
+def store_lock():
+    """Provide a fresh asyncio.Lock for timer.setup()."""
+    return asyncio.Lock()
+
+
 # KeymasterTimer tests
 async def test_keymaster_timer_init():
     """Test KeymasterTimer initialization."""
@@ -228,7 +234,7 @@ async def mock_callback(*args):
     store = AsyncMock()
     store.async_load = AsyncMock(return_value={})
     store.async_save = AsyncMock()
-    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store)
+    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock())
 
     assert timer.hass is hass
     assert timer._kmlock is kmlock
@@ -266,7 +272,7 @@ async def mock_callback(*args):
     store = AsyncMock()
     store.async_load = AsyncMock(return_value={})
     store.async_save = AsyncMock()
-    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store)
+    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock())
 
     # Mock sun.is_up to return True (daytime)
     with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
@@ -300,7 +306,7 @@ async def mock_callback(*args):
     store = AsyncMock()
     store.async_load = AsyncMock(return_value={})
     store.async_save = AsyncMock()
-    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store)
+    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock())
 
     # Mock sun.is_up to return False (nighttime)
     with patch("custom_components.keymaster.helpers.sun.is_up", return_value=False):
@@ -333,7 +339,7 @@ async def mock_callback(*args):
     store = AsyncMock()
     store.async_load = AsyncMock(return_value={})
     store.async_save = AsyncMock()
-    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store)
+    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock())
 
     # Start timer first time
     with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
@@ -369,7 +375,7 @@ async def mock_callback(*args):
     store = AsyncMock()
     store.async_load = AsyncMock(return_value={})
     store.async_save = AsyncMock()
-    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store)
+    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock())
 
     # Start timer
     with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
@@ -407,7 +413,7 @@ async def mock_callback(*args):
     store = AsyncMock()
     store.async_load = AsyncMock(return_value={})
     store.async_save = AsyncMock()
-    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store)
+    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock())
 
     # Before starting
     assert timer.is_setup
@@ -495,7 +501,7 @@ async def mock_callback(*args):
     store = AsyncMock()
     store.async_load = AsyncMock(return_value={})
     store.async_save = AsyncMock()
-    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store)
+    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock())
 
     # Start timer
     with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
@@ -524,7 +530,7 @@ async def mock_callback(*args):
     store = AsyncMock()
     store.async_load = AsyncMock(return_value={})
     store.async_save = AsyncMock()
-    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store)
+    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock())
 
     unsub = MagicMock()
     timer._end_time = dt_util.utcnow() - timedelta(seconds=1)
@@ -545,7 +551,7 @@ async def mock_callback(*args):
     assert len(timer._unsub_events) == 1
 
 
-async def test_keymaster_timer_setup_recovers_expired_timer(hass, mock_store):
+async def test_keymaster_timer_setup_recovers_expired_timer(hass, mock_store, store_lock):
     """Test setup() fires action immediately when persisted timer has expired (issue #594)."""
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
@@ -565,7 +571,7 @@ async def mock_action(*args):
         nonlocal action_called
         action_called = True
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store)
+    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
     await hass.async_block_till_done()
 
     # Expired timer should have fired the action
@@ -580,7 +586,7 @@ async def mock_action(*args):
     assert not timer.is_running
 
 
-async def test_keymaster_timer_setup_resumes_active_timer(hass, mock_store):
+async def test_keymaster_timer_setup_resumes_active_timer(hass, mock_store, store_lock):
     """Test setup() resumes timer when persisted timer is still active."""
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
@@ -597,7 +603,7 @@ async def test_keymaster_timer_setup_resumes_active_timer(hass, mock_store):
     async def mock_action(*args):
         pass
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store)
+    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
 
     # Timer should be running with the persisted end_time
     assert timer.is_running
@@ -605,7 +611,7 @@ async def mock_action(*args):
     assert len(timer._unsub_events) == 1
 
 
-async def test_keymaster_timer_setup_no_persisted_timer(hass, mock_store):
+async def test_keymaster_timer_setup_no_persisted_timer(hass, mock_store, store_lock):
     """Test setup() does nothing when no persisted timer exists."""
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
@@ -617,14 +623,14 @@ async def test_keymaster_timer_setup_no_persisted_timer(hass, mock_store):
     async def mock_action(*args):
         pass
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store)
+    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
 
     # Timer should be set up but not running
     assert timer.is_setup
     assert not timer.is_running
 
 
-async def test_keymaster_timer_start_persists_to_store(hass, mock_store):
+async def test_keymaster_timer_start_persists_to_store(hass, mock_store, store_lock):
     """Test start() persists end_time to the store."""
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
@@ -637,7 +643,7 @@ async def test_keymaster_timer_start_persists_to_store(hass, mock_store):
     async def mock_action(*args):
         pass
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store)
+    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
 
     with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
         await timer.start()
@@ -650,7 +656,7 @@ async def mock_action(*args):
     assert saved_data["test_timer"]["duration"] == 300
 
 
-async def test_keymaster_timer_cancel_removes_from_store(hass, mock_store):
+async def test_keymaster_timer_cancel_removes_from_store(hass, mock_store, store_lock):
     """Test cancel() removes the timer from the store."""
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
@@ -663,7 +669,7 @@ async def test_keymaster_timer_cancel_removes_from_store(hass, mock_store):
     async def mock_action(*args):
         pass
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store)
+    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
 
     with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
         await timer.start()
@@ -700,7 +706,7 @@ def test_async_has_supported_provider_no_args(hass):
     assert result is False
 
 
-async def test_keymaster_timer_setup_invalid_end_time_format(hass, mock_store):
+async def test_keymaster_timer_setup_invalid_end_time_format(hass, mock_store, store_lock):
     """Test setup() handles corrupt end_time string in persisted data."""
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
@@ -719,7 +725,7 @@ async def mock_action(*args):
         nonlocal action_called
         action_called = True
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store)
+    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
 
     # Action should NOT have fired
     assert action_called is False
@@ -731,7 +737,7 @@ async def mock_action(*args):
     assert "test_timer" not in saved_data
 
 
-async def test_keymaster_timer_setup_missing_end_time_key(hass, mock_store):
+async def test_keymaster_timer_setup_missing_end_time_key(hass, mock_store, store_lock):
     """Test setup() handles persisted data with missing end_time key."""
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
@@ -746,7 +752,7 @@ async def test_keymaster_timer_setup_missing_end_time_key(hass, mock_store):
     async def mock_action(*args):
         pass
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store)
+    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
 
     assert not timer.is_running
     # Invalid entry should be removed from store
@@ -755,7 +761,7 @@ async def mock_action(*args):
     assert "test_timer" not in saved_data
 
 
-async def test_keymaster_timer_setup_null_end_time(hass, mock_store):
+async def test_keymaster_timer_setup_null_end_time(hass, mock_store, store_lock):
     """Test setup() handles persisted data with None end_time."""
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
@@ -771,7 +777,7 @@ async def test_keymaster_timer_setup_null_end_time(hass, mock_store):
     async def mock_action(*args):
         pass
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store)
+    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
 
     assert not timer.is_running
     mock_store.async_save.assert_called()
@@ -803,7 +809,7 @@ async def mock_action(*args):
     await timer._persist_to_store()
 
 
-async def test_keymaster_timer_concurrent_persist_and_cancel(hass, mock_store):
+async def test_keymaster_timer_concurrent_persist_and_cancel(hass, mock_store, store_lock):
     """Test concurrent _persist_to_store and cancel() don't crash and cancel wins.
 
     Reproduces the original race: persist passes its initial guard, awaits
@@ -823,7 +829,7 @@ async def test_keymaster_timer_concurrent_persist_and_cancel(hass, mock_store):
     async def mock_action(*args):
         pass
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store)
+    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
 
     # Simulate real store: load returns whatever was last saved
     store_state: dict = {}
@@ -878,7 +884,7 @@ async def blocking_load():
     assert "test_timer" not in saved_states[1], "cancel should have removed the entry"
 
 
-async def test_keymaster_timer_shared_lock_prevents_cross_timer_update_loss(hass, mock_store):
+async def test_keymaster_timer_shared_lock_prevents_cross_timer_update_loss(hass, mock_store, store_lock):
     """Test two timers sharing a store lock don't drop each other's persisted entries.
 
     Without a shared lock, timer A and timer B can both load the store dict
@@ -941,7 +947,7 @@ async def mock_action(*args):
     assert "timer_b" in store_state
 
 
-async def test_keymaster_timer_remove_from_store_missing_key(hass, mock_store):
+async def test_keymaster_timer_remove_from_store_missing_key(hass, mock_store, store_lock):
     """Test _remove_from_store is a no-op when timer_id is not in the store."""
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
@@ -958,7 +964,7 @@ async def test_keymaster_timer_remove_from_store_missing_key(hass, mock_store):
     async def mock_action(*args):
         pass
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store)
+    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
 
     mock_store.async_save.reset_mock()
 
@@ -969,7 +975,7 @@ async def mock_action(*args):
     mock_store.async_save.assert_not_called()
 
 
-async def test_keymaster_timer_setup_duration_missing_defaults_to_zero(hass, mock_store):
+async def test_keymaster_timer_setup_duration_missing_defaults_to_zero(hass, mock_store, store_lock):
     """Test setup() defaults duration to 0 when missing from persisted data."""
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
@@ -984,13 +990,13 @@ async def test_keymaster_timer_setup_duration_missing_defaults_to_zero(hass, moc
     async def mock_action(*args):
         pass
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store)
+    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
 
     assert timer.is_running
     assert timer._duration == 0
 
 
-async def test_keymaster_timer_on_expired_callback(hass, mock_store):
+async def test_keymaster_timer_on_expired_callback(hass, mock_store, store_lock):
     """Test the _on_expired callback fires the action then cancels the timer."""
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
@@ -1006,7 +1012,7 @@ async def mock_action(now):
         nonlocal action_called_with
         action_called_with = now
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store)
+    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
 
     with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
         await timer.start()
@@ -1046,7 +1052,7 @@ async def test_keymaster_timer_remove_from_store_no_store(hass):
     await timer._remove_from_store()
 
 
-async def test_keymaster_timer_remove_from_store_no_timer_id(hass, mock_store):
+async def test_keymaster_timer_remove_from_store_no_timer_id(hass, mock_store, store_lock):
     """Test _remove_from_store is a no-op when timer_id is None."""
     timer = KeymasterTimer()
     timer._store = mock_store

From 2b6f605d564c9e26e3dae99abf653bd534a4712c Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sat, 2 May 2026 18:32:17 -0400
Subject: [PATCH 07/23] style: ruff format

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 tests/test_helpers.py | 88 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 66 insertions(+), 22 deletions(-)

diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index 70cdbc6e..14adad40 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -234,7 +234,9 @@ async def mock_callback(*args):
     store = AsyncMock()
     store.async_load = AsyncMock(return_value={})
     store.async_save = AsyncMock()
-    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock())
+    await timer.setup(
+        hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock()
+    )
 
     assert timer.hass is hass
     assert timer._kmlock is kmlock
@@ -272,7 +274,9 @@ async def mock_callback(*args):
     store = AsyncMock()
     store.async_load = AsyncMock(return_value={})
     store.async_save = AsyncMock()
-    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock())
+    await timer.setup(
+        hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock()
+    )
 
     # Mock sun.is_up to return True (daytime)
     with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
@@ -306,7 +310,9 @@ async def mock_callback(*args):
     store = AsyncMock()
     store.async_load = AsyncMock(return_value={})
     store.async_save = AsyncMock()
-    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock())
+    await timer.setup(
+        hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock()
+    )
 
     # Mock sun.is_up to return False (nighttime)
     with patch("custom_components.keymaster.helpers.sun.is_up", return_value=False):
@@ -339,7 +345,9 @@ async def mock_callback(*args):
     store = AsyncMock()
     store.async_load = AsyncMock(return_value={})
     store.async_save = AsyncMock()
-    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock())
+    await timer.setup(
+        hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock()
+    )
 
     # Start timer first time
     with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
@@ -375,7 +383,9 @@ async def mock_callback(*args):
     store = AsyncMock()
     store.async_load = AsyncMock(return_value={})
     store.async_save = AsyncMock()
-    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock())
+    await timer.setup(
+        hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock()
+    )
 
     # Start timer
     with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
@@ -413,7 +423,9 @@ async def mock_callback(*args):
     store = AsyncMock()
     store.async_load = AsyncMock(return_value={})
     store.async_save = AsyncMock()
-    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock())
+    await timer.setup(
+        hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock()
+    )
 
     # Before starting
     assert timer.is_setup
@@ -501,7 +513,9 @@ async def mock_callback(*args):
     store = AsyncMock()
     store.async_load = AsyncMock(return_value={})
     store.async_save = AsyncMock()
-    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock())
+    await timer.setup(
+        hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock()
+    )
 
     # Start timer
     with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
@@ -530,7 +544,9 @@ async def mock_callback(*args):
     store = AsyncMock()
     store.async_load = AsyncMock(return_value={})
     store.async_save = AsyncMock()
-    await timer.setup(hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock())
+    await timer.setup(
+        hass, kmlock, mock_callback, timer_id="test_timer", store=store, store_lock=asyncio.Lock()
+    )
 
     unsub = MagicMock()
     timer._end_time = dt_util.utcnow() - timedelta(seconds=1)
@@ -571,7 +587,9 @@ async def mock_action(*args):
         nonlocal action_called
         action_called = True
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
     await hass.async_block_till_done()
 
     # Expired timer should have fired the action
@@ -603,7 +621,9 @@ async def test_keymaster_timer_setup_resumes_active_timer(hass, mock_store, stor
     async def mock_action(*args):
         pass
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
 
     # Timer should be running with the persisted end_time
     assert timer.is_running
@@ -623,7 +643,9 @@ async def test_keymaster_timer_setup_no_persisted_timer(hass, mock_store, store_
     async def mock_action(*args):
         pass
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
 
     # Timer should be set up but not running
     assert timer.is_setup
@@ -643,7 +665,9 @@ async def test_keymaster_timer_start_persists_to_store(hass, mock_store, store_l
     async def mock_action(*args):
         pass
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
 
     with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
         await timer.start()
@@ -669,7 +693,9 @@ async def test_keymaster_timer_cancel_removes_from_store(hass, mock_store, store
     async def mock_action(*args):
         pass
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
 
     with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
         await timer.start()
@@ -725,7 +751,9 @@ async def mock_action(*args):
         nonlocal action_called
         action_called = True
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
 
     # Action should NOT have fired
     assert action_called is False
@@ -752,7 +780,9 @@ async def test_keymaster_timer_setup_missing_end_time_key(hass, mock_store, stor
     async def mock_action(*args):
         pass
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
 
     assert not timer.is_running
     # Invalid entry should be removed from store
@@ -777,7 +807,9 @@ async def test_keymaster_timer_setup_null_end_time(hass, mock_store, store_lock)
     async def mock_action(*args):
         pass
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
 
     assert not timer.is_running
     mock_store.async_save.assert_called()
@@ -829,7 +861,9 @@ async def test_keymaster_timer_concurrent_persist_and_cancel(hass, mock_store, s
     async def mock_action(*args):
         pass
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
 
     # Simulate real store: load returns whatever was last saved
     store_state: dict = {}
@@ -884,7 +918,9 @@ async def blocking_load():
     assert "test_timer" not in saved_states[1], "cancel should have removed the entry"
 
 
-async def test_keymaster_timer_shared_lock_prevents_cross_timer_update_loss(hass, mock_store, store_lock):
+async def test_keymaster_timer_shared_lock_prevents_cross_timer_update_loss(
+    hass, mock_store, store_lock
+):
     """Test two timers sharing a store lock don't drop each other's persisted entries.
 
     Without a shared lock, timer A and timer B can both load the store dict
@@ -964,7 +1000,9 @@ async def test_keymaster_timer_remove_from_store_missing_key(hass, mock_store, s
     async def mock_action(*args):
         pass
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
 
     mock_store.async_save.reset_mock()
 
@@ -975,7 +1013,9 @@ async def mock_action(*args):
     mock_store.async_save.assert_not_called()
 
 
-async def test_keymaster_timer_setup_duration_missing_defaults_to_zero(hass, mock_store, store_lock):
+async def test_keymaster_timer_setup_duration_missing_defaults_to_zero(
+    hass, mock_store, store_lock
+):
     """Test setup() defaults duration to 0 when missing from persisted data."""
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
@@ -990,7 +1030,9 @@ async def test_keymaster_timer_setup_duration_missing_defaults_to_zero(hass, moc
     async def mock_action(*args):
         pass
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
 
     assert timer.is_running
     assert timer._duration == 0
@@ -1012,7 +1054,9 @@ async def mock_action(now):
         nonlocal action_called_with
         action_called_with = now
 
-    await timer.setup(hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock)
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
 
     with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
         await timer.start()

From 657d09bff1434e10498f3e5989d68dd5fac43a74 Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sat, 2 May 2026 18:35:50 -0400
Subject: [PATCH 08/23] fix: detach old timer on lock reload to prevent
 orphaned callbacks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a config entry was reloaded, _update_lock replaced the old
KeymasterLock with a new instance and called _setup_timer() to wire up
a fresh KeymasterTimer. The old timer was orphaned but its
async_call_later callback stayed scheduled, holding a stale kmlock
reference. On firing it would invoke _timer_triggered with the dead
kmlock, producing duplicate or out-of-date autolock actions.

Adds KeymasterTimer.detach() — like cancel(), but skips the store
removal so the replacement timer can resume from the persisted entry on
setup(). _update_lock now detaches the old timer before _setup_timer
runs.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/coordinator.py |  6 +++
 custom_components/keymaster/helpers.py     | 17 ++++++++
 tests/test_coordinator.py                  | 50 ++++++++++++++++++++++
 tests/test_helpers.py                      | 45 +++++++++++++++++++
 4 files changed, 118 insertions(+)

diff --git a/custom_components/keymaster/coordinator.py b/custom_components/keymaster/coordinator.py
index 1507af3a..60c88dd0 100644
--- a/custom_components/keymaster/coordinator.py
+++ b/custom_components/keymaster/coordinator.py
@@ -1135,6 +1135,12 @@ async def _update_lock(self, new: KeymasterLock) -> bool:
         await self._rebuild_lock_relationships()
         await self._update_door_and_lock_state()
         await self._update_listeners(self.kmlocks[new.keymaster_config_entry_id])
+        # Drop the old kmlock's timer callbacks before wiring up the new one.
+        # Without this, the old timer's async_call_later stays scheduled with
+        # a stale kmlock reference and will fire alongside the new timer.
+        # Store entry is preserved so the new timer resumes from it.
+        if old.autolock_timer:
+            old.autolock_timer.detach()
         await self._setup_timer(self.kmlocks[new.keymaster_config_entry_id])
         await self.async_refresh()
         return True
diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index 84828426..c2137007 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -168,6 +168,23 @@ async def cancel(self, timer_elapsed: dt | None = None) -> None:
         self._duration = None
         await self._remove_from_store()
 
+    def detach(self) -> None:
+        """Drop in-memory callbacks/state without touching the persisted store entry.
+
+        Used when the owning KeymasterLock is being replaced (e.g. config entry
+        reload). The replacement lock's timer will resume from the store entry
+        on setup(), so we must NOT remove it here. Without this, the orphaned
+        old timer keeps its async_call_later callback scheduled with a stale
+        kmlock reference, causing duplicate or out-of-date autolock actions.
+        """
+        _LOGGER.debug("[KeymasterTimer] Detaching timer (state preserved in store)")
+        self._cancel_callbacks()
+        self._end_time = None
+        self._duration = None
+        self.hass = None
+        self._kmlock = None
+        self._call_action = None
+
     def _schedule_callbacks(self, delay: float) -> None:
         """Schedule a single callback that fires the action then cleans up."""
 
diff --git a/tests/test_coordinator.py b/tests/test_coordinator.py
index 2af981c7..c7757bb8 100644
--- a/tests/test_coordinator.py
+++ b/tests/test_coordinator.py
@@ -1373,6 +1373,56 @@ async def test_setup_timer_skips_if_already_setup(self, mock_coordinator):
 
         kmlock.autolock_timer.setup.assert_not_called()
 
+    async def test_update_lock_detaches_old_timer_before_setup(self, mock_coordinator):
+        """Test _update_lock detaches the old kmlock's timer before wiring up the new one.
+
+        Without detach(), the old timer's async_call_later callback stays
+        scheduled with a stale kmlock reference, firing alongside the new
+        timer and causing duplicate autolock actions on config entry reload.
+        """
+        entry_id = "test_entry_reload"
+
+        # Build a minimum-viable existing kmlock with a timer that looks active
+        old_lock = KeymasterLock(
+            lock_name="test_lock",
+            lock_entity_id="lock.test",
+            keymaster_config_entry_id=entry_id,
+        )
+        old_lock.starting_code_slot = 1
+        old_lock.number_of_code_slots = 1
+        old_lock.code_slots = {1: Mock(accesslimit_day_of_week=None)}
+        old_lock.autolock_timer = Mock()
+        old_lock.autolock_timer.detach = Mock()
+
+        # Build the replacement kmlock (config entry reload yields a new instance)
+        new_lock = KeymasterLock(
+            lock_name="test_lock",
+            lock_entity_id="lock.test",
+            keymaster_config_entry_id=entry_id,
+        )
+        new_lock.starting_code_slot = 1
+        new_lock.number_of_code_slots = 1
+        new_lock.code_slots = {1: Mock(accesslimit_day_of_week=None)}
+
+        mock_coordinator.kmlocks[entry_id] = old_lock
+        mock_coordinator._initial_setup_done_event.set()
+
+        with (
+            patch.object(KeymasterCoordinator, "_unsubscribe_listeners", new=AsyncMock()),
+            patch.object(mock_coordinator, "_rebuild_lock_relationships", new=AsyncMock()),
+            patch.object(mock_coordinator, "_update_door_and_lock_state", new=AsyncMock()),
+            patch.object(mock_coordinator, "_update_listeners", new=AsyncMock()),
+            patch.object(mock_coordinator, "_setup_timer", new=AsyncMock()) as mock_setup_timer,
+            patch.object(mock_coordinator, "async_refresh", new=AsyncMock()),
+        ):
+            await mock_coordinator._update_lock(new_lock)
+
+        # Old timer must be detached BEFORE the new timer is set up
+        old_lock.autolock_timer.detach.assert_called_once_with()
+        mock_setup_timer.assert_called_once()
+        # And the new kmlock (now in self.kmlocks) is what _setup_timer received
+        assert mock_setup_timer.call_args.args[0] is mock_coordinator.kmlocks[entry_id]
+
 
 # ============================================================================
 # State Synchronization Tests
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index 14adad40..47a11f61 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -983,6 +983,51 @@ async def mock_action(*args):
     assert "timer_b" in store_state
 
 
+async def test_keymaster_timer_detach_preserves_store(hass, mock_store, store_lock):
+    """Test detach() cancels callbacks and clears refs but preserves the store entry.
+
+    detach() is used when a kmlock is being replaced (config entry reload). The
+    replacement's timer must be able to resume from the store, so detach must
+    NOT call _remove_from_store (unlike cancel()).
+    """
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+    kmlock.autolock_min_day = 5
+
+    async def mock_action(*args):
+        pass
+
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+
+    with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
+        await timer.start()
+
+    # Track save/load calls AFTER start() so we only see what detach does
+    mock_store.async_save.reset_mock()
+    mock_store.async_load.reset_mock()
+
+    # Sanity: timer has a scheduled callback before detach
+    assert len(timer._unsub_events) == 1
+
+    timer.detach()
+
+    # In-memory state cleared
+    assert timer._unsub_events == []
+    assert timer._end_time is None
+    assert timer._duration is None
+    assert timer.hass is None
+    assert timer._kmlock is None
+    assert timer._call_action is None
+    # Critical: store was NOT modified — replacement timer needs to resume from it
+    mock_store.async_save.assert_not_called()
+
+
 async def test_keymaster_timer_remove_from_store_missing_key(hass, mock_store, store_lock):
     """Test _remove_from_store is a no-op when timer_id is not in the store."""
     timer = KeymasterTimer()

From bdb4840eee50980fcaff7d84f6d89bc9f4aedbbf Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sat, 2 May 2026 18:41:24 -0400
Subject: [PATCH 09/23] review: address review feedback - log silent guards,
 fix comments, harden detach
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- _persist_to_store / _remove_from_store: log error instead of silently
  no-opping when _store_lock is missing. The old behavior would mask a
  bug where setup() was skipped, silently dropping autolock state across
  restarts.
- _persist_to_store: clarify the snapshot comment — it guards against
  detach() nulling _end_time while the persist was queued behind the
  lock, not against an unprotected race (the lock handles the latter).
- detach(): wrap _cancel_callbacks() in try/finally so an unsub raise
  (e.g. during HA shutdown) doesn't leave the orphan with stale refs.
- coordinator._update_lock: trim 4-line comment to 1, since detach()'s
  docstring covers the why.
- Drop the redundant _store_lock field comment.
- Add test for _persist_to_store after detach() being a no-op.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/coordinator.py |  5 +--
 custom_components/keymaster/helpers.py     | 51 +++++++++++++++-------
 tests/test_helpers.py                      | 31 +++++++++++++
 3 files changed, 67 insertions(+), 20 deletions(-)

diff --git a/custom_components/keymaster/coordinator.py b/custom_components/keymaster/coordinator.py
index 60c88dd0..ef0732e0 100644
--- a/custom_components/keymaster/coordinator.py
+++ b/custom_components/keymaster/coordinator.py
@@ -1135,10 +1135,7 @@ async def _update_lock(self, new: KeymasterLock) -> bool:
         await self._rebuild_lock_relationships()
         await self._update_door_and_lock_state()
         await self._update_listeners(self.kmlocks[new.keymaster_config_entry_id])
-        # Drop the old kmlock's timer callbacks before wiring up the new one.
-        # Without this, the old timer's async_call_later stays scheduled with
-        # a stale kmlock reference and will fire alongside the new timer.
-        # Store entry is preserved so the new timer resumes from it.
+        # Cancel old timer's callbacks; new timer will resume from the preserved store entry.
         if old.autolock_timer:
             old.autolock_timer.detach()
         await self._setup_timer(self.kmlocks[new.keymaster_config_entry_id])
diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index c2137007..3d69c0f7 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -83,8 +83,6 @@ def __init__(self) -> None:
         self._duration: int | None = None
         self._timer_id: str | None = None
         self._store: Store[dict[str, TimerStoreEntry]] | None = None
-        # Set in setup(); shared across all timers writing to the same store
-        # so concurrent persists/removes can't drop each other's entries.
         self._store_lock: asyncio.Lock | None = None
 
     async def setup(
@@ -171,19 +169,25 @@ async def cancel(self, timer_elapsed: dt | None = None) -> None:
     def detach(self) -> None:
         """Drop in-memory callbacks/state without touching the persisted store entry.
 
-        Used when the owning KeymasterLock is being replaced (e.g. config entry
-        reload). The replacement lock's timer will resume from the store entry
-        on setup(), so we must NOT remove it here. Without this, the orphaned
-        old timer keeps its async_call_later callback scheduled with a stale
-        kmlock reference, causing duplicate or out-of-date autolock actions.
+        Unlike cancel(), which removes the store entry, detach() leaves it so a
+        replacement KeymasterTimer (created by _setup_timer on the new kmlock
+        after a config entry reload) can resume from it. Without this, the
+        orphaned old timer's async_call_later stays armed against a stale
+        kmlock reference and fires alongside the new timer.
         """
         _LOGGER.debug("[KeymasterTimer] Detaching timer (state preserved in store)")
-        self._cancel_callbacks()
-        self._end_time = None
-        self._duration = None
-        self.hass = None
-        self._kmlock = None
-        self._call_action = None
+        try:
+            self._cancel_callbacks()
+        except Exception:
+            # Best-effort: even if unsub raises (e.g. HA shutdown), we must
+            # finish nulling refs so the orphan can't operate on stale state.
+            _LOGGER.exception("[KeymasterTimer] Error cancelling callbacks during detach")
+        finally:
+            self._end_time = None
+            self._duration = None
+            self.hass = None
+            self._kmlock = None
+            self._call_action = None
 
     def _schedule_callbacks(self, delay: float) -> None:
         """Schedule a single callback that fires the action then cleans up."""
@@ -211,10 +215,18 @@ async def _resume(self, end_time: dt, duration: int) -> None:
 
     async def _persist_to_store(self) -> None:
         """Write current timer state to the store."""
-        if not self._store or not self._timer_id or not self._end_time or not self._store_lock:
+        if not self._store or not self._timer_id or not self._end_time:
+            return
+        if not self._store_lock:
+            _LOGGER.error(
+                "[KeymasterTimer] %s: store_lock missing; setup() was not called. "
+                "Persist skipped, autolock will not survive restart",
+                self._timer_id,
+            )
             return
         async with self._store_lock:
-            # Capture values so a concurrent cancel() can't null them mid-persist.
+            # Re-check after lock acquisition: detach() can null _end_time
+            # while we were queued, in which case there's nothing to persist.
             end_time = self._end_time
             duration = self._duration
             if not end_time:
@@ -228,7 +240,14 @@ async def _persist_to_store(self) -> None:
 
     async def _remove_from_store(self) -> None:
         """Remove this timer's entry from the store."""
-        if not self._store or not self._timer_id or not self._store_lock:
+        if not self._store or not self._timer_id:
+            return
+        if not self._store_lock:
+            _LOGGER.error(
+                "[KeymasterTimer] %s: store_lock missing; setup() was not called. "
+                "Remove skipped, stale entry will resume on restart",
+                self._timer_id,
+            )
             return
         async with self._store_lock:
             data = await self._store.async_load() or {}
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index 47a11f61..9b0f9ef0 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -1028,6 +1028,37 @@ async def mock_action(*args):
     mock_store.async_save.assert_not_called()
 
 
+async def test_keymaster_timer_persist_after_detach_is_noop(hass, mock_store, store_lock):
+    """Test _persist_to_store after detach() is a silent no-op (doesn't crash, no save).
+
+    detach() nulls _end_time, so the early guard returns. This protects against
+    a queued persist running after the timer was detached.
+    """
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+    kmlock.autolock_min_day = 5
+
+    async def mock_action(*args):
+        pass
+
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+    with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
+        await timer.start()
+
+    timer.detach()
+    mock_store.async_save.reset_mock()
+
+    # Should not raise and should not save
+    await timer._persist_to_store()
+    mock_store.async_save.assert_not_called()
+
+
 async def test_keymaster_timer_remove_from_store_missing_key(hass, mock_store, store_lock):
     """Test _remove_from_store is a no-op when timer_id is not in the store."""
     timer = KeymasterTimer()

From 6809cd303496a69c24f88235f7e996bdd4fcea1b Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sat, 2 May 2026 18:45:21 -0400
Subject: [PATCH 10/23] fix: close two more reload races - setup load under
 lock, _detached flag

Copilot review found two remaining races:

1. setup() loaded the store outside the store lock. During config entry
   reload, the new timer could read the empty store while the outgoing
   timer's persist was still queued behind the lock, deciding idle and
   silently losing autolock state. Move the entire load+resume under
   the lock; inline _remove_from_store calls (asyncio.Lock isn't
   reentrant) for the cleanup paths.

2. detach() unsubscribes future callbacks but can't kill an _on_expired
   coroutine that already started. Its `await self.cancel()` would
   remove the store entry the replacement timer was about to resume
   from. Add a _detached flag set first thing in detach(); cancel()
   becomes a noop when detached so the orphan can't trash the store.

Both fixes have regression tests verified to fail without the change.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/helpers.py | 29 ++++++++--
 tests/test_helpers.py                  | 78 ++++++++++++++++++++++++++
 2 files changed, 101 insertions(+), 6 deletions(-)

diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index 3d69c0f7..8ed6c782 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -84,6 +84,7 @@ def __init__(self) -> None:
         self._timer_id: str | None = None
         self._store: Store[dict[str, TimerStoreEntry]] | None = None
         self._store_lock: asyncio.Lock | None = None
+        self._detached = False
 
     async def setup(
         self,
@@ -102,10 +103,16 @@ async def setup(
         self._store_lock = store_lock
         self._store = store
 
-        # Recover persisted timer
-        data = await store.async_load() or {}
-        timer_data = data.get(timer_id)
-        if timer_data:
+        # Recover persisted timer under the lock so any in-flight persist/cancel
+        # from an outgoing timer (e.g. config entry reload) finishes first. This
+        # prevents reading a stale empty store while the old timer is mid-save.
+        # _resume is sync w.r.t. the store; _remove_from_store is inlined here
+        # because asyncio.Lock isn't reentrant.
+        async with self._store_lock:
+            data = await store.async_load() or {}
+            timer_data = data.get(timer_id)
+            if not timer_data:
+                return
             try:
                 end_time = dt.fromisoformat(timer_data["end_time"])
             except (KeyError, TypeError, ValueError):
@@ -113,7 +120,8 @@ async def setup(
                     "[KeymasterTimer] %s: Invalid persisted timer data, removing",
                     timer_id,
                 )
-                await self._remove_from_store()
+                del data[timer_id]
+                await store.async_save(data)
                 return
             duration = timer_data.get("duration", 0)
             if end_time <= dt_util.utcnow():
@@ -121,7 +129,8 @@ async def setup(
                     "[KeymasterTimer] %s: Persisted timer expired during downtime, firing",
                     timer_id,
                 )
-                await self._remove_from_store()
+                del data[timer_id]
+                await store.async_save(data)
                 hass.async_create_task(call_action(dt_util.utcnow()))
             else:
                 _LOGGER.debug(
@@ -157,6 +166,12 @@ async def start(self) -> bool:
 
     async def cancel(self, timer_elapsed: dt | None = None) -> None:
         """Cancel a timer."""
+        if self._detached:
+            # An _on_expired coroutine that was already running when detach()
+            # ran must NOT remove the store entry — the replacement timer owns
+            # it and will resume from there.
+            _LOGGER.debug("[KeymasterTimer] Cancel called on detached timer; ignoring")
+            return
         if timer_elapsed:
             _LOGGER.debug("[KeymasterTimer] Timer elapsed")
         else:
@@ -176,6 +191,8 @@ def detach(self) -> None:
         kmlock reference and fires alongside the new timer.
         """
         _LOGGER.debug("[KeymasterTimer] Detaching timer (state preserved in store)")
+        # Set first so any _on_expired already running short-circuits cancel().
+        self._detached = True
         try:
             self._cancel_callbacks()
         except Exception:
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index 9b0f9ef0..fb191db8 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -1028,6 +1028,84 @@ async def mock_action(*args):
     mock_store.async_save.assert_not_called()
 
 
+async def test_keymaster_timer_cancel_after_detach_is_noop(hass, mock_store, store_lock):
+    """Test cancel() on a detached timer doesn't touch the store.
+
+    Protects against an _on_expired coroutine that was already scheduled when
+    detach() ran: its `await self.cancel()` must not remove the store entry
+    that the replacement timer is about to resume from.
+    """
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+    kmlock.autolock_min_day = 5
+
+    async def mock_action(*args):
+        pass
+
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+    with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
+        await timer.start()
+
+    # Make the store actually contain the persisted entry so cancel() would
+    # otherwise remove it
+    mock_store.async_load = AsyncMock(
+        return_value={"test_timer": {"end_time": "2099-01-01T00:00:00+00:00", "duration": 300}}
+    )
+    timer.detach()
+    mock_store.async_save.reset_mock()
+
+    # Simulating an in-flight _on_expired calling cancel after detach
+    await timer.cancel()
+    mock_store.async_save.assert_not_called()
+
+
+async def test_keymaster_timer_setup_load_under_lock(hass, mock_store, store_lock):
+    """Test setup()'s load+resume runs under the store lock.
+
+    If setup() loaded outside the lock, a config entry reload could let the
+    new timer read an empty store while the outgoing timer's persist was
+    still queued, silently losing autolock state.
+    """
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+    kmlock.autolock_min_day = 5
+
+    async def mock_action(*args):
+        pass
+
+    # Acquire the lock before setup; if setup loads under the lock it must wait
+    async with store_lock:
+        setup_task = asyncio.create_task(
+            timer.setup(
+                hass,
+                kmlock,
+                mock_action,
+                timer_id="test_timer",
+                store=mock_store,
+                store_lock=store_lock,
+            )
+        )
+        # Yield to let setup run as far as it can
+        await asyncio.sleep(0)
+        assert not setup_task.done(), "setup should be blocked on the store lock"
+        # async_load must NOT have been called yet — it's inside the lock
+        mock_store.async_load.assert_not_called()
+
+    # Lock released; setup should now finish
+    await setup_task
+    mock_store.async_load.assert_called()
+
+
 async def test_keymaster_timer_persist_after_detach_is_noop(hass, mock_store, store_lock):
     """Test _persist_to_store after detach() is a silent no-op (doesn't crash, no save).
 

From b6b87f3e19baee24383ff959225bbc0406a2cd40 Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sat, 2 May 2026 18:59:34 -0400
Subject: [PATCH 11/23] fix: address three remaining reload races flagged by
 review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Copilot review found three more reload-related races. All real bugs.

1. detach()-then-cancel double-fire (helpers.py): if _on_expired had
   already passed `if self._call_action:` when detach ran, the action
   would fire and then `await self.cancel()` (now a noop on detached
   timers) left the entry in place. Replacement's setup() loaded the
   expired entry and replayed the action — double-fired the lock.
   Fix: rewrite _on_expired to remove the store entry FIRST, snapshot
   the action, then fire. Inline cleanup so we don't go through cancel.
   detach race during action call still leaves the entry removed and
   replacement idle.

2. start()/persist queued behind shared lock + detach (helpers.py):
   detach() nulled _end_time, so a _persist_to_store() queued behind
   another holder of the lock would re-check, see None, and return
   without saving. Replacement loaded empty store → autolock lost.
   Fix: detach() preserves _end_time/_duration. The queued persist
   completes its save; cancel() still nulls _end_time so its race
   continues to work via the existing snapshot+recheck.

3. _update_lock detach-too-late (coordinator.py): detach was called
   AFTER several await calls. If the old timer fired during the gap,
   _timer_triggered ran against the orphaned old kmlock (mutating
   state like pending_retry_lock that the new lock never sees).
   Fix: move detach to before any awaits, right after validation.

All three fixes have regression tests verified to fail without the
change and pass with it.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .gitignore                                 |   3 +
 custom_components/keymaster/coordinator.py |   8 +-
 custom_components/keymaster/helpers.py     |  50 ++++++---
 tests/test_coordinator.py                  |  39 +++++--
 tests/test_helpers.py                      | 119 +++++++++++++++++++--
 5 files changed, 179 insertions(+), 40 deletions(-)

diff --git a/.gitignore b/.gitignore
index a8ee76db..f01db9e2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,3 +23,6 @@ package-lock.json
 .cursor/
 .aider*
 .continue/
+
+# Serena MCP local cache
+.serena/
diff --git a/custom_components/keymaster/coordinator.py b/custom_components/keymaster/coordinator.py
index ef0732e0..2fbf50e5 100644
--- a/custom_components/keymaster/coordinator.py
+++ b/custom_components/keymaster/coordinator.py
@@ -1079,6 +1079,11 @@ async def _update_lock(self, new: KeymasterLock) -> bool:
             or not old.code_slots
         ):
             return False
+        # Detach the old timer BEFORE any awaits below so its scheduled
+        # callback can't fire during the await chain (which would invoke
+        # _timer_triggered against the about-to-be-orphaned `old` kmlock).
+        if old.autolock_timer:
+            old.autolock_timer.detach()
         await KeymasterCoordinator._unsubscribe_listeners(old)
         # _LOGGER.debug("[update_lock] %s: old: %s", new.lock_name, old)
         del_code_slots: list[int] = [
@@ -1135,9 +1140,6 @@ async def _update_lock(self, new: KeymasterLock) -> bool:
         await self._rebuild_lock_relationships()
         await self._update_door_and_lock_state()
         await self._update_listeners(self.kmlocks[new.keymaster_config_entry_id])
-        # Cancel old timer's callbacks; new timer will resume from the preserved store entry.
-        if old.autolock_timer:
-            old.autolock_timer.detach()
         await self._setup_timer(self.kmlocks[new.keymaster_config_entry_id])
         await self.async_refresh()
         return True
diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index 8ed6c782..4d8615de 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -167,9 +167,8 @@ async def start(self) -> bool:
     async def cancel(self, timer_elapsed: dt | None = None) -> None:
         """Cancel a timer."""
         if self._detached:
-            # An _on_expired coroutine that was already running when detach()
-            # ran must NOT remove the store entry — the replacement timer owns
-            # it and will resume from there.
+            # The owning kmlock has been replaced; the replacement timer owns
+            # the store entry. Do nothing here so we can't trash its state.
             _LOGGER.debug("[KeymasterTimer] Cancel called on detached timer; ignoring")
             return
         if timer_elapsed:
@@ -182,16 +181,18 @@ async def cancel(self, timer_elapsed: dt | None = None) -> None:
         await self._remove_from_store()
 
     def detach(self) -> None:
-        """Drop in-memory callbacks/state without touching the persisted store entry.
-
-        Unlike cancel(), which removes the store entry, detach() leaves it so a
-        replacement KeymasterTimer (created by _setup_timer on the new kmlock
-        after a config entry reload) can resume from it. Without this, the
-        orphaned old timer's async_call_later stays armed against a stale
-        kmlock reference and fires alongside the new timer.
+        """Drop in-memory callbacks and clear the kmlock binding.
+
+        Unlike cancel(), detach() leaves the persisted store entry so a
+        replacement KeymasterTimer (created on the new kmlock after a config
+        entry reload) can resume from it. _end_time and _duration are also
+        preserved so an in-flight _persist_to_store() queued behind the store
+        lock will still write the entry — without this, a start() racing with
+        reload would silently drop the autolock.
         """
         _LOGGER.debug("[KeymasterTimer] Detaching timer (state preserved in store)")
-        # Set first so any _on_expired already running short-circuits cancel().
+        # Set first so any _on_expired already past its initial guard sees the
+        # detached state when it tries to call back into us.
         self._detached = True
         try:
             self._cancel_callbacks()
@@ -200,8 +201,6 @@ def detach(self) -> None:
             # finish nulling refs so the orphan can't operate on stale state.
             _LOGGER.exception("[KeymasterTimer] Error cancelling callbacks during detach")
         finally:
-            self._end_time = None
-            self._duration = None
             self.hass = None
             self._kmlock = None
             self._call_action = None
@@ -210,10 +209,27 @@ def _schedule_callbacks(self, delay: float) -> None:
         """Schedule a single callback that fires the action then cleans up."""
 
         async def _on_expired(now: dt) -> None:
-            """Fire the action and clean up timer state."""
-            if self._call_action:
-                await self._call_action(now)
-            await self.cancel(timer_elapsed=now)
+            """Fire the action and clean up timer state.
+
+            Removes the store entry BEFORE calling the action so that a
+            concurrent detach()/reload can't preserve a fired entry that
+            would replay on the replacement timer. The action snapshot is
+            taken before the store await so detach can't deny the firing
+            after we've committed to it.
+            """
+            if self._call_action is None:
+                # Detached before the callback could start; replacement timer
+                # owns the store entry and will fire it.
+                return
+            action = self._call_action
+            await self._remove_from_store()
+            try:
+                await action(now)
+            except Exception:
+                _LOGGER.exception("[KeymasterTimer] Action raised in _on_expired")
+            self._cancel_callbacks()
+            self._end_time = None
+            self._duration = None
 
         self._unsub_events.append(async_call_later(hass=self.hass, delay=delay, action=_on_expired))
 
diff --git a/tests/test_coordinator.py b/tests/test_coordinator.py
index c7757bb8..f2ce8f9f 100644
--- a/tests/test_coordinator.py
+++ b/tests/test_coordinator.py
@@ -1373,16 +1373,17 @@ async def test_setup_timer_skips_if_already_setup(self, mock_coordinator):
 
         kmlock.autolock_timer.setup.assert_not_called()
 
-    async def test_update_lock_detaches_old_timer_before_setup(self, mock_coordinator):
-        """Test _update_lock detaches the old kmlock's timer before wiring up the new one.
+    async def test_update_lock_detaches_old_timer_before_any_await(self, mock_coordinator):
+        """Test _update_lock detaches the old timer BEFORE the await chain.
 
-        Without detach(), the old timer's async_call_later callback stays
-        scheduled with a stale kmlock reference, firing alongside the new
-        timer and causing duplicate autolock actions on config entry reload.
+        If detach happens after one of the awaits, the old timer can fire
+        in the gap and run _timer_triggered against the (about-to-be-)
+        orphaned kmlock — for example mutating old.pending_retry_lock,
+        which the replacement never sees.
         """
         entry_id = "test_entry_reload"
+        call_order: list[str] = []
 
-        # Build a minimum-viable existing kmlock with a timer that looks active
         old_lock = KeymasterLock(
             lock_name="test_lock",
             lock_entity_id="lock.test",
@@ -1392,9 +1393,8 @@ async def test_update_lock_detaches_old_timer_before_setup(self, mock_coordinato
         old_lock.number_of_code_slots = 1
         old_lock.code_slots = {1: Mock(accesslimit_day_of_week=None)}
         old_lock.autolock_timer = Mock()
-        old_lock.autolock_timer.detach = Mock()
+        old_lock.autolock_timer.detach = Mock(side_effect=lambda: call_order.append("detach"))
 
-        # Build the replacement kmlock (config entry reload yields a new instance)
         new_lock = KeymasterLock(
             lock_name="test_lock",
             lock_entity_id="lock.test",
@@ -1407,19 +1407,36 @@ async def test_update_lock_detaches_old_timer_before_setup(self, mock_coordinato
         mock_coordinator.kmlocks[entry_id] = old_lock
         mock_coordinator._initial_setup_done_event.set()
 
+        async def record_unsub(*args, **kwargs):
+            call_order.append("unsubscribe_listeners")
+
+        async def record_setup(*args, **kwargs):
+            call_order.append("setup_timer")
+
         with (
-            patch.object(KeymasterCoordinator, "_unsubscribe_listeners", new=AsyncMock()),
+            patch.object(
+                KeymasterCoordinator,
+                "_unsubscribe_listeners",
+                new=AsyncMock(side_effect=record_unsub),
+            ),
             patch.object(mock_coordinator, "_rebuild_lock_relationships", new=AsyncMock()),
             patch.object(mock_coordinator, "_update_door_and_lock_state", new=AsyncMock()),
             patch.object(mock_coordinator, "_update_listeners", new=AsyncMock()),
-            patch.object(mock_coordinator, "_setup_timer", new=AsyncMock()) as mock_setup_timer,
+            patch.object(
+                mock_coordinator, "_setup_timer", new=AsyncMock(side_effect=record_setup)
+            ) as mock_setup_timer,
             patch.object(mock_coordinator, "async_refresh", new=AsyncMock()),
         ):
             await mock_coordinator._update_lock(new_lock)
 
-        # Old timer must be detached BEFORE the new timer is set up
         old_lock.autolock_timer.detach.assert_called_once_with()
         mock_setup_timer.assert_called_once()
+        # Critical ordering: detach must run BEFORE any of the await calls
+        # in _update_lock, so the old timer can't fire during the gap.
+        assert call_order[0] == "detach", f"detach must be first, got {call_order}"
+        assert "unsubscribe_listeners" in call_order
+        assert call_order.index("detach") < call_order.index("unsubscribe_listeners")
+        assert call_order.index("detach") < call_order.index("setup_timer")
         # And the new kmlock (now in self.kmlocks) is what _setup_timer received
         assert mock_setup_timer.call_args.args[0] is mock_coordinator.kmlocks[entry_id]
 
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index fb191db8..76d73a1b 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -1014,16 +1014,22 @@ async def mock_action(*args):
 
     # Sanity: timer has a scheduled callback before detach
     assert len(timer._unsub_events) == 1
+    saved_end_time = timer._end_time
+    saved_duration = timer._duration
 
     timer.detach()
 
-    # In-memory state cleared
+    # Callbacks unsubscribed and kmlock binding cleared
     assert timer._unsub_events == []
-    assert timer._end_time is None
-    assert timer._duration is None
     assert timer.hass is None
     assert timer._kmlock is None
     assert timer._call_action is None
+    assert timer._detached is True
+    # _end_time/_duration are PRESERVED so an in-flight _persist_to_store can
+    # still write the entry under the lock (otherwise a start() racing with
+    # reload would silently lose the autolock state).
+    assert timer._end_time == saved_end_time
+    assert timer._duration == saved_duration
     # Critical: store was NOT modified — replacement timer needs to resume from it
     mock_store.async_save.assert_not_called()
 
@@ -1106,11 +1112,104 @@ async def mock_action(*args):
     mock_store.async_load.assert_called()
 
 
-async def test_keymaster_timer_persist_after_detach_is_noop(hass, mock_store, store_lock):
-    """Test _persist_to_store after detach() is a silent no-op (doesn't crash, no save).
+async def test_keymaster_timer_on_expired_removes_store_before_action(hass, mock_store, store_lock):
+    """Test _on_expired removes store entry BEFORE calling the action.
 
-    detach() nulls _end_time, so the early guard returns. This protects against
-    a queued persist running after the timer was detached.
+    If detach() races during the action call, the store entry is already gone
+    so the replacement timer's setup() can't replay the same expired timer
+    and double-fire the lock action.
+    """
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+    kmlock.autolock_min_day = 5
+
+    call_order: list[str] = []
+
+    async def mock_action(*args):
+        call_order.append("action")
+
+    real_save = mock_store.async_save
+
+    async def tracking_save(data):
+        if "test_timer" not in data:
+            call_order.append("remove")
+        await real_save(data)
+
+    mock_store.async_save = AsyncMock(side_effect=tracking_save)
+
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+
+    # Capture the real _on_expired by patching async_call_later
+    with patch("custom_components.keymaster.helpers.async_call_later") as mock_call_later:
+        with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
+            await timer.start()
+        callback_fn = mock_call_later.call_args[1]["action"]
+
+    # Make load return the entry so _remove_from_store actually saves
+    mock_store.async_load = AsyncMock(
+        return_value={"test_timer": {"end_time": timer._end_time.isoformat(), "duration": 300}}
+    )
+
+    await callback_fn(dt_util.utcnow())
+
+    # Remove must have happened BEFORE the action call
+    assert call_order == ["remove", "action"]
+
+
+async def test_keymaster_timer_on_expired_skipped_after_detach(hass, mock_store, store_lock):
+    """Test _on_expired is a no-op if detach() ran before it could fire.
+
+    When the timer was queued to fire (async_call_later put _on_expired in
+    the run queue) but detach ran first, _on_expired must NOT fire the
+    action — the replacement timer will resume from the preserved store
+    entry and fire it instead.
+    """
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+    kmlock.autolock_min_day = 5
+
+    action_called = False
+
+    async def mock_action(*args):
+        nonlocal action_called
+        action_called = True
+
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+
+    with patch("custom_components.keymaster.helpers.async_call_later") as mock_call_later:
+        with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
+            await timer.start()
+        callback_fn = mock_call_later.call_args[1]["action"]
+
+    # Detach BEFORE the callback fires (simulating reload)
+    timer.detach()
+    mock_store.async_save.reset_mock()
+
+    await callback_fn(dt_util.utcnow())
+
+    assert not action_called, "action must not fire on a detached timer"
+    mock_store.async_save.assert_not_called()
+
+
+async def test_keymaster_timer_persist_after_detach_still_saves(hass, mock_store, store_lock):
+    """Test _persist_to_store after detach() still writes the entry.
+
+    detach() preserves _end_time/_duration so an in-flight persist queued
+    behind the store lock will still save when it eventually acquires it.
+    Without this, a start() racing with config entry reload would silently
+    lose the autolock — the replacement timer would load an empty store.
     """
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
@@ -1132,9 +1231,11 @@ async def mock_action(*args):
     timer.detach()
     mock_store.async_save.reset_mock()
 
-    # Should not raise and should not save
+    # Persist should still save the preserved end_time/duration
     await timer._persist_to_store()
-    mock_store.async_save.assert_not_called()
+    mock_store.async_save.assert_called_once()
+    saved_data = mock_store.async_save.call_args[0][0]
+    assert "test_timer" in saved_data
 
 
 async def test_keymaster_timer_remove_from_store_missing_key(hass, mock_store, store_lock):

From 1cd9201f7eea0d239117310d0074b4aaecb4a419 Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sat, 2 May 2026 22:48:40 -0400
Subject: [PATCH 12/23] fix: post-remove recheck in _on_expired; raise on
 missing store_lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses three review findings:

1. _on_expired post-remove recheck (helpers.py): if detach() ran during
   the await self._remove_from_store() yield, the captured action would
   still fire against the now-orphaned kmlock — mutating state the
   replacement timer never sees. Add a _detached recheck after the
   remove await; hand off to the replacement timer if so.

2. _persist_to_store / _remove_from_store now raise RuntimeError when
   _store_lock is missing instead of log+return. The previous behavior
   silently lost autolock state across restarts (persist) or left a
   stale entry that fired a phantom autolock (remove). The invariant is
   "setup() was called", so failing loud is correct.

3. Refactor: extract _remove_from_store_locked() helper so setup()'s
   inlined cleanup paths stop duplicating the body. The public
   _remove_from_store() acquires the lock and delegates; setup() (which
   already holds the lock) calls _locked directly.

Comment fixes: clarify the setup-under-lock comment (previously
referenced nonexistent _resume name); rewrite _on_expired docstring
for clarity.

New regression tests:
- test_keymaster_timer_persist_recheck_aborts_after_cancel — covers the
  post-lock _end_time recheck that was previously untested
- test_keymaster_timer_on_expired_skips_action_if_detached_during_remove
  — covers the new post-remove recheck

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/helpers.py | 66 ++++++++++--------
 tests/test_helpers.py                  | 92 ++++++++++++++++++++++++++
 2 files changed, 129 insertions(+), 29 deletions(-)

diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index 4d8615de..9fa12989 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -106,8 +106,7 @@ async def setup(
         # Recover persisted timer under the lock so any in-flight persist/cancel
         # from an outgoing timer (e.g. config entry reload) finishes first. This
         # prevents reading a stale empty store while the old timer is mid-save.
-        # _resume is sync w.r.t. the store; _remove_from_store is inlined here
-        # because asyncio.Lock isn't reentrant.
+        # We call the _locked helper directly since asyncio.Lock isn't reentrant.
         async with self._store_lock:
             data = await store.async_load() or {}
             timer_data = data.get(timer_id)
@@ -120,8 +119,7 @@ async def setup(
                     "[KeymasterTimer] %s: Invalid persisted timer data, removing",
                     timer_id,
                 )
-                del data[timer_id]
-                await store.async_save(data)
+                await self._remove_from_store_locked()
                 return
             duration = timer_data.get("duration", 0)
             if end_time <= dt_util.utcnow():
@@ -129,8 +127,7 @@ async def setup(
                     "[KeymasterTimer] %s: Persisted timer expired during downtime, firing",
                     timer_id,
                 )
-                del data[timer_id]
-                await store.async_save(data)
+                await self._remove_from_store_locked()
                 hass.async_create_task(call_action(dt_util.utcnow()))
             else:
                 _LOGGER.debug(
@@ -211,18 +208,24 @@ def _schedule_callbacks(self, delay: float) -> None:
         async def _on_expired(now: dt) -> None:
             """Fire the action and clean up timer state.
 
-            Removes the store entry BEFORE calling the action so that a
-            concurrent detach()/reload can't preserve a fired entry that
-            would replay on the replacement timer. The action snapshot is
-            taken before the store await so detach can't deny the firing
-            after we've committed to it.
+            Removes the store entry BEFORE calling the action so a concurrent
+            detach()/reload can't preserve a fired entry that would replay on
+            the replacement timer. After the remove await we re-check whether
+            detach() ran during the yield — if so, the action would target an
+            orphaned kmlock, so we hand off to the replacement timer instead.
             """
             if self._call_action is None:
-                # Detached before the callback could start; replacement timer
-                # owns the store entry and will fire it.
                 return
-            action = self._call_action
             await self._remove_from_store()
+            # Re-check after the await: if detach() ran during the remove, the
+            # captured kmlock is now orphaned. The replacement timer's setup()
+            # already loaded the (now-removed) entry under the same lock and
+            # will handle firing if it loaded before we removed.
+            if self._detached:
+                return
+            action = self._call_action
+            if action is None:
+                return
             try:
                 await action(now)
             except Exception:
@@ -251,14 +254,14 @@ async def _persist_to_store(self) -> None:
         if not self._store or not self._timer_id or not self._end_time:
             return
         if not self._store_lock:
-            _LOGGER.error(
-                "[KeymasterTimer] %s: store_lock missing; setup() was not called. "
-                "Persist skipped, autolock will not survive restart",
-                self._timer_id,
+            # store_lock is set in setup(); a missing lock here is a programming
+            # error. Silently no-opping would silently lose the autolock across
+            # the next restart.
+            raise RuntimeError(
+                f"[KeymasterTimer] {self._timer_id}: store_lock missing; setup() was not called"
             )
-            return
         async with self._store_lock:
-            # Re-check after lock acquisition: detach() can null _end_time
+            # Re-check after lock acquisition: cancel() can null _end_time
             # while we were queued, in which case there's nothing to persist.
             end_time = self._end_time
             duration = self._duration
@@ -276,17 +279,22 @@ async def _remove_from_store(self) -> None:
         if not self._store or not self._timer_id:
             return
         if not self._store_lock:
-            _LOGGER.error(
-                "[KeymasterTimer] %s: store_lock missing; setup() was not called. "
-                "Remove skipped, stale entry will resume on restart",
-                self._timer_id,
+            # store_lock is set in setup(); silently no-opping would leave a
+            # stale entry that fires a phantom autolock on the next restart.
+            raise RuntimeError(
+                f"[KeymasterTimer] {self._timer_id}: store_lock missing; setup() was not called"
             )
-            return
         async with self._store_lock:
-            data = await self._store.async_load() or {}
-            if self._timer_id in data:
-                del data[self._timer_id]
-                await self._store.async_save(data)
+            await self._remove_from_store_locked()
+
+    async def _remove_from_store_locked(self) -> None:
+        """Remove this timer's entry from the store. Caller MUST hold _store_lock."""
+        if not self._store or not self._timer_id:
+            return
+        data = await self._store.async_load() or {}
+        if self._timer_id in data:
+            del data[self._timer_id]
+            await self._store.async_save(data)
 
     @property
     def is_running(self) -> bool:
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index 76d73a1b..750eb80e 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -1238,6 +1238,98 @@ async def mock_action(*args):
     assert "test_timer" in saved_data
 
 
+async def test_keymaster_timer_on_expired_skips_action_if_detached_during_remove(
+    hass, mock_store, store_lock
+):
+    """Test _on_expired doesn't fire action against orphaned kmlock.
+
+    If detach() runs during the `await self._remove_from_store()` inside
+    _on_expired, the captured kmlock is now orphaned and firing the action
+    against it would mutate dead state. The post-remove _detached re-check
+    must catch this and skip the firing.
+    """
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+    kmlock.autolock_min_day = 5
+
+    action_called = False
+
+    async def mock_action(*args):
+        nonlocal action_called
+        action_called = True
+
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+
+    with patch("custom_components.keymaster.helpers.async_call_later") as mock_call_later:
+        with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
+            await timer.start()
+        callback_fn = mock_call_later.call_args[1]["action"]
+
+    # Make async_load yield once so we can detach during the remove
+    async def yielding_load():
+        await asyncio.sleep(0)
+        return {"test_timer": {"end_time": timer._end_time.isoformat(), "duration": 300}}
+
+    mock_store.async_load = AsyncMock(side_effect=yielding_load)
+
+    # Kick off _on_expired; it'll yield inside _remove_from_store
+    expire_task = asyncio.create_task(callback_fn(dt_util.utcnow()))
+    await asyncio.sleep(0)
+    # Detach during the remove-await
+    timer.detach()
+    await expire_task
+
+    # Action must NOT have fired against the orphaned kmlock
+    assert not action_called
+
+
+async def test_keymaster_timer_persist_recheck_aborts_after_cancel(hass, mock_store, store_lock):
+    """Test _persist_to_store's post-lock recheck aborts when cancel nulled _end_time.
+
+    Scenario: persist is queued behind another lock holder. While queued,
+    cancel() nulls _end_time. When persist acquires the lock, the recheck
+    must catch the now-None _end_time and bail out without saving.
+    """
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+    kmlock.autolock_min_day = 5
+
+    async def mock_action(*args):
+        pass
+
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+
+    timer._end_time = dt_util.utcnow() + timedelta(minutes=5)
+    timer._duration = 300
+    mock_store.async_save.reset_mock()
+
+    # Hold the lock externally so persist queues behind us
+    async with store_lock:
+        persist_task = asyncio.create_task(timer._persist_to_store())
+        # Yield to let persist enter, hit the pre-guard (passes), and block on lock
+        await asyncio.sleep(0)
+        assert not persist_task.done(), "persist should be blocked on the lock"
+        # Simulate cancel() nulling _end_time while persist waits
+        timer._end_time = None
+        timer._duration = None
+
+    # Lock released; persist resumes, recheck sees None, returns without saving
+    await persist_task
+    mock_store.async_save.assert_not_called()
+
+
 async def test_keymaster_timer_remove_from_store_missing_key(hass, mock_store, store_lock):
     """Test _remove_from_store is a no-op when timer_id is not in the store."""
     timer = KeymasterTimer()

From d6f83d07cad2fcf86332bb7071e25902b9d5cab9 Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sat, 2 May 2026 22:53:58 -0400
Subject: [PATCH 13/23] fix: async detach() awaits in-flight callback; copy
 pending_retry_lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes the two remaining narrow reload races flagged by review:

1. _on_expired action firing on orphaned kmlock (helpers.py): even with
   the post-remove _detached recheck, the action snapshot was committed
   before the await, so detach during the action call still ran the
   action against the orphaned old kmlock. Mutations like
   pending_retry_lock = True were lost when the replacement kmlock
   replaced it in self.kmlocks.

   Fix: make detach() async, track _on_expired as a Task, and have
   detach() await the in-flight task before nulling refs. The action
   completes against the still-current old kmlock, then _update_lock's
   state-copy reads the result.

2. pending_retry_lock not preserved across reload (coordinator.py):
   _update_lock copies many state fields but not pending_retry_lock,
   so even without the race above, a reload during a door-open retry
   would silently forget the pending state. Add the copy.

Together these guarantee that an autolock action firing during a config
entry reload is correctly observed by the replacement.

New regression tests:
- test_keymaster_timer_detach_awaits_in_flight_callback — verifies
  detach blocks until the in-flight _on_expired completes
- test_update_lock_copies_pending_retry_lock — verifies the new copy

Updates all timer.detach() callsites in tests to await the now-async
method, and the coordinator regression test to use AsyncMock for detach.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/coordinator.py | 12 ++--
 custom_components/keymaster/helpers.py     | 63 ++++++++++++--------
 tests/test_coordinator.py                  | 52 ++++++++++++++++-
 tests/test_helpers.py                      | 68 ++++++++++++++++++++--
 4 files changed, 162 insertions(+), 33 deletions(-)

diff --git a/custom_components/keymaster/coordinator.py b/custom_components/keymaster/coordinator.py
index 2fbf50e5..effffeb4 100644
--- a/custom_components/keymaster/coordinator.py
+++ b/custom_components/keymaster/coordinator.py
@@ -1079,11 +1079,12 @@ async def _update_lock(self, new: KeymasterLock) -> bool:
             or not old.code_slots
         ):
             return False
-        # Detach the old timer BEFORE any awaits below so its scheduled
-        # callback can't fire during the await chain (which would invoke
-        # _timer_triggered against the about-to-be-orphaned `old` kmlock).
+        # Detach the old timer BEFORE any other awaits so its scheduled
+        # callback can't fire during the await chain. detach() awaits any
+        # in-flight _on_expired so any mutations it makes to old (e.g.
+        # pending_retry_lock) are visible when we copy state below.
         if old.autolock_timer:
-            old.autolock_timer.detach()
+            await old.autolock_timer.detach()
         await KeymasterCoordinator._unsubscribe_listeners(old)
         # _LOGGER.debug("[update_lock] %s: old: %s", new.lock_name, old)
         del_code_slots: list[int] = [
@@ -1102,6 +1103,9 @@ async def _update_lock(self, new: KeymasterLock) -> bool:
         new.autolock_min_day = old.autolock_min_day
         new.autolock_min_night = old.autolock_min_night
         new.retry_lock = old.retry_lock
+        # Preserve transient retry state; the door-open retry path on the
+        # replacement won't fire its lock action otherwise.
+        new.pending_retry_lock = old.pending_retry_lock
         for code_slot_num, new_kmslot in new.code_slots.items():
             if code_slot_num not in old.code_slots:
                 continue
diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index 9fa12989..c1553303 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -85,6 +85,7 @@ def __init__(self) -> None:
         self._store: Store[dict[str, TimerStoreEntry]] | None = None
         self._store_lock: asyncio.Lock | None = None
         self._detached = False
+        self._on_expired_task: asyncio.Task[None] | None = None
 
     async def setup(
         self,
@@ -177,7 +178,7 @@ async def cancel(self, timer_elapsed: dt | None = None) -> None:
         self._duration = None
         await self._remove_from_store()
 
-    def detach(self) -> None:
+    async def detach(self) -> None:
         """Drop in-memory callbacks and clear the kmlock binding.
 
         Unlike cancel(), detach() leaves the persisted store entry so a
@@ -186,6 +187,10 @@ def detach(self) -> None:
         preserved so an in-flight _persist_to_store() queued behind the store
         lock will still write the entry — without this, a start() racing with
         reload would silently drop the autolock.
+
+        Awaits any in-flight _on_expired callback before clearing the kmlock
+        binding so callers (e.g. _update_lock copying state from old to new)
+        observe whatever mutations the action made on the old kmlock.
         """
         _LOGGER.debug("[KeymasterTimer] Detaching timer (state preserved in store)")
         # Set first so any _on_expired already past its initial guard sees the
@@ -197,10 +202,18 @@ def detach(self) -> None:
             # Best-effort: even if unsub raises (e.g. HA shutdown), we must
             # finish nulling refs so the orphan can't operate on stale state.
             _LOGGER.exception("[KeymasterTimer] Error cancelling callbacks during detach")
-        finally:
-            self.hass = None
-            self._kmlock = None
-            self._call_action = None
+        # Wait for any in-flight _on_expired to finish so callers see its
+        # mutations (e.g. pending_retry_lock) on the old kmlock before they
+        # copy state to the replacement.
+        in_flight = self._on_expired_task
+        if in_flight is not None and not in_flight.done():
+            try:
+                await in_flight
+            except Exception:
+                _LOGGER.exception("[KeymasterTimer] In-flight callback raised during detach")
+        self.hass = None
+        self._kmlock = None
+        self._call_action = None
 
     def _schedule_callbacks(self, delay: float) -> None:
         """Schedule a single callback that fires the action then cleans up."""
@@ -213,26 +226,30 @@ async def _on_expired(now: dt) -> None:
             the replacement timer. After the remove await we re-check whether
             detach() ran during the yield — if so, the action would target an
             orphaned kmlock, so we hand off to the replacement timer instead.
+
+            Registers itself as self._on_expired_task so that detach() can
+            await its completion (any mutations the action made to the old
+            kmlock are then visible to _update_lock's state-copy).
             """
-            if self._call_action is None:
-                return
-            await self._remove_from_store()
-            # Re-check after the await: if detach() ran during the remove, the
-            # captured kmlock is now orphaned. The replacement timer's setup()
-            # already loaded the (now-removed) entry under the same lock and
-            # will handle firing if it loaded before we removed.
-            if self._detached:
-                return
-            action = self._call_action
-            if action is None:
-                return
+            self._on_expired_task = asyncio.current_task()
             try:
-                await action(now)
-            except Exception:
-                _LOGGER.exception("[KeymasterTimer] Action raised in _on_expired")
-            self._cancel_callbacks()
-            self._end_time = None
-            self._duration = None
+                if self._call_action is None:
+                    return
+                await self._remove_from_store()
+                if self._detached:
+                    return
+                action = self._call_action
+                if action is None:
+                    return
+                try:
+                    await action(now)
+                except Exception:
+                    _LOGGER.exception("[KeymasterTimer] Action raised in _on_expired")
+                self._cancel_callbacks()
+                self._end_time = None
+                self._duration = None
+            finally:
+                self._on_expired_task = None
 
         self._unsub_events.append(async_call_later(hass=self.hass, delay=delay, action=_on_expired))
 
diff --git a/tests/test_coordinator.py b/tests/test_coordinator.py
index f2ce8f9f..5ffaa579 100644
--- a/tests/test_coordinator.py
+++ b/tests/test_coordinator.py
@@ -1393,7 +1393,11 @@ async def test_update_lock_detaches_old_timer_before_any_await(self, mock_coordi
         old_lock.number_of_code_slots = 1
         old_lock.code_slots = {1: Mock(accesslimit_day_of_week=None)}
         old_lock.autolock_timer = Mock()
-        old_lock.autolock_timer.detach = Mock(side_effect=lambda: call_order.append("detach"))
+
+        async def record_detach():
+            call_order.append("detach")
+
+        old_lock.autolock_timer.detach = AsyncMock(side_effect=record_detach)
 
         new_lock = KeymasterLock(
             lock_name="test_lock",
@@ -1440,6 +1444,52 @@ async def record_setup(*args, **kwargs):
         # And the new kmlock (now in self.kmlocks) is what _setup_timer received
         assert mock_setup_timer.call_args.args[0] is mock_coordinator.kmlocks[entry_id]
 
+    async def test_update_lock_copies_pending_retry_lock(self, mock_coordinator):
+        """Test _update_lock copies pending_retry_lock from old to new kmlock.
+
+        If autolock was waiting for the door to close (pending_retry_lock=True)
+        when the user reloaded, the replacement kmlock must inherit that flag
+        or it will never re-attempt the lock.
+        """
+        entry_id = "test_entry_pending_retry"
+
+        old_lock = KeymasterLock(
+            lock_name="test_lock",
+            lock_entity_id="lock.test",
+            keymaster_config_entry_id=entry_id,
+        )
+        old_lock.starting_code_slot = 1
+        old_lock.number_of_code_slots = 1
+        old_lock.code_slots = {1: Mock(accesslimit_day_of_week=None)}
+        old_lock.autolock_timer = Mock()
+        old_lock.autolock_timer.detach = AsyncMock()
+        old_lock.pending_retry_lock = True  # autolock was waiting
+
+        new_lock = KeymasterLock(
+            lock_name="test_lock",
+            lock_entity_id="lock.test",
+            keymaster_config_entry_id=entry_id,
+        )
+        new_lock.starting_code_slot = 1
+        new_lock.number_of_code_slots = 1
+        new_lock.code_slots = {1: Mock(accesslimit_day_of_week=None)}
+        # Default is False; the copy in _update_lock should flip it to True
+
+        mock_coordinator.kmlocks[entry_id] = old_lock
+        mock_coordinator._initial_setup_done_event.set()
+
+        with (
+            patch.object(KeymasterCoordinator, "_unsubscribe_listeners", new=AsyncMock()),
+            patch.object(mock_coordinator, "_rebuild_lock_relationships", new=AsyncMock()),
+            patch.object(mock_coordinator, "_update_door_and_lock_state", new=AsyncMock()),
+            patch.object(mock_coordinator, "_update_listeners", new=AsyncMock()),
+            patch.object(mock_coordinator, "_setup_timer", new=AsyncMock()),
+            patch.object(mock_coordinator, "async_refresh", new=AsyncMock()),
+        ):
+            await mock_coordinator._update_lock(new_lock)
+
+        assert mock_coordinator.kmlocks[entry_id].pending_retry_lock is True
+
 
 # ============================================================================
 # State Synchronization Tests
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index 750eb80e..83ace7e1 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -1017,7 +1017,7 @@ async def mock_action(*args):
     saved_end_time = timer._end_time
     saved_duration = timer._duration
 
-    timer.detach()
+    await timer.detach()
 
     # Callbacks unsubscribed and kmlock binding cleared
     assert timer._unsub_events == []
@@ -1063,7 +1063,7 @@ async def mock_action(*args):
     mock_store.async_load = AsyncMock(
         return_value={"test_timer": {"end_time": "2099-01-01T00:00:00+00:00", "duration": 300}}
     )
-    timer.detach()
+    await timer.detach()
     mock_store.async_save.reset_mock()
 
     # Simulating an in-flight _on_expired calling cancel after detach
@@ -1194,7 +1194,7 @@ async def mock_action(*args):
         callback_fn = mock_call_later.call_args[1]["action"]
 
     # Detach BEFORE the callback fires (simulating reload)
-    timer.detach()
+    await timer.detach()
     mock_store.async_save.reset_mock()
 
     await callback_fn(dt_util.utcnow())
@@ -1228,7 +1228,7 @@ async def mock_action(*args):
     with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
         await timer.start()
 
-    timer.detach()
+    await timer.detach()
     mock_store.async_save.reset_mock()
 
     # Persist should still save the preserved end_time/duration
@@ -1238,6 +1238,64 @@ async def mock_action(*args):
     assert "test_timer" in saved_data
 
 
+async def test_keymaster_timer_detach_awaits_in_flight_callback(hass, mock_store, store_lock):
+    """Test detach() awaits any in-flight _on_expired before returning.
+
+    This ensures _update_lock observes whatever mutations the action made
+    on the old kmlock (e.g. pending_retry_lock) before it copies state to
+    the replacement. Without the await, detach would return while the
+    action was still running and the state copy would miss the mutation.
+    """
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+    kmlock.autolock_min_day = 5
+
+    action_started = asyncio.Event()
+    action_release = asyncio.Event()
+    action_completed = False
+
+    async def slow_action(*args):
+        nonlocal action_completed
+        action_started.set()
+        await action_release.wait()
+        action_completed = True
+
+    await timer.setup(
+        hass, kmlock, slow_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+
+    with patch("custom_components.keymaster.helpers.async_call_later") as mock_call_later:
+        with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
+            await timer.start()
+        callback_fn = mock_call_later.call_args[1]["action"]
+
+    # Make load return the entry so remove will save (and reach the action)
+    mock_store.async_load = AsyncMock(
+        return_value={"test_timer": {"end_time": timer._end_time.isoformat(), "duration": 300}}
+    )
+
+    # Kick off the callback; it'll block inside slow_action
+    expire_task = asyncio.create_task(callback_fn(dt_util.utcnow()))
+    await action_started.wait()
+    assert not action_completed
+
+    # Now start detach; it must wait for the in-flight callback
+    detach_task = asyncio.create_task(timer.detach())
+    await asyncio.sleep(0)
+    assert not detach_task.done(), "detach must wait for in-flight callback"
+
+    # Release the action, both should complete
+    action_release.set()
+    await asyncio.gather(expire_task, detach_task)
+
+    assert action_completed
+    assert timer.hass is None  # detach finished its cleanup
+
+
 async def test_keymaster_timer_on_expired_skips_action_if_detached_during_remove(
     hass, mock_store, store_lock
 ):
@@ -1282,7 +1340,7 @@ async def yielding_load():
     expire_task = asyncio.create_task(callback_fn(dt_util.utcnow()))
     await asyncio.sleep(0)
     # Detach during the remove-await
-    timer.detach()
+    await timer.detach()
     await expire_task
 
     # Action must NOT have fired against the orphaned kmlock

From 9858c2b5571fc2bf768dcf57eb8d68930abfc75b Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sun, 3 May 2026 00:04:28 -0400
Subject: [PATCH 14/23] fix: revert _on_expired to action-first ordering
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous remove-first ordering was Copilot-flagged as having two
problems:

1. Storage failure during _remove_from_store would prevent the action
   from firing (the safety-critical lock operation depends on cleanup
   succeeding).

2. detach() landing between remove completion and action call would
   skip the action and silently lose the autolock — replacement loads
   empty store.

Switch back to action-first ordering. The original concern (replay on
reload) is now handled by detach() awaiting the in-flight callback
(introduced last commit): the action fires, the cleanup runs, the
entry is removed, then detach unblocks. Replacement loads empty and
correctly stays idle.

Use _remove_from_store directly (not self.cancel()) since cancel() is
a noop on detached timers — the in-flight callback must always remove
the entry it owns.

New regression tests:
- test_keymaster_timer_on_expired_action_before_remove — verifies
  ordering
- test_keymaster_timer_on_expired_action_fires_even_if_remove_raises
  — verifies storage failure doesn't block firing

Replaces obsolete test_keymaster_timer_on_expired_skips_action_if_
detached_during_remove with one that verifies the new contract: the
in-flight callback removes the entry even when detach races.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/helpers.py |  25 +++---
 tests/test_helpers.py                  | 118 ++++++++++++++++++-------
 2 files changed, 96 insertions(+), 47 deletions(-)

diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index c1553303..a9bc6a19 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -219,32 +219,31 @@ def _schedule_callbacks(self, delay: float) -> None:
         """Schedule a single callback that fires the action then cleans up."""
 
         async def _on_expired(now: dt) -> None:
-            """Fire the action and clean up timer state.
+            """Fire the action then clean up timer state.
 
-            Removes the store entry BEFORE calling the action so a concurrent
-            detach()/reload can't preserve a fired entry that would replay on
-            the replacement timer. After the remove await we re-check whether
-            detach() ran during the yield — if so, the action would target an
-            orphaned kmlock, so we hand off to the replacement timer instead.
+            Action runs FIRST so storage failures during cleanup can't
+            prevent the safety-critical lock from firing. We use
+            _remove_from_store directly (not cancel()) because cancel() is
+            a noop on detached timers; an in-flight callback racing with
+            detach() must still remove the entry or the replacement timer's
+            setup() will replay the action.
 
             Registers itself as self._on_expired_task so that detach() can
-            await its completion (any mutations the action made to the old
-            kmlock are then visible to _update_lock's state-copy).
+            await completion — any mutations the action made on the old
+            kmlock are then visible to _update_lock's state copy.
             """
             self._on_expired_task = asyncio.current_task()
             try:
                 if self._call_action is None:
                     return
-                await self._remove_from_store()
-                if self._detached:
-                    return
                 action = self._call_action
-                if action is None:
-                    return
                 try:
                     await action(now)
                 except Exception:
                     _LOGGER.exception("[KeymasterTimer] Action raised in _on_expired")
+                # Cleanup happens regardless of whether detach() ran during
+                # the action; we own the store entry until we finish here.
+                await self._remove_from_store()
                 self._cancel_callbacks()
                 self._end_time = None
                 self._duration = None
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index 83ace7e1..274f357f 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -1112,12 +1112,14 @@ async def mock_action(*args):
     mock_store.async_load.assert_called()
 
 
-async def test_keymaster_timer_on_expired_removes_store_before_action(hass, mock_store, store_lock):
-    """Test _on_expired removes store entry BEFORE calling the action.
-
-    If detach() races during the action call, the store entry is already gone
-    so the replacement timer's setup() can't replay the same expired timer
-    and double-fire the lock action.
+async def test_keymaster_timer_on_expired_action_before_remove(hass, mock_store, store_lock):
+    """Test _on_expired fires the action BEFORE store cleanup.
+
+    Action is the safety-critical operation; if storage cleanup raises
+    (e.g. transient async_load/async_save failure), the lock must still
+    have been locked. detach() awaits this task so the action can complete
+    even if reload races, and the in-flight callback removes the entry
+    before it returns so the replacement won't replay.
     """
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
@@ -1145,21 +1147,61 @@ async def tracking_save(data):
         hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
     )
 
-    # Capture the real _on_expired by patching async_call_later
     with patch("custom_components.keymaster.helpers.async_call_later") as mock_call_later:
         with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
             await timer.start()
         callback_fn = mock_call_later.call_args[1]["action"]
 
-    # Make load return the entry so _remove_from_store actually saves
     mock_store.async_load = AsyncMock(
         return_value={"test_timer": {"end_time": timer._end_time.isoformat(), "duration": 300}}
     )
 
     await callback_fn(dt_util.utcnow())
 
-    # Remove must have happened BEFORE the action call
-    assert call_order == ["remove", "action"]
+    # Action must fire before remove so storage failures can't block the lock
+    assert call_order == ["action", "remove"]
+
+
+async def test_keymaster_timer_on_expired_action_fires_even_if_remove_raises(
+    hass, mock_store, store_lock
+):
+    """Test the action fires even if store cleanup raises.
+
+    Action is safety-critical (locks the door). A transient storage error
+    must not prevent it. The next restart will replay the timer (entry
+    still in store), which is acceptable.
+    """
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+    kmlock.autolock_min_day = 5
+
+    action_called = False
+
+    async def mock_action(*args):
+        nonlocal action_called
+        action_called = True
+
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+
+    with patch("custom_components.keymaster.helpers.async_call_later") as mock_call_later:
+        with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
+            await timer.start()
+        callback_fn = mock_call_later.call_args[1]["action"]
+
+    # Make remove fail by having async_load raise
+    mock_store.async_load = AsyncMock(side_effect=OSError("transient disk error"))
+
+    # Callback should propagate the error but action should have fired first
+    with pytest.raises(OSError):
+        await callback_fn(dt_util.utcnow())
+
+    assert action_called, "action must fire even if storage cleanup fails"
 
 
 async def test_keymaster_timer_on_expired_skipped_after_detach(hass, mock_store, store_lock):
@@ -1296,15 +1338,14 @@ async def slow_action(*args):
     assert timer.hass is None  # detach finished its cleanup
 
 
-async def test_keymaster_timer_on_expired_skips_action_if_detached_during_remove(
+async def test_keymaster_timer_on_expired_remove_runs_when_detach_races(
     hass, mock_store, store_lock
 ):
-    """Test _on_expired doesn't fire action against orphaned kmlock.
+    """Test _on_expired removes the store entry even if detach races.
 
-    If detach() runs during the `await self._remove_from_store()` inside
-    _on_expired, the captured kmlock is now orphaned and firing the action
-    against it would mutate dead state. The post-remove _detached re-check
-    must catch this and skip the firing.
+    The in-flight callback owns the store entry and must remove it before
+    completing — otherwise the replacement timer's setup() would replay
+    the action. detach() awaits the callback so this remove always runs.
     """
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
@@ -1314,14 +1355,24 @@ async def test_keymaster_timer_on_expired_skips_action_if_detached_during_remove
     )
     kmlock.autolock_min_day = 5
 
-    action_called = False
+    action_started = asyncio.Event()
+    action_release = asyncio.Event()
+    save_calls: list[dict] = []
 
-    async def mock_action(*args):
-        nonlocal action_called
-        action_called = True
+    async def slow_action(*args):
+        action_started.set()
+        await action_release.wait()
+
+    async def record_save(data):
+        save_calls.append(dict(data))
+
+    mock_store.async_save = AsyncMock(side_effect=record_save)
+    mock_store.async_load = AsyncMock(
+        return_value={"test_timer": {"end_time": "2099-01-01T00:00:00+00:00", "duration": 300}}
+    )
 
     await timer.setup(
-        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+        hass, kmlock, slow_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
     )
 
     with patch("custom_components.keymaster.helpers.async_call_later") as mock_call_later:
@@ -1329,22 +1380,21 @@ async def mock_action(*args):
             await timer.start()
         callback_fn = mock_call_later.call_args[1]["action"]
 
-    # Make async_load yield once so we can detach during the remove
-    async def yielding_load():
-        await asyncio.sleep(0)
-        return {"test_timer": {"end_time": timer._end_time.isoformat(), "duration": 300}}
-
-    mock_store.async_load = AsyncMock(side_effect=yielding_load)
-
-    # Kick off _on_expired; it'll yield inside _remove_from_store
+    save_calls.clear()  # ignore start()'s persist
     expire_task = asyncio.create_task(callback_fn(dt_util.utcnow()))
+    await action_started.wait()
+
+    # Reload arrives mid-action; detach must wait for the callback to finish
+    detach_task = asyncio.create_task(timer.detach())
     await asyncio.sleep(0)
-    # Detach during the remove-await
-    await timer.detach()
-    await expire_task
+    assert not detach_task.done()
+
+    action_release.set()
+    await asyncio.gather(expire_task, detach_task)
 
-    # Action must NOT have fired against the orphaned kmlock
-    assert not action_called
+    # Final save must be the cleanup with entry removed
+    assert save_calls, "remove must have called async_save"
+    assert "test_timer" not in save_calls[-1], "store entry must be removed"
 
 
 async def test_keymaster_timer_persist_recheck_aborts_after_cancel(hass, mock_store, store_lock):

From 86b0f995b8db5367703f311a1fbada3bc1432770 Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sun, 3 May 2026 00:15:28 -0400
Subject: [PATCH 15/23] fix: preserve store entry on action failure; rollback
 on _update_lock failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two more Copilot-flagged real bugs:

1. _on_expired's try/except around action(now) caught the exception and
   STILL ran the cleanup, removing the persisted entry. If the lock
   service genuinely failed, the door stays unlocked AND the autolock
   state is silently lost (no replay on restart).
   Fix: skip cleanup when action raises so the entry stays in the
   store and replays on next HA restart.

2. _update_lock had no rollback after detach. If any subsequent await
   raised (e.g. _unsubscribe_listeners, _update_listeners, or any of
   the entity_registry operations), the old timer was already detached
   so autolock was silently disabled until the next reload.
   Fix: extract post-detach body to _update_lock_inner; wrap in
   try/except that re-runs _setup_timer on whichever kmlock is
   currently in self.kmlocks. The store entry was preserved by detach,
   so the rollback resumes the autolock from disk.

New regression tests:
- test_keymaster_timer_on_expired_preserves_store_when_action_raises
  — verifies action failure preserves the entry
- test_update_lock_restores_timer_on_inner_failure — verifies the
  rollback re-attaches the timer after a mid-update exception

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/coordinator.py | 28 +++++++++++-
 custom_components/keymaster/helpers.py     | 16 +++++--
 tests/test_coordinator.py                  | 50 ++++++++++++++++++++++
 tests/test_helpers.py                      | 43 +++++++++++++++++++
 4 files changed, 133 insertions(+), 4 deletions(-)

diff --git a/custom_components/keymaster/coordinator.py b/custom_components/keymaster/coordinator.py
index effffeb4..74e15d69 100644
--- a/custom_components/keymaster/coordinator.py
+++ b/custom_components/keymaster/coordinator.py
@@ -1083,8 +1083,35 @@ async def _update_lock(self, new: KeymasterLock) -> bool:
         # callback can't fire during the await chain. detach() awaits any
         # in-flight _on_expired so any mutations it makes to old (e.g.
         # pending_retry_lock) are visible when we copy state below.
+        # Once detached, the old timer is unusable until the new timer is
+        # set up — so wrap the rest of the body in a try/except that
+        # restores the autolock on whichever kmlock is current if any
+        # subsequent step fails. The store entry was preserved by detach,
+        # so _setup_timer() will resume the autolock from disk.
         if old.autolock_timer:
             await old.autolock_timer.detach()
+        try:
+            await self._update_lock_inner(old, new)
+        except Exception:
+            current = self.kmlocks.get(new.keymaster_config_entry_id)
+            if current is not None:
+                _LOGGER.exception(
+                    "[update_lock] %s: Update failed mid-flight; "
+                    "restoring autolock timer to keep state from being silently lost",
+                    new.lock_name,
+                )
+                try:
+                    await self._setup_timer(current)
+                except Exception:
+                    _LOGGER.exception(
+                        "[update_lock] %s: Failed to restore autolock timer after rollback",
+                        new.lock_name,
+                    )
+            raise
+        return True
+
+    async def _update_lock_inner(self, old: KeymasterLock, new: KeymasterLock) -> None:
+        """Body of _update_lock after the old timer has been detached."""
         await KeymasterCoordinator._unsubscribe_listeners(old)
         # _LOGGER.debug("[update_lock] %s: old: %s", new.lock_name, old)
         del_code_slots: list[int] = [
@@ -1146,7 +1173,6 @@ async def _update_lock(self, new: KeymasterLock) -> bool:
         await self._update_listeners(self.kmlocks[new.keymaster_config_entry_id])
         await self._setup_timer(self.kmlocks[new.keymaster_config_entry_id])
         await self.async_refresh()
-        return True
 
     async def _delete_lock(self, kmlock: KeymasterLock, _: dt) -> None:
         await self._initial_setup_done_event.wait()
diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index a9bc6a19..471813b4 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -240,9 +240,19 @@ async def _on_expired(now: dt) -> None:
                 try:
                     await action(now)
                 except Exception:
-                    _LOGGER.exception("[KeymasterTimer] Action raised in _on_expired")
-                # Cleanup happens regardless of whether detach() ran during
-                # the action; we own the store entry until we finish here.
+                    # Preserve the store entry so the timer replays on next
+                    # HA restart — the action (e.g. locking the door) is
+                    # safety-critical and a failure here means it didn't
+                    # happen yet. Skip the cleanup so retry remains possible.
+                    _LOGGER.exception(
+                        "[KeymasterTimer] Action raised in _on_expired; "
+                        "store entry preserved for retry on restart"
+                    )
+                    return
+                # Action succeeded — clean up. We use _remove_from_store
+                # directly (not cancel()) since cancel() is a noop on
+                # detached timers; an in-flight callback racing with detach
+                # must still remove the entry it owns.
                 await self._remove_from_store()
                 self._cancel_callbacks()
                 self._end_time = None
diff --git a/tests/test_coordinator.py b/tests/test_coordinator.py
index 5ffaa579..be6ef102 100644
--- a/tests/test_coordinator.py
+++ b/tests/test_coordinator.py
@@ -1490,6 +1490,56 @@ async def test_update_lock_copies_pending_retry_lock(self, mock_coordinator):
 
         assert mock_coordinator.kmlocks[entry_id].pending_retry_lock is True
 
+    async def test_update_lock_restores_timer_on_inner_failure(self, mock_coordinator):
+        """Test _update_lock re-attaches the autolock timer if a later step raises.
+
+        Without rollback, an exception in any of the await steps after detach()
+        leaves the kmlock with no working timer until the next reload, silently
+        disabling autolock.
+        """
+        entry_id = "test_entry_rollback"
+
+        old_lock = KeymasterLock(
+            lock_name="test_lock",
+            lock_entity_id="lock.test",
+            keymaster_config_entry_id=entry_id,
+        )
+        old_lock.starting_code_slot = 1
+        old_lock.number_of_code_slots = 1
+        old_lock.code_slots = {1: Mock(accesslimit_day_of_week=None)}
+        old_lock.autolock_timer = Mock()
+        old_lock.autolock_timer.detach = AsyncMock()
+
+        new_lock = KeymasterLock(
+            lock_name="test_lock",
+            lock_entity_id="lock.test",
+            keymaster_config_entry_id=entry_id,
+        )
+        new_lock.starting_code_slot = 1
+        new_lock.number_of_code_slots = 1
+        new_lock.code_slots = {1: Mock(accesslimit_day_of_week=None)}
+
+        mock_coordinator.kmlocks[entry_id] = old_lock
+        mock_coordinator._initial_setup_done_event.set()
+
+        with (
+            # Make _unsubscribe_listeners raise to simulate a failure
+            patch.object(
+                KeymasterCoordinator,
+                "_unsubscribe_listeners",
+                new=AsyncMock(side_effect=RuntimeError("teardown failed")),
+            ),
+            patch.object(mock_coordinator, "_setup_timer", new=AsyncMock()) as mock_setup_timer,
+            pytest.raises(RuntimeError, match="teardown failed"),
+        ):
+            await mock_coordinator._update_lock(new_lock)
+
+        # Rollback must call _setup_timer on the kmlock currently in self.kmlocks
+        # (which is still old_lock since we failed before kmlocks was replaced)
+        # so the autolock isn't silently disabled.
+        mock_setup_timer.assert_called_once()
+        assert mock_setup_timer.call_args.args[0] is mock_coordinator.kmlocks[entry_id]
+
 
 # ============================================================================
 # State Synchronization Tests
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index 274f357f..31cd2fcd 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -1204,6 +1204,49 @@ async def mock_action(*args):
     assert action_called, "action must fire even if storage cleanup fails"
 
 
+async def test_keymaster_timer_on_expired_preserves_store_when_action_raises(
+    hass, mock_store, store_lock
+):
+    """Test the store entry is preserved when the action raises.
+
+    The lock action is safety-critical. If it fails (e.g. lock service
+    raised), the door is still unlocked — we MUST preserve the store
+    entry so the timer replays on the next HA restart, otherwise the
+    autolock state is silently lost forever.
+    """
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+    kmlock.autolock_min_day = 5
+
+    async def failing_action(*args):
+        raise RuntimeError("lock service failed")
+
+    await timer.setup(
+        hass, kmlock, failing_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+
+    with patch("custom_components.keymaster.helpers.async_call_later") as mock_call_later:
+        with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
+            await timer.start()
+        callback_fn = mock_call_later.call_args[1]["action"]
+
+    # Load returns the entry, so a remove call would actually save
+    mock_store.async_load = AsyncMock(
+        return_value={"test_timer": {"end_time": timer._end_time.isoformat(), "duration": 300}}
+    )
+    mock_store.async_save.reset_mock()
+
+    # Should not raise (action exception is caught), but should NOT remove from store
+    await callback_fn(dt_util.utcnow())
+
+    # Critical: no save call means the store entry was NOT removed
+    mock_store.async_save.assert_not_called()
+
+
 async def test_keymaster_timer_on_expired_skipped_after_detach(hass, mock_store, store_lock):
     """Test _on_expired is a no-op if detach() ran before it could fire.
 

From 7391c0ac9f69955e8135ae0b6a483b82a3a91f73 Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sun, 3 May 2026 09:39:03 -0400
Subject: [PATCH 16/23] refactor: inline _update_lock_inner back into
 _update_lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The _update_lock_inner extraction broke mypy's flow narrowing for
old.starting_code_slot / new.code_slots etc. that the outer
validation (return False) used to provide. Inline the body back so
mypy sees the narrowing again — the try/except wrapper still works
with the inlined body. Net: zero new mypy errors vs main baseline.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/coordinator.py | 132 +++++++++++----------
 1 file changed, 67 insertions(+), 65 deletions(-)

diff --git a/custom_components/keymaster/coordinator.py b/custom_components/keymaster/coordinator.py
index 74e15d69..a2503601 100644
--- a/custom_components/keymaster/coordinator.py
+++ b/custom_components/keymaster/coordinator.py
@@ -1091,7 +1091,73 @@ async def _update_lock(self, new: KeymasterLock) -> bool:
         if old.autolock_timer:
             await old.autolock_timer.detach()
         try:
-            await self._update_lock_inner(old, new)
+            await KeymasterCoordinator._unsubscribe_listeners(old)
+            # _LOGGER.debug("[update_lock] %s: old: %s", new.lock_name, old)
+            del_code_slots: list[int] = [
+                old.starting_code_slot + i for i in range(old.number_of_code_slots)
+            ]
+            for code_slot_num in range(
+                new.starting_code_slot,
+                new.starting_code_slot + new.number_of_code_slots,
+            ):
+                if code_slot_num in del_code_slots:
+                    del_code_slots.remove(code_slot_num)
+
+            new.lock_state = old.lock_state
+            new.door_state = old.door_state
+            new.autolock_enabled = old.autolock_enabled
+            new.autolock_min_day = old.autolock_min_day
+            new.autolock_min_night = old.autolock_min_night
+            new.retry_lock = old.retry_lock
+            # Preserve transient retry state; the door-open retry path on the
+            # replacement won't fire its lock action otherwise.
+            new.pending_retry_lock = old.pending_retry_lock
+            for code_slot_num, new_kmslot in new.code_slots.items():
+                if code_slot_num not in old.code_slots:
+                    continue
+                old_kmslot: KeymasterCodeSlot = old.code_slots[code_slot_num]
+                new_kmslot.enabled = old_kmslot.enabled
+                new_kmslot.name = old_kmslot.name
+                new_kmslot.pin = old_kmslot.pin
+                new_kmslot.override_parent = old_kmslot.override_parent
+                new_kmslot.notifications = old_kmslot.notifications
+                new_kmslot.accesslimit_count_enabled = old_kmslot.accesslimit_count_enabled
+                new_kmslot.accesslimit_count = old_kmslot.accesslimit_count
+                new_kmslot.accesslimit_date_range_enabled = (
+                    old_kmslot.accesslimit_date_range_enabled
+                )
+                new_kmslot.accesslimit_date_range_start = old_kmslot.accesslimit_date_range_start
+                new_kmslot.accesslimit_date_range_end = old_kmslot.accesslimit_date_range_end
+                new_kmslot.accesslimit_day_of_week_enabled = (
+                    old_kmslot.accesslimit_day_of_week_enabled
+                )
+                if not new_kmslot.accesslimit_day_of_week:
+                    continue
+                for dow_num, new_dow in new_kmslot.accesslimit_day_of_week.items():
+                    if not old_kmslot.accesslimit_day_of_week:
+                        continue
+                    old_dow: KeymasterCodeSlotDayOfWeek = old_kmslot.accesslimit_day_of_week[
+                        dow_num
+                    ]
+                    new_dow.dow_enabled = old_dow.dow_enabled
+                    new_dow.limit_by_time = old_dow.limit_by_time
+                    new_dow.include_exclude = old_dow.include_exclude
+                    new_dow.time_start = old_dow.time_start
+                    new_dow.time_end = old_dow.time_end
+            self.kmlocks[new.keymaster_config_entry_id] = new
+            # _LOGGER.debug("[update_lock] %s: new: %s", new.lock_name, new)
+            _LOGGER.debug("[update_lock] Code slot entities to delete: %s", del_code_slots)
+            for code_slot_num in del_code_slots:
+                await delete_code_slot_entities(
+                    hass=self.hass,
+                    keymaster_config_entry_id=new.keymaster_config_entry_id,
+                    code_slot_num=code_slot_num,
+                )
+            await self._rebuild_lock_relationships()
+            await self._update_door_and_lock_state()
+            await self._update_listeners(self.kmlocks[new.keymaster_config_entry_id])
+            await self._setup_timer(self.kmlocks[new.keymaster_config_entry_id])
+            await self.async_refresh()
         except Exception:
             current = self.kmlocks.get(new.keymaster_config_entry_id)
             if current is not None:
@@ -1110,70 +1176,6 @@ async def _update_lock(self, new: KeymasterLock) -> bool:
             raise
         return True
 
-    async def _update_lock_inner(self, old: KeymasterLock, new: KeymasterLock) -> None:
-        """Body of _update_lock after the old timer has been detached."""
-        await KeymasterCoordinator._unsubscribe_listeners(old)
-        # _LOGGER.debug("[update_lock] %s: old: %s", new.lock_name, old)
-        del_code_slots: list[int] = [
-            old.starting_code_slot + i for i in range(old.number_of_code_slots)
-        ]
-        for code_slot_num in range(
-            new.starting_code_slot,
-            new.starting_code_slot + new.number_of_code_slots,
-        ):
-            if code_slot_num in del_code_slots:
-                del_code_slots.remove(code_slot_num)
-
-        new.lock_state = old.lock_state
-        new.door_state = old.door_state
-        new.autolock_enabled = old.autolock_enabled
-        new.autolock_min_day = old.autolock_min_day
-        new.autolock_min_night = old.autolock_min_night
-        new.retry_lock = old.retry_lock
-        # Preserve transient retry state; the door-open retry path on the
-        # replacement won't fire its lock action otherwise.
-        new.pending_retry_lock = old.pending_retry_lock
-        for code_slot_num, new_kmslot in new.code_slots.items():
-            if code_slot_num not in old.code_slots:
-                continue
-            old_kmslot: KeymasterCodeSlot = old.code_slots[code_slot_num]
-            new_kmslot.enabled = old_kmslot.enabled
-            new_kmslot.name = old_kmslot.name
-            new_kmslot.pin = old_kmslot.pin
-            new_kmslot.override_parent = old_kmslot.override_parent
-            new_kmslot.notifications = old_kmslot.notifications
-            new_kmslot.accesslimit_count_enabled = old_kmslot.accesslimit_count_enabled
-            new_kmslot.accesslimit_count = old_kmslot.accesslimit_count
-            new_kmslot.accesslimit_date_range_enabled = old_kmslot.accesslimit_date_range_enabled
-            new_kmslot.accesslimit_date_range_start = old_kmslot.accesslimit_date_range_start
-            new_kmslot.accesslimit_date_range_end = old_kmslot.accesslimit_date_range_end
-            new_kmslot.accesslimit_day_of_week_enabled = old_kmslot.accesslimit_day_of_week_enabled
-            if not new_kmslot.accesslimit_day_of_week:
-                continue
-            for dow_num, new_dow in new_kmslot.accesslimit_day_of_week.items():
-                if not old_kmslot.accesslimit_day_of_week:
-                    continue
-                old_dow: KeymasterCodeSlotDayOfWeek = old_kmslot.accesslimit_day_of_week[dow_num]
-                new_dow.dow_enabled = old_dow.dow_enabled
-                new_dow.limit_by_time = old_dow.limit_by_time
-                new_dow.include_exclude = old_dow.include_exclude
-                new_dow.time_start = old_dow.time_start
-                new_dow.time_end = old_dow.time_end
-        self.kmlocks[new.keymaster_config_entry_id] = new
-        # _LOGGER.debug("[update_lock] %s: new: %s", new.lock_name, new)
-        _LOGGER.debug("[update_lock] Code slot entities to delete: %s", del_code_slots)
-        for code_slot_num in del_code_slots:
-            await delete_code_slot_entities(
-                hass=self.hass,
-                keymaster_config_entry_id=new.keymaster_config_entry_id,
-                code_slot_num=code_slot_num,
-            )
-        await self._rebuild_lock_relationships()
-        await self._update_door_and_lock_state()
-        await self._update_listeners(self.kmlocks[new.keymaster_config_entry_id])
-        await self._setup_timer(self.kmlocks[new.keymaster_config_entry_id])
-        await self.async_refresh()
-
     async def _delete_lock(self, kmlock: KeymasterLock, _: dt) -> None:
         await self._initial_setup_done_event.wait()
         _LOGGER.debug("[delete_lock] %s: Triggered", kmlock.lock_name)

From a083c34adade9efd23cf82f3382e3d3a711e6582 Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sun, 3 May 2026 09:47:28 -0400
Subject: [PATCH 17/23] fix: address three more Copilot-flagged reload safety
 issues

Three real bugs caught by Copilot review of 7391c0a:

1. setup() didn't reset _detached: rollback path reuses the detached
   timer instance via _setup_timer(current); without resetting the flag
   future cancel() calls would silently no-op and the user couldn't
   stop the restored autolock.

2. setup() recovery path removed the store entry BEFORE running the
   action. If the lock action failed during startup recovery, the entry
   was gone and the autolock state was lost forever instead of being
   replayed on next restart. Mirrors the _on_expired contract: extract
   _run_recovery_action helper that only removes on success.

3. _update_lock rollback only re-attached the timer, not listeners.
   _unsubscribe_listeners runs early in the try block; if a later step
   raises, the surviving lock has no listeners until another reload.
   Add _update_listeners(current) to the rollback path.

Plus coverage gap (behavior D from review): no test was asserting
_persist_to_store / _remove_from_store raise RuntimeError when
_store_lock is missing. Add two short tests so a silent regression
back to log-and-return would be caught.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/coordinator.py | 12 ++-
 custom_components/keymaster/helpers.py     | 37 +++++++--
 tests/test_coordinator.py                  | 23 +++--
 tests/test_helpers.py                      | 97 ++++++++++++++++++++++
 4 files changed, 154 insertions(+), 15 deletions(-)

diff --git a/custom_components/keymaster/coordinator.py b/custom_components/keymaster/coordinator.py
index a2503601..163124c1 100644
--- a/custom_components/keymaster/coordinator.py
+++ b/custom_components/keymaster/coordinator.py
@@ -1163,9 +1163,19 @@ async def _update_lock(self, new: KeymasterLock) -> bool:
             if current is not None:
                 _LOGGER.exception(
                     "[update_lock] %s: Update failed mid-flight; "
-                    "restoring autolock timer to keep state from being silently lost",
+                    "restoring autolock timer and listeners to keep state from being silently lost",
                     new.lock_name,
                 )
+                # Re-attach listeners — _unsubscribe_listeners runs early in
+                # the try block, so a later failure leaves the surviving
+                # kmlock with no event listeners until another reload.
+                try:
+                    await self._update_listeners(current)
+                except Exception:
+                    _LOGGER.exception(
+                        "[update_lock] %s: Failed to restore listeners after rollback",
+                        new.lock_name,
+                    )
                 try:
                     await self._setup_timer(current)
                 except Exception:
diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index 471813b4..1aa12960 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -103,11 +103,13 @@ async def setup(
         self._timer_id = timer_id
         self._store_lock = store_lock
         self._store = store
+        # Reset detached state — setup() may be called on a previously-detached
+        # instance (e.g. _update_lock rollback reuses the old timer object);
+        # without this reset, future cancel() calls would silently no-op.
+        self._detached = False
 
-        # Recover persisted timer under the lock so any in-flight persist/cancel
-        # from an outgoing timer (e.g. config entry reload) finishes first. This
-        # prevents reading a stale empty store while the old timer is mid-save.
-        # We call the _locked helper directly since asyncio.Lock isn't reentrant.
+        # Hold the lock across load + cleanup so we don't race a concurrent
+        # persist from the outgoing timer during config entry reload.
         async with self._store_lock:
             data = await store.async_load() or {}
             timer_data = data.get(timer_id)
@@ -128,8 +130,12 @@ async def setup(
                     "[KeymasterTimer] %s: Persisted timer expired during downtime, firing",
                     timer_id,
                 )
-                await self._remove_from_store_locked()
-                hass.async_create_task(call_action(dt_util.utcnow()))
+                # Schedule the action to run on the event loop. The handler
+                # (_run_recovery_action) only removes the store entry on
+                # success, mirroring _on_expired's preserve-on-failure
+                # semantics so a transient action failure replays on the
+                # NEXT restart instead of being lost forever.
+                hass.async_create_task(self._run_recovery_action(dt_util.utcnow()))
             else:
                 _LOGGER.debug(
                     "[KeymasterTimer] %s: Resuming persisted timer, ending %s",
@@ -138,6 +144,25 @@ async def setup(
                 )
                 await self._resume(end_time, duration)
 
+    async def _run_recovery_action(self, now: dt) -> None:
+        """Run the persisted action on startup recovery; only remove on success.
+
+        Mirrors _on_expired's safety contract: if the action raises, the
+        store entry is preserved so the timer replays on the next restart.
+        """
+        if self._call_action is None:
+            return
+        try:
+            await self._call_action(now)
+        except Exception:
+            _LOGGER.exception(
+                "[KeymasterTimer] %s: Recovery action raised; "
+                "store entry preserved for retry on restart",
+                self._timer_id,
+            )
+            return
+        await self._remove_from_store()
+
     async def start(self) -> bool:
         """Start a timer."""
         if not self.hass or not self._kmlock or not self._call_action:
diff --git a/tests/test_coordinator.py b/tests/test_coordinator.py
index be6ef102..bfeb3603 100644
--- a/tests/test_coordinator.py
+++ b/tests/test_coordinator.py
@@ -1523,20 +1523,27 @@ async def test_update_lock_restores_timer_on_inner_failure(self, mock_coordinato
         mock_coordinator._initial_setup_done_event.set()
 
         with (
-            # Make _unsubscribe_listeners raise to simulate a failure
+            # Make _rebuild_lock_relationships raise so we get past the initial
+            # _unsubscribe_listeners (so the rollback has something to restore)
+            patch.object(KeymasterCoordinator, "_unsubscribe_listeners", new=AsyncMock()),
             patch.object(
-                KeymasterCoordinator,
-                "_unsubscribe_listeners",
-                new=AsyncMock(side_effect=RuntimeError("teardown failed")),
+                mock_coordinator,
+                "_rebuild_lock_relationships",
+                new=AsyncMock(side_effect=RuntimeError("rebuild failed")),
             ),
+            patch.object(
+                mock_coordinator, "_update_listeners", new=AsyncMock()
+            ) as mock_update_listeners,
             patch.object(mock_coordinator, "_setup_timer", new=AsyncMock()) as mock_setup_timer,
-            pytest.raises(RuntimeError, match="teardown failed"),
+            pytest.raises(RuntimeError, match="rebuild failed"),
         ):
             await mock_coordinator._update_lock(new_lock)
 
-        # Rollback must call _setup_timer on the kmlock currently in self.kmlocks
-        # (which is still old_lock since we failed before kmlocks was replaced)
-        # so the autolock isn't silently disabled.
+        # Rollback must restore BOTH listeners and timer on the kmlock
+        # currently in self.kmlocks, otherwise autolock and event-driven
+        # state changes are silently disabled.
+        mock_update_listeners.assert_called_once()
+        assert mock_update_listeners.call_args.args[0] is mock_coordinator.kmlocks[entry_id]
         mock_setup_timer.assert_called_once()
         assert mock_setup_timer.call_args.args[0] is mock_coordinator.kmlocks[entry_id]
 
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index 31cd2fcd..24190f2d 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -1440,6 +1440,103 @@ async def record_save(data):
     assert "test_timer" not in save_calls[-1], "store entry must be removed"
 
 
+async def test_keymaster_timer_setup_recovery_preserves_store_when_action_raises(
+    hass, mock_store, store_lock
+):
+    """Test setup() recovery preserves the store entry if the recovery action raises.
+
+    Mirrors _on_expired's safety contract: if the lock action fails on
+    startup recovery, the entry must stay so the timer replays on next
+    restart instead of being lost forever.
+    """
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+
+    expired_end_time = (dt_util.utcnow() - timedelta(minutes=5)).isoformat()
+    mock_store.async_load = AsyncMock(
+        return_value={"test_timer": {"end_time": expired_end_time, "duration": 300}}
+    )
+
+    async def failing_action(*args):
+        raise RuntimeError("recovery action failed")
+
+    await timer.setup(
+        hass, kmlock, failing_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+    mock_store.async_save.reset_mock()
+    await hass.async_block_till_done()
+
+    # Store entry must be preserved for retry on next restart
+    mock_store.async_save.assert_not_called()
+
+
+async def test_keymaster_timer_setup_resets_detached_flag(hass, mock_store, store_lock):
+    """Test setup() resets _detached so a previously-detached instance is reusable.
+
+    Without this reset, _update_lock's rollback path reuses the detached
+    timer object via _setup_timer(current); subsequent cancel() calls
+    would silently no-op and the user couldn't stop the autolock.
+    """
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+    kmlock.autolock_min_day = 5
+
+    async def mock_action(*args):
+        pass
+
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+    await timer.detach()
+    assert timer._detached is True
+
+    # Re-setup the same instance (simulating rollback)
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+    assert timer._detached is False
+
+
+async def test_keymaster_timer_persist_raises_if_store_lock_missing(hass, mock_store):
+    """Test _persist_to_store raises RuntimeError when _store_lock is missing.
+
+    setup() always sets _store_lock; if it didn't run, persisting would
+    silently lose the autolock state across restarts. The raise converts
+    a silent failure into a loud one.
+    """
+    timer = KeymasterTimer()
+    timer._store = mock_store
+    timer._timer_id = "test_timer"
+    timer._end_time = dt_util.utcnow() + timedelta(minutes=5)
+    # _store_lock intentionally NOT set
+
+    with pytest.raises(RuntimeError, match="store_lock missing"):
+        await timer._persist_to_store()
+
+
+async def test_keymaster_timer_remove_raises_if_store_lock_missing(hass, mock_store):
+    """Test _remove_from_store raises RuntimeError when _store_lock is missing.
+
+    Without raising, a stale entry could remain and fire a phantom
+    autolock on the next restart.
+    """
+    timer = KeymasterTimer()
+    timer._store = mock_store
+    timer._timer_id = "test_timer"
+    # _store_lock intentionally NOT set
+
+    with pytest.raises(RuntimeError, match="store_lock missing"):
+        await timer._remove_from_store()
+
+
 async def test_keymaster_timer_persist_recheck_aborts_after_cancel(hass, mock_store, store_lock):
     """Test _persist_to_store's post-lock recheck aborts when cancel nulled _end_time.
 

From 9861be2d731f4eccc17aa26a64c81d7991cb1ac8 Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sun, 3 May 2026 10:09:34 -0400
Subject: [PATCH 18/23] refactor: extract _clear_timer_state() helper

DRY the cleanup quartet (_remove_from_store / _cancel_callbacks /
_end_time / _duration) shared between cancel() and _on_expired's
success path. Per code-simplifier review.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/helpers.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index 1aa12960..b43e44a0 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -198,6 +198,16 @@ async def cancel(self, timer_elapsed: dt | None = None) -> None:
             _LOGGER.debug("[KeymasterTimer] Timer elapsed")
         else:
             _LOGGER.debug("[KeymasterTimer] Cancelling auto-lock timer")
+        await self._clear_timer_state()
+
+    async def _clear_timer_state(self) -> None:
+        """Clear the persisted entry and reset in-memory timer fields.
+
+        Called from cancel() (user/coordinator-initiated) and from a
+        successful _on_expired (after the action fired). Direct callers
+        from _on_expired bypass cancel()'s _detached short-circuit so an
+        in-flight callback still removes the entry it owns.
+        """
         self._cancel_callbacks()
         self._end_time = None
         self._duration = None
@@ -274,14 +284,11 @@ async def _on_expired(now: dt) -> None:
                         "store entry preserved for retry on restart"
                     )
                     return
-                # Action succeeded — clean up. We use _remove_from_store
-                # directly (not cancel()) since cancel() is a noop on
+                # Action succeeded — clear state. We call _clear_timer_state
+                # directly instead of cancel() because cancel() is a noop on
                 # detached timers; an in-flight callback racing with detach
                 # must still remove the entry it owns.
-                await self._remove_from_store()
-                self._cancel_callbacks()
-                self._end_time = None
-                self._duration = None
+                await self._clear_timer_state()
             finally:
                 self._on_expired_task = None
 

From 11f1372e835b3336fec7a8235e629bc6affe445b Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sun, 3 May 2026 10:15:30 -0400
Subject: [PATCH 19/23] refactor: centralize lock state-transfer + revert
 recovery to atomic-claim
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two unrelated fixes batched:

1. State transfer: Add KeymasterCodeSlotDayOfWeek.derive_from_existing()
   and KeymasterCodeSlot.derive_from_existing() classmethods, plus
   KeymasterLock.inherit_state_from() instance method that orchestrates
   the entire old→new state transfer. The brittle 40-line copy block in
   coordinator._update_lock collapses to `new.inherit_state_from(old)`.
   Per review: this business logic belongs on the dataclass, not buried
   in the coordinator.

2. setup() recovery race: revert to remove-under-lock + schedule action
   pattern. The previous "background _run_recovery_action that removes
   on success" introduced two new races (Copilot-flagged):
   - A new start() between setup-return and recovery-completion gets
     silently wiped by the recovery's _remove_from_store
   - Two setup()s on the same timer_id (rollback) both see the entry
     before the background recovery removes it → double-fire
   The atomic-claim approach (remove BEFORE scheduling) prevents both.
   Trade-off: recovery action failure loses the autolock state. Accept
   this for the startup-only edge case — user's next lock interaction
   creates a fresh autolock cycle.

   Replace the now-obsolete "preserves on action failure" test with one
   that verifies the atomic-claim semantics: two concurrent setup()s
   yield exactly one action call.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/coordinator.py | 42 +-------------
 custom_components/keymaster/helpers.py     | 34 +++--------
 custom_components/keymaster/lock.py        | 66 ++++++++++++++++++++++
 tests/test_helpers.py                      | 54 ++++++++++++------
 4 files changed, 113 insertions(+), 83 deletions(-)

diff --git a/custom_components/keymaster/coordinator.py b/custom_components/keymaster/coordinator.py
index 163124c1..489c4795 100644
--- a/custom_components/keymaster/coordinator.py
+++ b/custom_components/keymaster/coordinator.py
@@ -1103,47 +1103,7 @@ async def _update_lock(self, new: KeymasterLock) -> bool:
                 if code_slot_num in del_code_slots:
                     del_code_slots.remove(code_slot_num)
 
-            new.lock_state = old.lock_state
-            new.door_state = old.door_state
-            new.autolock_enabled = old.autolock_enabled
-            new.autolock_min_day = old.autolock_min_day
-            new.autolock_min_night = old.autolock_min_night
-            new.retry_lock = old.retry_lock
-            # Preserve transient retry state; the door-open retry path on the
-            # replacement won't fire its lock action otherwise.
-            new.pending_retry_lock = old.pending_retry_lock
-            for code_slot_num, new_kmslot in new.code_slots.items():
-                if code_slot_num not in old.code_slots:
-                    continue
-                old_kmslot: KeymasterCodeSlot = old.code_slots[code_slot_num]
-                new_kmslot.enabled = old_kmslot.enabled
-                new_kmslot.name = old_kmslot.name
-                new_kmslot.pin = old_kmslot.pin
-                new_kmslot.override_parent = old_kmslot.override_parent
-                new_kmslot.notifications = old_kmslot.notifications
-                new_kmslot.accesslimit_count_enabled = old_kmslot.accesslimit_count_enabled
-                new_kmslot.accesslimit_count = old_kmslot.accesslimit_count
-                new_kmslot.accesslimit_date_range_enabled = (
-                    old_kmslot.accesslimit_date_range_enabled
-                )
-                new_kmslot.accesslimit_date_range_start = old_kmslot.accesslimit_date_range_start
-                new_kmslot.accesslimit_date_range_end = old_kmslot.accesslimit_date_range_end
-                new_kmslot.accesslimit_day_of_week_enabled = (
-                    old_kmslot.accesslimit_day_of_week_enabled
-                )
-                if not new_kmslot.accesslimit_day_of_week:
-                    continue
-                for dow_num, new_dow in new_kmslot.accesslimit_day_of_week.items():
-                    if not old_kmslot.accesslimit_day_of_week:
-                        continue
-                    old_dow: KeymasterCodeSlotDayOfWeek = old_kmslot.accesslimit_day_of_week[
-                        dow_num
-                    ]
-                    new_dow.dow_enabled = old_dow.dow_enabled
-                    new_dow.limit_by_time = old_dow.limit_by_time
-                    new_dow.include_exclude = old_dow.include_exclude
-                    new_dow.time_start = old_dow.time_start
-                    new_dow.time_end = old_dow.time_end
+            new.inherit_state_from(old)
             self.kmlocks[new.keymaster_config_entry_id] = new
             # _LOGGER.debug("[update_lock] %s: new: %s", new.lock_name, new)
             _LOGGER.debug("[update_lock] Code slot entities to delete: %s", del_code_slots)
diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index b43e44a0..d40fa68d 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -130,12 +130,15 @@ async def setup(
                     "[KeymasterTimer] %s: Persisted timer expired during downtime, firing",
                     timer_id,
                 )
-                # Schedule the action to run on the event loop. The handler
-                # (_run_recovery_action) only removes the store entry on
-                # success, mirroring _on_expired's preserve-on-failure
-                # semantics so a transient action failure replays on the
-                # NEXT restart instead of being lost forever.
-                hass.async_create_task(self._run_recovery_action(dt_util.utcnow()))
+                # Remove the entry atomically with detection so a concurrent
+                # setup() (e.g. during reload/rollback) can't see the same
+                # expired entry and schedule the recovery action twice. The
+                # trade-off: if the recovery action then fails, the autolock
+                # state is lost (no replay on next restart) — acceptable for
+                # this startup-only edge case since the user's next lock
+                # interaction creates a fresh autolock cycle.
+                await self._remove_from_store_locked()
+                hass.async_create_task(call_action(dt_util.utcnow()))
             else:
                 _LOGGER.debug(
                     "[KeymasterTimer] %s: Resuming persisted timer, ending %s",
@@ -144,25 +147,6 @@ async def setup(
                 )
                 await self._resume(end_time, duration)
 
-    async def _run_recovery_action(self, now: dt) -> None:
-        """Run the persisted action on startup recovery; only remove on success.
-
-        Mirrors _on_expired's safety contract: if the action raises, the
-        store entry is preserved so the timer replays on the next restart.
-        """
-        if self._call_action is None:
-            return
-        try:
-            await self._call_action(now)
-        except Exception:
-            _LOGGER.exception(
-                "[KeymasterTimer] %s: Recovery action raised; "
-                "store entry preserved for retry on restart",
-                self._timer_id,
-            )
-            return
-        await self._remove_from_store()
-
     async def start(self) -> bool:
         """Start a timer."""
         if not self.hass or not self._kmlock or not self._call_action:
diff --git a/custom_components/keymaster/lock.py b/custom_components/keymaster/lock.py
index 0301479e..44770396 100644
--- a/custom_components/keymaster/lock.py
+++ b/custom_components/keymaster/lock.py
@@ -26,6 +26,19 @@ class KeymasterCodeSlotDayOfWeek:
     time_start: dt_time | None = None
     time_end: dt_time | None = None
 
+    @classmethod
+    def derive_from_existing(cls, old: KeymasterCodeSlotDayOfWeek) -> KeymasterCodeSlotDayOfWeek:
+        """Build a new DOW instance carrying the user-configurable state of ``old``."""
+        return cls(
+            day_of_week_num=old.day_of_week_num,
+            day_of_week_name=old.day_of_week_name,
+            dow_enabled=old.dow_enabled,
+            limit_by_time=old.limit_by_time,
+            include_exclude=old.include_exclude,
+            time_start=old.time_start,
+            time_end=old.time_end,
+        )
+
 
 @dataclass
 class KeymasterCodeSlot:
@@ -47,6 +60,34 @@ class KeymasterCodeSlot:
     accesslimit_day_of_week_enabled: bool = False
     accesslimit_day_of_week: MutableMapping[int, KeymasterCodeSlotDayOfWeek] | None = None
 
+    @classmethod
+    def derive_from_existing(cls, old: KeymasterCodeSlot, *, number: int) -> KeymasterCodeSlot:
+        """Build a new code slot for ``number`` carrying user state from ``old``.
+
+        Runtime-only fields (``active``, ``synced``) are intentionally left at
+        their defaults — they belong to the new instance's lifecycle.
+        """
+        new = cls(
+            number=number,
+            enabled=old.enabled,
+            name=old.name,
+            pin=old.pin,
+            override_parent=old.override_parent,
+            notifications=old.notifications,
+            accesslimit_count_enabled=old.accesslimit_count_enabled,
+            accesslimit_count=old.accesslimit_count,
+            accesslimit_date_range_enabled=old.accesslimit_date_range_enabled,
+            accesslimit_date_range_start=old.accesslimit_date_range_start,
+            accesslimit_date_range_end=old.accesslimit_date_range_end,
+            accesslimit_day_of_week_enabled=old.accesslimit_day_of_week_enabled,
+        )
+        if old.accesslimit_day_of_week:
+            new.accesslimit_day_of_week = {
+                dow_num: KeymasterCodeSlotDayOfWeek.derive_from_existing(old_dow)
+                for dow_num, old_dow in old.accesslimit_day_of_week.items()
+            }
+        return new
+
 
 @dataclass
 class KeymasterLock:
@@ -84,6 +125,31 @@ class KeymasterLock:
     # Transient runtime-only field; excluded from persistence (init=False).
     masked_code_slots: set[int] = field(default_factory=set, init=False, repr=False)
 
+    def inherit_state_from(self, old: KeymasterLock) -> None:
+        """Carry user/runtime state from a previous instance into this one.
+
+        Called when a config entry is reloaded: the new lock instance is
+        constructed fresh from config, but state the user owns (autolock
+        config, current lock/door state, code slot contents, in-flight
+        retry) must survive the swap. Owning this on the dataclass keeps
+        the field-by-field copy logic next to the field declarations
+        rather than scattered through the coordinator.
+        """
+        self.lock_state = old.lock_state
+        self.door_state = old.door_state
+        self.autolock_enabled = old.autolock_enabled
+        self.autolock_min_day = old.autolock_min_day
+        self.autolock_min_night = old.autolock_min_night
+        self.retry_lock = old.retry_lock
+        self.pending_retry_lock = old.pending_retry_lock
+        if not self.code_slots or not old.code_slots:
+            return
+        for code_slot_num in self.code_slots:
+            if code_slot_num in old.code_slots:
+                self.code_slots[code_slot_num] = KeymasterCodeSlot.derive_from_existing(
+                    old.code_slots[code_slot_num], number=code_slot_num
+                )
+
 
 keymasterlock_type_lookup: MutableMapping[str, type] = {
     "lock_name": str,
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index 24190f2d..fb5e4b65 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -1440,38 +1440,58 @@ async def record_save(data):
     assert "test_timer" not in save_calls[-1], "store entry must be removed"
 
 
-async def test_keymaster_timer_setup_recovery_preserves_store_when_action_raises(
+async def test_keymaster_timer_setup_recovery_removes_entry_atomically(
     hass, mock_store, store_lock
 ):
-    """Test setup() recovery preserves the store entry if the recovery action raises.
+    """Test setup() recovery removes the expired entry under the lock.
 
-    Mirrors _on_expired's safety contract: if the lock action fails on
-    startup recovery, the entry must stay so the timer replays on next
-    restart instead of being lost forever.
+    Two setup() calls on the same timer_id (e.g. during reload/rollback)
+    must not both schedule the recovery action. Removing the entry
+    atomically with detection ensures the second setup sees no entry
+    and doesn't schedule a duplicate firing.
     """
-    timer = KeymasterTimer()
+    expired_end_time = (dt_util.utcnow() - timedelta(minutes=5)).isoformat()
+    # Stateful store: load returns whatever is currently saved
+    state: dict = {"test_timer": {"end_time": expired_end_time, "duration": 300}}
+
+    async def stateful_load():
+        return dict(state)
+
+    async def stateful_save(data):
+        state.clear()
+        state.update(data)
+
+    mock_store.async_load = AsyncMock(side_effect=stateful_load)
+    mock_store.async_save = AsyncMock(side_effect=stateful_save)
+
+    action_call_count = 0
+
+    async def mock_action(*args):
+        nonlocal action_call_count
+        action_call_count += 1
+
     kmlock = KeymasterLock(
         lock_name="test_lock",
         lock_entity_id="lock.test_lock",
         keymaster_config_entry_id="test_entry",
     )
 
-    expired_end_time = (dt_util.utcnow() - timedelta(minutes=5)).isoformat()
-    mock_store.async_load = AsyncMock(
-        return_value={"test_timer": {"end_time": expired_end_time, "duration": 300}}
+    # First setup discovers expired entry, claims (removes), schedules action
+    timer1 = KeymasterTimer()
+    await timer1.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
     )
 
-    async def failing_action(*args):
-        raise RuntimeError("recovery action failed")
-
-    await timer.setup(
-        hass, kmlock, failing_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    # Second setup on the SAME timer_id must see no entry and skip scheduling
+    timer2 = KeymasterTimer()
+    await timer2.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
     )
-    mock_store.async_save.reset_mock()
+
     await hass.async_block_till_done()
 
-    # Store entry must be preserved for retry on next restart
-    mock_store.async_save.assert_not_called()
+    assert action_call_count == 1, "recovery action must fire exactly once"
+    assert "test_timer" not in state, "entry must be removed by the recovering setup"
 
 
 async def test_keymaster_timer_setup_resets_detached_flag(hass, mock_store, store_lock):

From 2778cad2d13916bdd4b016df346ba8cf06029392 Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sun, 3 May 2026 10:24:35 -0400
Subject: [PATCH 20/23] fix: unsubscribe listeners before detach; reset
 preserved fields in setup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two more Copilot-flagged real reload races:

1. Detaching the old timer BEFORE _unsubscribe_listeners exposed a
   window where in-flight provider/door callbacks (lock/door state
   change handlers) could call old.autolock_timer.start()/cancel() on
   the now-detached timer — start() would silently fail (refs nulled),
   cancel() would noop (_detached=True). A user lock/unlock racing
   with reload could either drop the autolock entirely or leave a
   stale store entry that the replacement timer replays.

   Fix: swap the order — unsubscribe listeners first, then detach.
   detach still awaits the in-flight _on_expired so the earlier
   "timer fires during await chain" concern stays addressed.

2. setup() called on a previously-detached instance (rollback path)
   only reset _detached. detach() preserves _end_time/_duration so an
   in-flight persist can still write them — but if setup's load found
   no entry in the store, those preserved fields stayed set on the
   rebound instance, making is_running falsely report True for a
   timer with no scheduled callback.

   Fix: also clear _end_time/_duration in setup(). They get repopulated
   by _resume() if there's a valid persisted entry.

Test updates:
- test_update_lock_detaches_old_timer_before_any_await — invert the
  ordering assertion to match the new correct order
- test_keymaster_timer_setup_resets_detached_and_state — extend to
  also assert _end_time/_duration are cleared and is_running is False
  after re-setup with an empty store

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/coordinator.py | 19 ++++++++--------
 custom_components/keymaster/helpers.py     | 11 ++++++---
 tests/test_coordinator.py                  | 12 ++++++----
 tests/test_helpers.py                      | 26 ++++++++++++++++------
 4 files changed, 44 insertions(+), 24 deletions(-)

diff --git a/custom_components/keymaster/coordinator.py b/custom_components/keymaster/coordinator.py
index 489c4795..563814cf 100644
--- a/custom_components/keymaster/coordinator.py
+++ b/custom_components/keymaster/coordinator.py
@@ -1079,19 +1079,18 @@ async def _update_lock(self, new: KeymasterLock) -> bool:
             or not old.code_slots
         ):
             return False
-        # Detach the old timer BEFORE any other awaits so its scheduled
-        # callback can't fire during the await chain. detach() awaits any
-        # in-flight _on_expired so any mutations it makes to old (e.g.
-        # pending_retry_lock) are visible when we copy state below.
+        # Unsubscribe listeners FIRST so in-flight provider/door callbacks
+        # can't reach the about-to-be-detached timer (where start()/cancel()
+        # would silently fail or no-op). Then detach the old timer; its
+        # detach() awaits any in-flight _on_expired so mutations it made on
+        # old (e.g. pending_retry_lock) are visible to the state copy.
         # Once detached, the old timer is unusable until the new timer is
-        # set up — so wrap the rest of the body in a try/except that
-        # restores the autolock on whichever kmlock is current if any
-        # subsequent step fails. The store entry was preserved by detach,
-        # so _setup_timer() will resume the autolock from disk.
-        if old.autolock_timer:
-            await old.autolock_timer.detach()
+        # set up — wrap the rest in try/except that restores listeners and
+        # the autolock on whichever kmlock is current if any step fails.
         try:
             await KeymasterCoordinator._unsubscribe_listeners(old)
+            if old.autolock_timer:
+                await old.autolock_timer.detach()
             # _LOGGER.debug("[update_lock] %s: old: %s", new.lock_name, old)
             del_code_slots: list[int] = [
                 old.starting_code_slot + i for i in range(old.number_of_code_slots)
diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index d40fa68d..2bd21f6b 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -103,10 +103,15 @@ async def setup(
         self._timer_id = timer_id
         self._store_lock = store_lock
         self._store = store
-        # Reset detached state — setup() may be called on a previously-detached
-        # instance (e.g. _update_lock rollback reuses the old timer object);
-        # without this reset, future cancel() calls would silently no-op.
+        # Reset state that detach() preserved — setup() may be called on a
+        # previously-detached instance (e.g. _update_lock rollback reuses the
+        # old timer object). Without this reset:
+        #   - cancel() would silently no-op (_detached=True)
+        #   - is_running could stay True with no callback scheduled
+        #     (_end_time still set from before detach)
         self._detached = False
+        self._end_time = None
+        self._duration = None
 
         # Hold the lock across load + cleanup so we don't race a concurrent
         # persist from the outgoing timer during config entry reload.
diff --git a/tests/test_coordinator.py b/tests/test_coordinator.py
index bfeb3603..02683137 100644
--- a/tests/test_coordinator.py
+++ b/tests/test_coordinator.py
@@ -1435,11 +1435,15 @@ async def record_setup(*args, **kwargs):
 
         old_lock.autolock_timer.detach.assert_called_once_with()
         mock_setup_timer.assert_called_once()
-        # Critical ordering: detach must run BEFORE any of the await calls
-        # in _update_lock, so the old timer can't fire during the gap.
-        assert call_order[0] == "detach", f"detach must be first, got {call_order}"
+        # Critical ordering: _unsubscribe_listeners runs BEFORE detach so
+        # in-flight provider/door callbacks can't reach the about-to-be
+        # detached timer (where start()/cancel() would silently fail or
+        # no-op). detach must still run before _setup_timer so the new
+        # timer can resume from the preserved store entry.
         assert "unsubscribe_listeners" in call_order
-        assert call_order.index("detach") < call_order.index("unsubscribe_listeners")
+        assert "detach" in call_order
+        assert "setup_timer" in call_order
+        assert call_order.index("unsubscribe_listeners") < call_order.index("detach")
         assert call_order.index("detach") < call_order.index("setup_timer")
         # And the new kmlock (now in self.kmlocks) is what _setup_timer received
         assert mock_setup_timer.call_args.args[0] is mock_coordinator.kmlocks[entry_id]
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index fb5e4b65..ebb3e6d0 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -1494,12 +1494,14 @@ async def mock_action(*args):
     assert "test_timer" not in state, "entry must be removed by the recovering setup"
 
 
-async def test_keymaster_timer_setup_resets_detached_flag(hass, mock_store, store_lock):
-    """Test setup() resets _detached so a previously-detached instance is reusable.
-
-    Without this reset, _update_lock's rollback path reuses the detached
-    timer object via _setup_timer(current); subsequent cancel() calls
-    would silently no-op and the user couldn't stop the autolock.
+async def test_keymaster_timer_setup_resets_detached_and_state(hass, mock_store, store_lock):
+    """Test setup() resets _detached AND clears _end_time/_duration.
+
+    Without resetting _detached, cancel() would silently no-op on the
+    rebound instance. Without clearing _end_time/_duration (which detach
+    intentionally preserves), is_running could stay True even though no
+    callback is scheduled — making the coordinator think autolock is
+    active when nothing will fire.
     """
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
@@ -1515,14 +1517,24 @@ async def mock_action(*args):
     await timer.setup(
         hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
     )
+    with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
+        await timer.start()
+    assert timer._end_time is not None
     await timer.detach()
     assert timer._detached is True
+    assert timer._end_time is not None  # detach preserves these
+    assert timer._duration is not None
+
+    # Empty store so setup's load returns nothing — exercise the early-return path
+    mock_store.async_load = AsyncMock(return_value={})
 
-    # Re-setup the same instance (simulating rollback)
     await timer.setup(
         hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
     )
     assert timer._detached is False
+    assert timer._end_time is None
+    assert timer._duration is None
+    assert not timer.is_running
 
 
 async def test_keymaster_timer_persist_raises_if_store_lock_missing(hass, mock_store):

From 5a0bb9493018dda085fedc37cf1a876fe0743688 Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sun, 3 May 2026 10:34:41 -0400
Subject: [PATCH 21/23] fix: rescue preserved state in setup; restore in
 reverse order on rollback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two more Copilot-flagged real reload races:

1. Clearing _end_time/_duration in setup() BEFORE acquiring _store_lock
   meant a queued in-flight _persist_to_store would see None on its
   post-lock recheck and abort, dropping a start() that was issued just
   before reload.

   Fix: don't clear pre-lock. Inside the lock, after the load:
     - If the store has an entry, use it (existing behavior).
     - If the store is empty BUT _end_time is set (preserved by detach
       from a recent start()), claim it: write back + _resume.
     - If the store is empty AND _end_time is None, clear stale fields
       so is_running doesn't lie.

   This handles both Copilot concerns (don't drop pending state, don't
   leave stale fields) without losing the queued persist's data.

2. _update_lock rollback restored listeners BEFORE the timer, mirroring
   the original buggy ordering: a provider/door callback could arrive
   in the gap and hit the still-detached timer or no timer at all.

   Fix: restore in reverse order — _setup_timer first, then
   _update_listeners. Listeners only fire after both timer and
   listeners exist on the rebound kmlock.

Test updates:
- Split test_keymaster_timer_setup_resets_detached_and_state into:
    - test_keymaster_timer_setup_resets_detached_flag (just _detached)
    - test_keymaster_timer_setup_rescues_preserved_state (the new
      rescue+resume behavior)
    - test_keymaster_timer_setup_clears_state_when_no_pending (the
      idle-rebind path)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/coordinator.py | 17 ++--
 custom_components/keymaster/helpers.py     | 37 +++++++--
 tests/test_helpers.py                      | 97 ++++++++++++++++++----
 3 files changed, 122 insertions(+), 29 deletions(-)

diff --git a/custom_components/keymaster/coordinator.py b/custom_components/keymaster/coordinator.py
index 563814cf..f119bae4 100644
--- a/custom_components/keymaster/coordinator.py
+++ b/custom_components/keymaster/coordinator.py
@@ -1125,21 +1125,24 @@ async def _update_lock(self, new: KeymasterLock) -> bool:
                     "restoring autolock timer and listeners to keep state from being silently lost",
                     new.lock_name,
                 )
-                # Re-attach listeners — _unsubscribe_listeners runs early in
-                # the try block, so a later failure leaves the surviving
-                # kmlock with no event listeners until another reload.
+                # Restore in REVERSE order of the teardown (mirror the
+                # listeners-then-detach order in the try block):
+                #   1. _setup_timer first so the rebound timer is fully
+                #      attached before any listener callback can reach it.
+                #   2. _update_listeners last so callbacks only fire after
+                #      both the timer and listeners exist.
                 try:
-                    await self._update_listeners(current)
+                    await self._setup_timer(current)
                 except Exception:
                     _LOGGER.exception(
-                        "[update_lock] %s: Failed to restore listeners after rollback",
+                        "[update_lock] %s: Failed to restore autolock timer after rollback",
                         new.lock_name,
                     )
                 try:
-                    await self._setup_timer(current)
+                    await self._update_listeners(current)
                 except Exception:
                     _LOGGER.exception(
-                        "[update_lock] %s: Failed to restore autolock timer after rollback",
+                        "[update_lock] %s: Failed to restore listeners after rollback",
                         new.lock_name,
                     )
             raise
diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index 2bd21f6b..642ebbab 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -103,15 +103,12 @@ async def setup(
         self._timer_id = timer_id
         self._store_lock = store_lock
         self._store = store
-        # Reset state that detach() preserved — setup() may be called on a
-        # previously-detached instance (e.g. _update_lock rollback reuses the
-        # old timer object). Without this reset:
-        #   - cancel() would silently no-op (_detached=True)
-        #   - is_running could stay True with no callback scheduled
-        #     (_end_time still set from before detach)
+        # Reset _detached so cancel() works on the rebound instance.
+        # _end_time/_duration are NOT cleared here — detach() preserved them
+        # for an in-flight persist that may still be queued behind the
+        # store lock; clearing pre-lock would let the queued persist see
+        # None and abort, dropping a start() from just before reload.
         self._detached = False
-        self._end_time = None
-        self._duration = None
 
         # Hold the lock across load + cleanup so we don't race a concurrent
         # persist from the outgoing timer during config entry reload.
@@ -119,6 +116,26 @@ async def setup(
             data = await store.async_load() or {}
             timer_data = data.get(timer_id)
             if not timer_data:
+                # No persisted entry. If detach preserved an in-memory
+                # _end_time (a start() just before reload that hadn't yet
+                # persisted), claim it now by writing+resuming. Otherwise
+                # clear stale fields so is_running doesn't lie.
+                if self._end_time and self._duration is not None:
+                    _LOGGER.debug(
+                        "[KeymasterTimer] %s: Rescuing preserved timer state from "
+                        "pre-reload start(); ending %s",
+                        timer_id,
+                        self._end_time,
+                    )
+                    data[timer_id] = {
+                        "end_time": self._end_time.isoformat(),
+                        "duration": self._duration,
+                    }
+                    await store.async_save(data)
+                    await self._resume(self._end_time, self._duration)
+                else:
+                    self._end_time = None
+                    self._duration = None
                 return
             try:
                 end_time = dt.fromisoformat(timer_data["end_time"])
@@ -128,6 +145,8 @@ async def setup(
                     timer_id,
                 )
                 await self._remove_from_store_locked()
+                self._end_time = None
+                self._duration = None
                 return
             duration = timer_data.get("duration", 0)
             if end_time <= dt_util.utcnow():
@@ -143,6 +162,8 @@ async def setup(
                 # this startup-only edge case since the user's next lock
                 # interaction creates a fresh autolock cycle.
                 await self._remove_from_store_locked()
+                self._end_time = None
+                self._duration = None
                 hass.async_create_task(call_action(dt_util.utcnow()))
             else:
                 _LOGGER.debug(
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index ebb3e6d0..458dbfb8 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -1494,14 +1494,45 @@ async def mock_action(*args):
     assert "test_timer" not in state, "entry must be removed by the recovering setup"
 
 
-async def test_keymaster_timer_setup_resets_detached_and_state(hass, mock_store, store_lock):
-    """Test setup() resets _detached AND clears _end_time/_duration.
-
-    Without resetting _detached, cancel() would silently no-op on the
-    rebound instance. Without clearing _end_time/_duration (which detach
-    intentionally preserves), is_running could stay True even though no
-    callback is scheduled — making the coordinator think autolock is
-    active when nothing will fire.
+async def test_keymaster_timer_setup_resets_detached_flag(hass, mock_store, store_lock):
+    """Test setup() resets _detached so the rebound instance's cancel() works.
+
+    Without this reset, cancel() would silently no-op on the rebound timer
+    (used by _update_lock's rollback path).
+    """
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+    kmlock.autolock_min_day = 5
+
+    async def mock_action(*args):
+        pass
+
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+    await timer.detach()
+    assert timer._detached is True
+
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+    assert timer._detached is False
+
+
+async def test_keymaster_timer_setup_rescues_preserved_state(hass, mock_store, store_lock):
+    """Test setup() rescues a preserved _end_time when the store has no entry.
+
+    Scenario: start() called just before reload sets _end_time and queues
+    a persist behind the shared store lock. detach() preserves _end_time
+    but doesn't await the persist. If the queued persist hadn't run by
+    the time setup() acquires the lock, the store would still be empty
+    — but the in-memory _end_time is the valid pending autolock state.
+    setup() must claim that state (write it back, _resume from it) so
+    the autolock isn't silently dropped.
     """
     timer = KeymasterTimer()
     kmlock = KeymasterLock(
@@ -1519,19 +1550,57 @@ async def mock_action(*args):
     )
     with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
         await timer.start()
-    assert timer._end_time is not None
+    preserved_end_time = timer._end_time
+    preserved_duration = timer._duration
     await timer.detach()
-    assert timer._detached is True
-    assert timer._end_time is not None  # detach preserves these
-    assert timer._duration is not None
+    assert timer._end_time == preserved_end_time
+    assert timer._duration == preserved_duration
 
-    # Empty store so setup's load returns nothing — exercise the early-return path
+    # Simulate the queued persist never having written: store returns empty
+    mock_store.async_load = AsyncMock(return_value={})
+    mock_store.async_save.reset_mock()
+
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+
+    # Rescue: the preserved state was claimed (saved + resumed)
+    mock_store.async_save.assert_called_once()
+    saved = mock_store.async_save.call_args[0][0]
+    assert "test_timer" in saved
+    assert timer.is_running
+
+
+async def test_keymaster_timer_setup_clears_state_when_no_pending(hass, mock_store, store_lock):
+    """Test setup() clears stale fields when nothing to rescue.
+
+    If the timer was idle (no _end_time) before detach AND store is
+    empty, setup() must leave fields cleared so is_running doesn't
+    lie. (Detach only preserves what was set, but a never-started
+    timer that gets detached and re-setup should look idle.)
+    """
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+
+    async def mock_action(*args):
+        pass
+
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+    # Manually leave stale state to simulate a degenerate detach
+    timer._end_time = None
+    timer._duration = None
+    await timer.detach()
     mock_store.async_load = AsyncMock(return_value={})
 
     await timer.setup(
         hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
     )
-    assert timer._detached is False
     assert timer._end_time is None
     assert timer._duration is None
     assert not timer.is_running

From d8ce7066e52e6671c1fb841372cbbb051554d2d8 Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sun, 3 May 2026 10:46:27 -0400
Subject: [PATCH 22/23] fix: detach force-persists in-memory state; rescue
 checks expiry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two more Copilot-flagged real reload races:

1. _update_lock creates a FRESH KeymasterTimer for `new`. Its setup()
   can rescue from disk but not from the old timer's in-memory
   _end_time. If the old timer's queued persist hadn't run by the
   time detach() ran, the disk had no entry — and the new timer's
   in-memory _end_time is None — so the autolock was silently lost.

   Fix: detach() now force-persists in-memory state to disk before
   nulling refs. Acquires the store lock, checks if the entry already
   exists (queued persist already ran), and writes only if missing.
   This guarantees the disk has the autolock state for the replacement
   timer's setup() to load.

2. The previous-round rescue branch in setup() didn't check whether
   the preserved _end_time was already in the past. If reload took
   longer than the autolock delay, the rescue would write an expired
   entry back to disk and _resume from it. A subsequent reload before
   the immediate-fire callback ran could replay the same overdue
   autolock.

   Fix: rescue branch now branches on expiry. If expired, schedule
   the recovery action immediately and DON'T write back. If not
   expired, claim and resume as before.

Test updates:
- test_keymaster_timer_detach_preserves_store: load now returns the
  entry that start() persisted, so detach's force-persist sees it
  exists and skips writing
- new test_keymaster_timer_detach_force_persists_when_store_empty:
  verifies the force-persist behavior when queued persist hasn't run
- new test_keymaster_timer_setup_rescues_expired_state_without_resurrecting:
  verifies the expired-rescue path fires the action without writing
  back to disk

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/helpers.py |  65 ++++++++++++----
 tests/test_helpers.py                  | 103 +++++++++++++++++++++++--
 2 files changed, 149 insertions(+), 19 deletions(-)

diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index 642ebbab..3cc149cd 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -118,21 +118,34 @@ async def setup(
             if not timer_data:
                 # No persisted entry. If detach preserved an in-memory
                 # _end_time (a start() just before reload that hadn't yet
-                # persisted), claim it now by writing+resuming. Otherwise
-                # clear stale fields so is_running doesn't lie.
+                # persisted), claim it. Otherwise clear stale fields so
+                # is_running doesn't lie.
                 if self._end_time and self._duration is not None:
-                    _LOGGER.debug(
-                        "[KeymasterTimer] %s: Rescuing preserved timer state from "
-                        "pre-reload start(); ending %s",
-                        timer_id,
-                        self._end_time,
-                    )
-                    data[timer_id] = {
-                        "end_time": self._end_time.isoformat(),
-                        "duration": self._duration,
-                    }
-                    await store.async_save(data)
-                    await self._resume(self._end_time, self._duration)
+                    if self._end_time <= dt_util.utcnow():
+                        # Already expired during reload — fire as recovery,
+                        # don't write back (that would let a subsequent
+                        # reload replay the same overdue autolock).
+                        _LOGGER.debug(
+                            "[KeymasterTimer] %s: Preserved timer expired "
+                            "during reload; firing without resurrecting in store",
+                            timer_id,
+                        )
+                        self._end_time = None
+                        self._duration = None
+                        hass.async_create_task(call_action(dt_util.utcnow()))
+                    else:
+                        _LOGGER.debug(
+                            "[KeymasterTimer] %s: Rescuing preserved timer state "
+                            "from pre-reload start(); ending %s",
+                            timer_id,
+                            self._end_time,
+                        )
+                        data[timer_id] = {
+                            "end_time": self._end_time.isoformat(),
+                            "duration": self._duration,
+                        }
+                        await store.async_save(data)
+                        await self._resume(self._end_time, self._duration)
                 else:
                     self._end_time = None
                     self._duration = None
@@ -256,6 +269,30 @@ async def detach(self) -> None:
                 await in_flight
             except Exception:
                 _LOGGER.exception("[KeymasterTimer] In-flight callback raised during detach")
+        # Force-persist any in-memory state so the REPLACEMENT timer (a
+        # fresh KeymasterTimer instance on the new kmlock) can resume from
+        # disk even if a queued _persist_to_store() from a recent start()
+        # hasn't run yet. The replacement instance can't see our in-memory
+        # _end_time, only the store. If the queued persist has already
+        # written, the in-store check makes this a no-op.
+        if (
+            self._end_time
+            and self._duration is not None
+            and self._store
+            and self._store_lock
+            and self._timer_id
+        ):
+            try:
+                async with self._store_lock:
+                    data = await self._store.async_load() or {}
+                    if self._timer_id not in data:
+                        data[self._timer_id] = {
+                            "end_time": self._end_time.isoformat(),
+                            "duration": self._duration,
+                        }
+                        await self._store.async_save(data)
+            except Exception:
+                _LOGGER.exception("[KeymasterTimer] Error force-persisting state during detach")
         self.hass = None
         self._kmlock = None
         self._call_action = None
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index 458dbfb8..aea69efe 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -1008,15 +1008,18 @@ async def mock_action(*args):
     with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
         await timer.start()
 
-    # Track save/load calls AFTER start() so we only see what detach does
-    mock_store.async_save.reset_mock()
-    mock_store.async_load.reset_mock()
-
     # Sanity: timer has a scheduled callback before detach
     assert len(timer._unsub_events) == 1
     saved_end_time = timer._end_time
     saved_duration = timer._duration
 
+    # The persist from start() already wrote the entry; make load reflect that
+    # so detach's force-persist check sees the entry exists and skips the write.
+    mock_store.async_load = AsyncMock(
+        return_value={"test_timer": {"end_time": saved_end_time.isoformat(), "duration": 300}}
+    )
+    mock_store.async_save.reset_mock()
+
     await timer.detach()
 
     # Callbacks unsubscribed and kmlock binding cleared
@@ -1030,10 +1033,49 @@ async def mock_action(*args):
     # reload would silently lose the autolock state).
     assert timer._end_time == saved_end_time
     assert timer._duration == saved_duration
-    # Critical: store was NOT modified — replacement timer needs to resume from it
+    # Store entry already present, detach's force-persist skipped the write
     mock_store.async_save.assert_not_called()
 
 
+async def test_keymaster_timer_detach_force_persists_when_store_empty(hass, mock_store, store_lock):
+    """Test detach() writes in-memory state to disk if the queued persist hasn't run.
+
+    Scenario: start() called just before reload sets _end_time and queues
+    a _persist_to_store behind the shared lock (held by some other op).
+    detach() runs before that queued persist. Without force-persist, the
+    REPLACEMENT timer (a fresh KeymasterTimer instance on the new kmlock)
+    would load an empty store and the autolock would be silently lost.
+    """
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+    kmlock.autolock_min_day = 5
+
+    async def mock_action(*args):
+        pass
+
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+    with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
+        await timer.start()
+
+    # Simulate the queued persist NOT having run: store is empty
+    mock_store.async_load = AsyncMock(return_value={})
+    mock_store.async_save.reset_mock()
+    saved_end_time = timer._end_time
+
+    await timer.detach()
+
+    mock_store.async_save.assert_called_once()
+    saved_data = mock_store.async_save.call_args[0][0]
+    assert "test_timer" in saved_data
+    assert saved_data["test_timer"]["end_time"] == saved_end_time.isoformat()
+
+
 async def test_keymaster_timer_cancel_after_detach_is_noop(hass, mock_store, store_lock):
     """Test cancel() on a detached timer doesn't touch the store.
 
@@ -1571,6 +1613,57 @@ async def mock_action(*args):
     assert timer.is_running
 
 
+async def test_keymaster_timer_setup_rescues_expired_state_without_resurrecting(
+    hass, mock_store, store_lock
+):
+    """Test setup()'s rescue path takes recovery for an already-expired _end_time.
+
+    If reload took longer than the autolock delay, the preserved _end_time
+    is now in the past. We must NOT write it back to disk + _resume — that
+    would leave an expired entry that a subsequent reload could replay.
+    Instead, fire the recovery action immediately and leave the store clean.
+    """
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+    kmlock.autolock_min_day = 5
+
+    action_call_count = 0
+
+    async def mock_action(*args):
+        nonlocal action_call_count
+        action_call_count += 1
+
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+    # Manually set an in-the-past _end_time and detach so it's preserved
+    timer._end_time = dt_util.utcnow() - timedelta(seconds=5)
+    timer._duration = 300
+    # Skip the async_load that would interfere; we want detach's force-persist
+    # to NOT write (entry already there in our mock)
+    mock_store.async_load = AsyncMock(
+        return_value={"test_timer": {"end_time": timer._end_time.isoformat(), "duration": 300}}
+    )
+    await timer.detach()
+    # Now empty the store and re-setup: the rescue should detect expiry
+    mock_store.async_load = AsyncMock(return_value={})
+    mock_store.async_save.reset_mock()
+
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+    await hass.async_block_till_done()
+
+    assert action_call_count == 1, "expired-rescue must fire action once"
+    mock_store.async_save.assert_not_called()  # no resurrected entry on disk
+    assert timer._end_time is None
+    assert timer._duration is None
+
+
 async def test_keymaster_timer_setup_clears_state_when_no_pending(hass, mock_store, store_lock):
     """Test setup() clears stale fields when nothing to rescue.
 

From 8dafbb1504b631190b58a8300da9da34bdd7454d Mon Sep 17 00:00:00 2001
From: raman325 <7243222+raman325@users.noreply.github.com>
Date: Sun, 3 May 2026 11:01:17 -0400
Subject: [PATCH 23/23] fix: track recovery actions; propagate force-persist
 failures
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three more Copilot-flagged real reload races:

1. setup() recovery actions launched via hass.async_create_task were
   untracked. detach() only awaited _on_expired_task, not these. A
   reload during recovery could copy the old kmlock's state before
   _timer_triggered finished mutating it, losing the recovery work.

   Fix: replace _on_expired_task with a generic _inflight_tasks set
   that tracks both _on_expired callbacks AND setup() recovery
   actions. detach() awaits all of them. Setup uses _track_recovery()
   which adds the task to the set before scheduling.

2. detach()'s force-persist swallowed exceptions and continued. When
   the timer state existed only in memory (the exact race force-persist
   exists for), failure meant the replacement came up empty and
   autolock was silently disabled.

   Fix: propagate the exception. _update_lock's outer try/except
   catches it and runs the rollback path, which re-attempts setup —
   loud failure beats silent state loss.

3. (Deferred) The _lock_locked() in-flight race is acknowledged in a
   reply to the Copilot thread. Real but coordinator-side: the lock
   event handler sets state and awaits notification dismissals BEFORE
   calling cancel(). Fixing it cleanly requires reorganizing the lock
   event handler flow, which is meaningfully larger scope than this
   PR's persistence/reload fixes.

New regression tests:
- test_keymaster_timer_detach_awaits_recovery_action — verifies
  detach blocks for an in-flight setup() recovery action
- test_keymaster_timer_detach_propagates_force_persist_failure —
  verifies the exception is no longer swallowed

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 custom_components/keymaster/helpers.py | 72 +++++++++++++++--------
 tests/test_helpers.py                  | 81 ++++++++++++++++++++++++++
 2 files changed, 130 insertions(+), 23 deletions(-)

diff --git a/custom_components/keymaster/helpers.py b/custom_components/keymaster/helpers.py
index 3cc149cd..87bff9b4 100644
--- a/custom_components/keymaster/helpers.py
+++ b/custom_components/keymaster/helpers.py
@@ -85,7 +85,10 @@ def __init__(self) -> None:
         self._store: Store[dict[str, TimerStoreEntry]] | None = None
         self._store_lock: asyncio.Lock | None = None
         self._detached = False
-        self._on_expired_task: asyncio.Task[None] | None = None
+        # In-flight action tasks (the scheduled _on_expired AND any setup()
+        # recovery action). detach() awaits all of them so their mutations
+        # on the old kmlock are visible to inherit_state_from.
+        self._inflight_tasks: set[asyncio.Task[None]] = set()
 
     async def setup(
         self,
@@ -132,7 +135,7 @@ async def setup(
                         )
                         self._end_time = None
                         self._duration = None
-                        hass.async_create_task(call_action(dt_util.utcnow()))
+                        self._track_recovery(hass, dt_util.utcnow())
                     else:
                         _LOGGER.debug(
                             "[KeymasterTimer] %s: Rescuing preserved timer state "
@@ -177,7 +180,7 @@ async def setup(
                 await self._remove_from_store_locked()
                 self._end_time = None
                 self._duration = None
-                hass.async_create_task(call_action(dt_util.utcnow()))
+                self._track_recovery(hass, dt_util.utcnow())
             else:
                 _LOGGER.debug(
                     "[KeymasterTimer] %s: Resuming persisted timer, ending %s",
@@ -263,18 +266,25 @@ async def detach(self) -> None:
         # Wait for any in-flight _on_expired to finish so callers see its
         # mutations (e.g. pending_retry_lock) on the old kmlock before they
         # copy state to the replacement.
-        in_flight = self._on_expired_task
-        if in_flight is not None and not in_flight.done():
-            try:
-                await in_flight
-            except Exception:
-                _LOGGER.exception("[KeymasterTimer] In-flight callback raised during detach")
+        # Await ALL in-flight action tasks (scheduled _on_expired AND any
+        # setup() recovery actions) so their mutations on the old kmlock
+        # are visible before inherit_state_from copies state to new.
+        for task in list(self._inflight_tasks):
+            if not task.done():
+                try:
+                    await task
+                except Exception:
+                    _LOGGER.exception("[KeymasterTimer] In-flight task raised during detach")
         # Force-persist any in-memory state so the REPLACEMENT timer (a
         # fresh KeymasterTimer instance on the new kmlock) can resume from
         # disk even if a queued _persist_to_store() from a recent start()
         # hasn't run yet. The replacement instance can't see our in-memory
         # _end_time, only the store. If the queued persist has already
         # written, the in-store check makes this a no-op.
+        # Propagate exceptions: when state exists only in memory (the exact
+        # race this exists for), failure means the replacement comes up with
+        # no entry. The caller (_update_lock) needs to know so its rollback
+        # path can attempt recovery instead of silently dropping the autolock.
         if (
             self._end_time
             and self._duration is not None
@@ -282,17 +292,14 @@ async def detach(self) -> None:
             and self._store_lock
             and self._timer_id
         ):
-            try:
-                async with self._store_lock:
-                    data = await self._store.async_load() or {}
-                    if self._timer_id not in data:
-                        data[self._timer_id] = {
-                            "end_time": self._end_time.isoformat(),
-                            "duration": self._duration,
-                        }
-                        await self._store.async_save(data)
-            except Exception:
-                _LOGGER.exception("[KeymasterTimer] Error force-persisting state during detach")
+            async with self._store_lock:
+                data = await self._store.async_load() or {}
+                if self._timer_id not in data:
+                    data[self._timer_id] = {
+                        "end_time": self._end_time.isoformat(),
+                        "duration": self._duration,
+                    }
+                    await self._store.async_save(data)
         self.hass = None
         self._kmlock = None
         self._call_action = None
@@ -310,11 +317,11 @@ async def _on_expired(now: dt) -> None:
             detach() must still remove the entry or the replacement timer's
             setup() will replay the action.
 
-            Registers itself as self._on_expired_task so that detach() can
+            Registers itself in self._inflight_tasks so that detach() can
             await completion — any mutations the action made on the old
             kmlock are then visible to _update_lock's state copy.
             """
-            self._on_expired_task = asyncio.current_task()
+            self._inflight_tasks.add(asyncio.current_task())
             try:
                 if self._call_action is None:
                     return
@@ -337,10 +344,29 @@ async def _on_expired(now: dt) -> None:
                 # must still remove the entry it owns.
                 await self._clear_timer_state()
             finally:
-                self._on_expired_task = None
+                self._inflight_tasks.discard(asyncio.current_task())
 
         self._unsub_events.append(async_call_later(hass=self.hass, delay=delay, action=_on_expired))
 
+    async def _run_recovery_action(self, now: dt) -> None:
+        """Fire a startup-recovery action; tracked by detach()."""
+        if self._call_action is None:
+            return
+        try:
+            await self._call_action(now)
+        except Exception:
+            _LOGGER.exception("[KeymasterTimer] Recovery action raised")
+
+    def _track_recovery(self, hass: HomeAssistant, now: dt) -> None:
+        """Schedule a recovery action and add it to the in-flight set.
+
+        The task is added before being scheduled so detach() can await it
+        even if it hasn't started running yet.
+        """
+        task = hass.async_create_task(self._run_recovery_action(now))
+        self._inflight_tasks.add(task)
+        task.add_done_callback(self._inflight_tasks.discard)
+
     def _cancel_callbacks(self) -> None:
         """Unsubscribe all pending callbacks."""
         for unsub in self._unsub_events:
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index aea69efe..3a0a5491 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -1037,6 +1037,87 @@ async def mock_action(*args):
     mock_store.async_save.assert_not_called()
 
 
+async def test_keymaster_timer_detach_awaits_recovery_action(hass, mock_store, store_lock):
+    """Test detach() awaits an in-flight setup() recovery action.
+
+    Without this, a reload during recovery could copy the old kmlock's
+    state before _timer_triggered finishes mutating it (e.g. setting
+    pending_retry_lock=True on door-open), losing those mutations.
+    """
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+
+    action_started = asyncio.Event()
+    action_release = asyncio.Event()
+    action_completed = False
+
+    async def slow_action(*args):
+        nonlocal action_completed
+        action_started.set()
+        await action_release.wait()
+        action_completed = True
+
+    expired_end_time = (dt_util.utcnow() - timedelta(minutes=5)).isoformat()
+    mock_store.async_load = AsyncMock(
+        return_value={"test_timer": {"end_time": expired_end_time, "duration": 300}}
+    )
+
+    # setup() schedules the recovery as a tracked task
+    await timer.setup(
+        hass, kmlock, slow_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+    await action_started.wait()
+    assert not action_completed
+
+    # detach must wait for the recovery to complete
+    detach_task = asyncio.create_task(timer.detach())
+    await asyncio.sleep(0)
+    assert not detach_task.done(), "detach must wait for in-flight recovery action"
+
+    action_release.set()
+    await detach_task
+
+    assert action_completed
+
+
+async def test_keymaster_timer_detach_propagates_force_persist_failure(
+    hass, mock_store, store_lock
+):
+    """Test detach() does NOT swallow force-persist failures.
+
+    When the timer state exists only in memory, a storage failure means
+    the replacement comes up empty. Failing loud lets _update_lock's
+    rollback path attempt recovery instead of silently disabling autolock.
+    """
+    timer = KeymasterTimer()
+    kmlock = KeymasterLock(
+        lock_name="test_lock",
+        lock_entity_id="lock.test_lock",
+        keymaster_config_entry_id="test_entry",
+    )
+    kmlock.autolock_min_day = 5
+
+    async def mock_action(*args):
+        pass
+
+    await timer.setup(
+        hass, kmlock, mock_action, timer_id="test_timer", store=mock_store, store_lock=store_lock
+    )
+    with patch("custom_components.keymaster.helpers.sun.is_up", return_value=True):
+        await timer.start()
+
+    # Make load empty so force-persist would try to write, then make save raise
+    mock_store.async_load = AsyncMock(return_value={})
+    mock_store.async_save = AsyncMock(side_effect=OSError("transient disk error"))
+
+    with pytest.raises(OSError, match="transient disk error"):
+        await timer.detach()
+
+
 async def test_keymaster_timer_detach_force_persists_when_store_empty(hass, mock_store, store_lock):
     """Test detach() writes in-memory state to disk if the queued persist hasn't run.