From 7e4f4a7a43d2e63be5b54a90f89e8043bfc90168 Mon Sep 17 00:00:00 2001 From: Bruno Sgarbi Date: Mon, 27 Apr 2026 11:15:08 -0300 Subject: [PATCH 1/2] fix(instance.controller): emit remove.instance even when logout fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a Baileys WebSocket dies but the in-memory `waInstances[name]` entry still exists (a "zombie" instance), `deleteInstance()` calls `await this.logout()`, which throws "Connection Closed". The throw causes the outer try/catch to skip the `eventEmitter.emit('remove.instance')` call — which is the only mechanism that purges the zombie from `waInstances`. Result: zombies persist in memory until the entire `evo2_api` container is restarted, affecting ALL instances on the host (not just the broken one). Operators have no per-instance recovery path in v2.3.x — their only option is `docker restart`, which forces every connected user to re-scan the QR code. Fix: wrap the inner `logout()` call in its own try/catch. Log a warning when it fails but continue to the cleanup emit. The in-memory entry must be removed regardless of whether logout completed cleanly — `remove.instance` is the canonical way to purge a stuck instance, and DB/cache cleanup happens in the same event handler. This makes `DELETE /instance/:name` idempotent against zombies: a caller can always recover a single instance without nuking the whole host. Refs: - #693 (instance/restart closes the session) - #1286 (Connection Closed in v2.2.3) - #2026 (Sync lost after reboot) - #2027 (Loss of synchronization on reboot) Tested in production at Rigarr (14 instances, ~25k msgs/day) by overlaying this patch on v2.3.7 via Docker. Before: any zombie forced a full container restart. After: per-instance cleanup works cleanly while other instances stay connected. 
Signed-off-by: Bruno Cavalcante Sgarbi --- src/api/controllers/instance.controller.ts | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/api/controllers/instance.controller.ts b/src/api/controllers/instance.controller.ts index 6a6910688..7e16d8c02 100644 --- a/src/api/controllers/instance.controller.ts +++ b/src/api/controllers/instance.controller.ts @@ -456,7 +456,18 @@ export class InstanceController { if (this.configService.get('CHATWOOT').ENABLED) waInstances?.clearCacheChatwoot(); if (instance.state === 'connecting' || instance.state === 'open') { - await this.logout({ instanceName }); + try { + await this.logout({ instanceName }); + } catch (logoutError) { + // RIGARR PATCH: zombie instance cleanup. + // When a Baileys socket is dead but waInstances[name] still exists, + // logout() throws "Connection Closed". Without this catch, the + // remove.instance emit below never runs, leaving the zombie in memory + // forever (only fixable by restarting the entire container). + this.logger.warn( + `[ZOMBIE-CLEANUP] logout failed for "${instanceName}" (likely zombie socket): ${logoutError?.toString?.() || logoutError}. Proceeding with cleanup.`, + ); + } } try { From 93b9081a6b88d40cc6706d745fb60a318bee9bc7 Mon Sep 17 00:00:00 2001 From: Bruno Sgarbi Date: Mon, 27 Apr 2026 11:23:53 -0300 Subject: [PATCH 2/2] =?UTF-8?q?review:=20address=20Sourcery=20feedback=20?= =?UTF-8?q?=E2=80=94=20neutral=20language=20+=20log=20error=20object?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per https://github.com/EvolutionAPI/evolution-api/pull/2520 review: 1. Drop vendor-specific markers in code comment and log message (was '[ZOMBIE-CLEANUP]' and 'RIGARR PATCH'). Comment now describes the bug in upstream-friendly terms. 2. 
Pass the full error object to logger.warn instead of toString(), following the existing convention in monitor.service.ts ('no.connection' handler) where structured object logging is used to preserve diagnostic detail. No behavior change. --- src/api/controllers/instance.controller.ts | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/api/controllers/instance.controller.ts b/src/api/controllers/instance.controller.ts index 7e16d8c02..5379d4f80 100644 --- a/src/api/controllers/instance.controller.ts +++ b/src/api/controllers/instance.controller.ts @@ -458,15 +458,18 @@ export class InstanceController { if (instance.state === 'connecting' || instance.state === 'open') { try { await this.logout({ instanceName }); - } catch (logoutError) { - // RIGARR PATCH: zombie instance cleanup. - // When a Baileys socket is dead but waInstances[name] still exists, - // logout() throws "Connection Closed". Without this catch, the - // remove.instance emit below never runs, leaving the zombie in memory - // forever (only fixable by restarting the entire container). - this.logger.warn( - `[ZOMBIE-CLEANUP] logout failed for "${instanceName}" (likely zombie socket): ${logoutError?.toString?.() || logoutError}. Proceeding with cleanup.`, - ); + } catch (error) { + // logout can throw "Connection Closed" when the underlying Baileys + // socket is already dead but waInstances[name] still exists. We + // must continue to the remove.instance emit below — that is the + // only path that purges the in-memory entry and runs cleaningUp(). + // Without this catch, the stale entry persists until the entire + // process restarts. + this.logger.warn({ + message: 'logout failed during deleteInstance — proceeding with cleanup', + instanceName, + error, + }); } }