Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions services/gastown/src/dos/Town.do.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import * as dispatch from './town/container-dispatch';
import * as patrol from './town/patrol';
import * as scheduling from './town/scheduling';
import * as events from './town/events';
import { stopContainerIfIdle as _stopContainerIfIdle } from './town/container-idle-stop';
import * as scm from './town/town-scm';
import * as reconciler from './town/reconciler';
import { applyAction } from './town/actions';
Expand Down Expand Up @@ -4104,6 +4105,12 @@ export class TownDO extends DurableObject<Env> {
}),
]);

await this.stopContainerIfIdle().catch(err =>
logger.warn('alarm: stopContainerIfIdle failed', {
error: err instanceof Error ? err.message : String(err),
})
);

// Re-arm: fast when active, slow when idle
const interval = activeWork ? ACTIVE_ALARM_INTERVAL_MS : IDLE_ALARM_INTERVAL_MS;
await this.ctx.storage.setAlarm(Date.now() + interval);
Expand Down Expand Up @@ -4164,6 +4171,27 @@ export class TownDO extends DurableObject<Env> {
await this.ctx.storage.put('container:lastTokenRefreshAt', now);
}

/**
 * Shut the town container down ahead of Cloudflare's own sleep timer once
 * the town has gone idle.
 *
 * The sleepAfter timer is reset by any port-8080 traffic — long-lived PTY
 * WebSockets included — so a container can remain awake for hours after the
 * real work is done. The stop decision itself lives in the
 * container-idle-stop sub-module; this method only hands it accessors bound
 * to this Durable Object.
 */
private async stopContainerIfIdle(): Promise<void> {
  const deps: Parameters<typeof _stopContainerIfIdle>[0] = {
    hasActiveWork: () => this.hasActiveWork(),
    isDraining: () => this._draining,
    getMayor: () => {
      // Only the first agent returned for the mayor role is considered.
      const [mayor] = agents.listAgents(this.sql, { role: 'mayor' });
      return mayor ?? null;
    },
    getTownId: () => this.townId,
    // The throttle timestamp is kept in DO storage under a fixed key.
    getLastIdleStopAt: () => this.ctx.storage.get<number>('container:lastIdleStopAt'),
    setLastIdleStopAt: (value) => this.ctx.storage.put('container:lastIdleStopAt', value),
    getContainerStub: (townId) => getTownContainerStub(this.env, townId),
    writeEventFn: (data) => writeEvent(this.env, data),
    now: () => Date.now(),
  };

  await _stopContainerIfIdle(deps);
}

/**
* Proactively remint KILOCODE_TOKEN when it's approaching expiry.
* Throttled to once per day — the 30-day token is refreshed when
Expand Down
205 changes: 205 additions & 0 deletions services/gastown/src/dos/town/container-idle-stop.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
import { describe, it, expect, vi } from 'vitest';
import {
stopContainerIfIdle,
CONTAINER_IDLE_STOP_THRESHOLD_MS,
CONTAINER_IDLE_STOP_THROTTLE_MS,
type IdleStopDeps,
} from './container-idle-stop';

/**
 * Builds a mayor agent fixture for the idle-stop tests.
 *
 * Only `status` and `last_activity_at` can be overridden; they default to
 * `'idle'` and the current time respectively. Every other field is a fixed
 * placeholder, except `created_at`, which is always the current time.
 */
function makeMayor(overrides: Partial<{ status: string; last_activity_at: string }> = {}) {
  const isoNow = (): string => new Date().toISOString();

  const status = overrides.status ?? 'idle';
  const lastActivityAt = overrides.last_activity_at ?? isoNow();
  const createdAt = isoNow();

  return {
    id: 'mayor-1',
    rig_id: null,
    role: 'mayor' as const,
    name: 'Mayor',
    identity: 'Mayor@test',
    status,
    current_hook_bead_id: null,
    dispatch_attempts: 0,
    last_activity_at: lastActivityAt,
    checkpoint: null,
    created_at: createdAt,
    agent_status_message: null,
    agent_status_updated_at: null,
  };
}

/**
 * The production dependency bag plus the test doubles behind it, exposed so
 * assertions can inspect what the function under test did.
 */
type TestDeps = IdleStopDeps & {
  /** Spy behind the default container stub's `stop()`. */
  _stopFn: ReturnType<typeof vi.fn>;
  /** Spy behind the default container stub's `getState()`. */
  _getStateFn: ReturnType<typeof vi.fn>;
  /** In-memory stand-in for the throttle timestamp storage. */
  _store: Map<string, number>;
  /** Every payload handed to the default `writeEventFn`, in call order. */
  _events: Array<Parameters<IdleStopDeps['writeEventFn']>[0]>;
};

/**
 * Assembles an `IdleStopDeps` bag for a test.
 *
 * The defaults describe a town that is eligible for an idle stop: no active
 * work, not draining, no mayor, town id `'town-1'`, an empty throttle store
 * and a container reporting `'running'`. Any dependency passed in
 * `overrides` replaces the default for that key. The default spies, store
 * and event log are always returned under the `_`-prefixed keys, even when
 * the dependency that would have used them was overridden.
 */
function makeDeps(overrides: Partial<IdleStopDeps> = {}): TestDeps {
  const _stopFn = vi.fn().mockResolvedValue(undefined);
  const _getStateFn = vi.fn().mockResolvedValue({ status: 'running' });
  const _store = new Map<string, number>();
  const _events: Array<{ event: string; townId: string; reason: string; error?: string }> = [];

  // Resolve one dependency: the caller's override wins unless it is null/undefined.
  const pick = <K extends keyof IdleStopDeps>(key: K, fallback: IdleStopDeps[K]): IdleStopDeps[K] =>
    overrides[key] ?? fallback;

  return {
    hasActiveWork: pick('hasActiveWork', () => false),
    isDraining: pick('isDraining', () => false),
    getMayor: pick('getMayor', () => null),
    getTownId: pick('getTownId', () => 'town-1'),
    getLastIdleStopAt: pick('getLastIdleStopAt', () =>
      Promise.resolve(_store.get('container:lastIdleStopAt'))
    ),
    setLastIdleStopAt: pick('setLastIdleStopAt', (value: number) => {
      _store.set('container:lastIdleStopAt', value);
      return Promise.resolve();
    }),
    getContainerStub: pick('getContainerStub', () => ({
      getState: _getStateFn,
      stop: _stopFn,
    })),
    writeEventFn: pick('writeEventFn', (data) => {
      _events.push(data);
    }),
    now: pick('now', () => Date.now()),
    _stopFn,
    _getStateFn,
    _store,
    _events,
  } as TestDeps;
}

// Behavioral spec for stopContainerIfIdle. Every case drives the function
// through the fakes built by makeDeps and then inspects the recorded stop()
// calls, the emitted events, and the in-memory throttle store.
describe('stopContainerIfIdle', () => {
// --- Gates that must prevent a stop -------------------------------------
it('does not stop when town has active work', async () => {
const deps = makeDeps({ hasActiveWork: () => true });
await stopContainerIfIdle(deps);
expect(deps._stopFn).not.toHaveBeenCalled();
});

it('does not stop when draining', async () => {
const deps = makeDeps({ isDraining: () => true });
await stopContainerIfIdle(deps);
expect(deps._stopFn).not.toHaveBeenCalled();
});

it('does not stop when mayor is working', async () => {
const deps = makeDeps({ getMayor: () => makeMayor({ status: 'working' }) });
await stopContainerIfIdle(deps);
expect(deps._stopFn).not.toHaveBeenCalled();
});

it('does not stop when mayor is stalled', async () => {
const deps = makeDeps({ getMayor: () => makeMayor({ status: 'stalled' }) });
await stopContainerIfIdle(deps);
expect(deps._stopFn).not.toHaveBeenCalled();
});

it('does not stop when mayor last_activity_at is within threshold', async () => {
// One minute ago is well inside CONTAINER_IDLE_STOP_THRESHOLD_MS.
const recentActivity = new Date(Date.now() - 60_000).toISOString();
const deps = makeDeps({ getMayor: () => makeMayor({ last_activity_at: recentActivity }) });
await stopContainerIfIdle(deps);
expect(deps._stopFn).not.toHaveBeenCalled();
});

// --- Paths that must stop the container ---------------------------------
it('stops container when mayor idle beyond threshold and container is running', async () => {
// A full minute past the threshold, so clock drift between this line and
// the call under test cannot flip the outcome.
const oldActivity = new Date(Date.now() - CONTAINER_IDLE_STOP_THRESHOLD_MS - 60_000).toISOString();
const deps = makeDeps({ getMayor: () => makeMayor({ last_activity_at: oldActivity }) });
await stopContainerIfIdle(deps);
expect(deps._stopFn).toHaveBeenCalledTimes(1);
expect(deps._events).toHaveLength(1);
expect(deps._events[0].event).toBe('container.idle_stop');
// With a mayor present, the reason carries the idle duration in whole minutes.
expect(deps._events[0].reason).toMatch(/^mayor_idle_\d+m$/);
});

it('stops container when no mayor exists (no_active_work reason)', async () => {
const deps = makeDeps({ getMayor: () => null });
await stopContainerIfIdle(deps);
expect(deps._stopFn).toHaveBeenCalledTimes(1);
expect(deps._events[0].reason).toBe('no_active_work');
});

it('stops container when container is healthy', async () => {
// A custom stub replaces the default one, so assertions use the local
// stopFn rather than deps._stopFn.
const stopFn = vi.fn().mockResolvedValue(undefined);
const deps = makeDeps({
getMayor: () => null,
getContainerStub: () => ({
getState: vi.fn().mockResolvedValue({ status: 'healthy' }),
stop: stopFn,
}),
});
await stopContainerIfIdle(deps);
expect(stopFn).toHaveBeenCalledTimes(1);
expect(deps._events[0].reason).toBe('no_active_work');
});

it('does not stop when container is already stopped', async () => {
const stopFn = vi.fn().mockResolvedValue(undefined);
const deps = makeDeps({
getMayor: () => null,
getContainerStub: () => ({
getState: vi.fn().mockResolvedValue({ status: 'stopped' }),
stop: stopFn,
}),
});
await stopContainerIfIdle(deps);
expect(stopFn).not.toHaveBeenCalled();
});

// --- Throttling ----------------------------------------------------------
it('throttles: calling twice within throttle window stops only once', async () => {
// Both calls share the same deps, and therefore the same throttle store.
const deps = makeDeps({ getMayor: () => null });
await stopContainerIfIdle(deps);
expect(deps._stopFn).toHaveBeenCalledTimes(1);

await stopContainerIfIdle(deps);
expect(deps._stopFn).toHaveBeenCalledTimes(1);
});

it('allows stop again after throttle window passes', async () => {
// The clock is injected so the test can step past the throttle window
// without waiting.
let currentTime = Date.now();
const deps = makeDeps({
getMayor: () => null,
now: () => currentTime,
});
await stopContainerIfIdle(deps);
expect(deps._stopFn).toHaveBeenCalledTimes(1);

currentTime += CONTAINER_IDLE_STOP_THROTTLE_MS + 1;
await stopContainerIfIdle(deps);
expect(deps._stopFn).toHaveBeenCalledTimes(2);
});

// --- Failure handling ----------------------------------------------------
it('logs error and does not set throttle when stop() throws', async () => {
const stopFn = vi.fn().mockRejectedValue(new Error('stop failed'));
const deps = makeDeps({
getMayor: () => null,
getContainerStub: () => ({
getState: vi.fn().mockResolvedValue({ status: 'running' }),
stop: stopFn,
}),
});
// The call must resolve rather than reject even though stop() rejected.
await stopContainerIfIdle(deps);

expect(deps._events).toHaveLength(1);
expect(deps._events[0].error).toBe('stop failed');
// No throttle timestamp was stored, so a later call is free to retry.
expect(deps._store.has('container:lastIdleStopAt')).toBe(false);
});

it('returns without stopping when townId is null', async () => {
const deps = makeDeps({ getTownId: () => null, getMayor: () => null });
await stopContainerIfIdle(deps);
expect(deps._stopFn).not.toHaveBeenCalled();
});

it('returns without stopping when getState() throws', async () => {
const stopFn = vi.fn().mockResolvedValue(undefined);
const deps = makeDeps({
getMayor: () => null,
getContainerStub: () => ({
getState: vi.fn().mockRejectedValue(new Error('rpc failed')),
stop: stopFn,
}),
});
await stopContainerIfIdle(deps);
expect(stopFn).not.toHaveBeenCalled();
});
});
81 changes: 81 additions & 0 deletions services/gastown/src/dos/town/container-idle-stop.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/**
* Proactive idle-container stop logic.
*
* Cloudflare's sleepAfter timer resets on any port-8080 traffic (including
* long-lived PTY WebSockets), so containers can stay awake for hours after
* all real work finishes. This module provides the decision logic for
* stopping the container from the TownDO alarm when the town is truly idle.
*/

import { logger } from '../../util/log.util';
import type { Agent } from '../../types';

/**
 * How long (ms) the mayor's last recorded activity must lie in the past
 * before the container may be stopped: five minutes.
 */
export const CONTAINER_IDLE_STOP_THRESHOLD_MS = 5 * 60 * 1_000;

/**
 * Minimum spacing (ms) after a recorded idle stop before another one is
 * attempted: two minutes.
 */
export const CONTAINER_IDLE_STOP_THROTTLE_MS = 2 * 60 * 1_000;

/** Payload recorded for an idle-stop attempt; `error` is present when the attempt reported a failure. */
type IdleStopEvent = {
  event: string;
  townId: string;
  reason: string;
  error?: string;
};

/** The two container-stub operations this module calls. */
type IdleStopContainerStub = {
  getState: () => Promise<{ status: string }>;
  stop: () => Promise<void>;
};

/**
 * Everything `stopContainerIfIdle` needs from its host, injected as plain
 * functions so the decision logic can be exercised without a Durable Object.
 */
export type IdleStopDeps = {
  /** True while the town still has work in flight. */
  hasActiveWork: () => boolean;
  /** True while the town is draining. */
  isDraining: () => boolean;
  /** The town's mayor agent, or `null` when there is none. */
  getMayor: () => Agent | null;
  /** The town id, or `null` when it is not available. */
  getTownId: () => string | null;
  /** Epoch-ms timestamp stored by `setLastIdleStopAt`, or `undefined` if never set. */
  getLastIdleStopAt: () => Promise<number | undefined>;
  /** Persists the epoch-ms timestamp used for throttling. */
  setLastIdleStopAt: (value: number) => Promise<void>;
  /** Returns the container stub for the given town. */
  getContainerStub: (townId: string) => IdleStopContainerStub;
  /** Sink for idle-stop events. */
  writeEventFn: (data: IdleStopEvent) => void;
  /** Current time in epoch milliseconds. */
  now: () => number;
};

/** Turns a value caught in a `catch` clause into a loggable message. */
function describeIdleStopError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Stops the town container when nothing in the town needs it.
 *
 * The pass bails out, without touching the container, when any of these hold:
 *  1. the town has active work or is draining;
 *  2. the mayor's status is `working` or `stalled`;
 *  3. the mayor's last activity is within `CONTAINER_IDLE_STOP_THRESHOLD_MS`;
 *  4. no town id is available;
 *  5. an idle stop was recorded less than `CONTAINER_IDLE_STOP_THROTTLE_MS` ago;
 *  6. `getState()` fails, or reports anything other than `running`/`healthy`.
 *
 * Otherwise `stop()` is called and a `container.idle_stop` event is written.
 * The event reason is `no_active_work` when there is no mayor, and
 * `mayor_idle_<N>m` otherwise. N is the idle time in whole minutes, or 0 when
 * the mayor has no usable `last_activity_at`.
 *
 * Failure handling:
 *  - `stop()` failing is logged, reported as an event carrying `error`
 *    (truncated to 300 characters), and leaves the throttle unset so a later
 *    pass can retry.
 *  - `setLastIdleStopAt()` failing after a successful stop is logged on its
 *    own. The container did stop, so the success event is still written.
 *  - Errors from any other dependency propagate to the caller.
 *
 * @param deps Host-provided accessors; see `IdleStopDeps`.
 * @returns A promise that settles once the pass is complete.
 */
export async function stopContainerIfIdle(deps: IdleStopDeps): Promise<void> {
  if (deps.hasActiveWork()) return;
  if (deps.isDraining()) return;

  const mayor = deps.getMayor();
  if (mayor && (mayor.status === 'working' || mayor.status === 'stalled')) return;

  // One clock reading for the whole pass, so the idle gate, the throttle
  // stamp and the reported idle minutes all agree with each other.
  const now = deps.now();

  // null means "unknown": no mayor, no timestamp, or a timestamp that does
  // not parse. Unknown never blocks the stop, matching a missing timestamp.
  let mayorIdleMs: number | null = null;
  if (mayor && mayor.last_activity_at != null) {
    const lastActivity = new Date(mayor.last_activity_at).getTime();
    if (Number.isFinite(lastActivity)) mayorIdleMs = now - lastActivity;
  }
  if (mayorIdleMs !== null && mayorIdleMs <= CONTAINER_IDLE_STOP_THRESHOLD_MS) return;

  const townId = deps.getTownId();
  if (!townId) return;

  const lastIdleStop = (await deps.getLastIdleStopAt()) ?? 0;
  if (now - lastIdleStop < CONTAINER_IDLE_STOP_THROTTLE_MS) return;

  const stub = deps.getContainerStub(townId);
  let state: { status: string };
  try {
    state = await stub.getState();
  } catch (err) {
    logger.warn('stopContainerIfIdle: getState() failed', {
      error: describeIdleStopError(err),
    });
    return;
  }

  // Only a container that is actually up is worth stopping.
  if (state.status !== 'running' && state.status !== 'healthy') return;

  const idleMinutes = mayorIdleMs === null ? 0 : Math.round(mayorIdleMs / 60_000);
  const reason = mayor ? `mayor_idle_${idleMinutes}m` : 'no_active_work';

  try {
    await stub.stop();
  } catch (err) {
    const message = describeIdleStopError(err);
    logger.warn('stopContainerIfIdle: stop() failed', { error: message });
    deps.writeEventFn({
      event: 'container.idle_stop',
      townId,
      reason,
      error: message.slice(0, 300),
    });
    return;
  }

  // The stop itself succeeded. Persisting the throttle is bookkeeping, so a
  // failure here must not be reported as a failed stop.
  try {
    await deps.setLastIdleStopAt(now);
  } catch (err) {
    logger.warn('stopContainerIfIdle: failed to persist idle-stop throttle', {
      error: describeIdleStopError(err),
    });
  }

  deps.writeEventFn({ event: 'container.idle_stop', townId, reason });
}