Skip to content

Commit a7b968f

Browse files
authored
Add browser video recording artifacts (#648)
## Summary - Add browser video recording capture tools for sandbox agents, including start/stop commands and MP4 upload support. - Extend media uploads, artifact storage, streaming, and shared/web types to support video artifacts alongside screenshots. - Surface videos everywhere screenshot media appears in the session UI, including cards, sidebars, lightbox playback, and range-backed media proxying. - Add sandbox image support for ffmpeg and bundled recording guidance skills. ## Validation - `npm run test -w @open-inspect/control-plane -- src/media.test.ts src/session/http/handlers/sandbox.handler.test.ts src/session/sandbox-events.test.ts src/session/http/routes.test.ts` - `npm run test:integration -w @open-inspect/control-plane -- test/integration/media.test.ts` - `python -m pytest packages/sandbox-runtime/tests/test_media_tools.py -q` - `npm test -w @open-inspect/web -- src/hooks/use-session-socket.test.tsx src/components/screenshot-media.test.tsx` - `npm run typecheck -w @open-inspect/control-plane` - `npm run typecheck -w @open-inspect/shared` - `npm run typecheck -w @open-inspect/web` - `npm run lint -w @open-inspect/control-plane` - `npm run lint -w @open-inspect/shared` - `npm run lint -w @open-inspect/web` - `python -m ruff check packages/sandbox-runtime/tests/test_media_tools.py` - `npx prettier --check ...` - `git diff --check` <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **New Features** * Added video recording/upload as session media alongside screenshots, playback UI, and HTTP range streaming for partial video requests * Media lists and counts now combine screenshots and videos * **Documentation** * Added video recording and upload workflows to agent-browser, record-video, and visual-verification guides * **Tests** * Expanded unit and integration tests for video upload, metadata parsing, and streaming behavior * **Chores** * Added ffmpeg to sandbox images/tooling for video support <!-- 
review_stack_entry_start --> [![Review Change Stack](https://storage.googleapis.com/coderabbit_public_assets/review-stack-in-coderabbit-ui.svg)](https://app.coderabbit.ai/change-stack/ColeMurray/background-agents/pull/648?utm_source=github_walkthrough&utm_medium=github&utm_campaign=change_stack) <!-- review_stack_entry_end --> <!-- end of auto-generated comment: release notes by coderabbit.ai -->
1 parent f47515c commit a7b968f

30 files changed

Lines changed: 1560 additions & 99 deletions

File tree

packages/control-plane/src/media.test.ts

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,19 @@ import { describe, expect, it } from "vitest";
22
import {
33
buildMediaObjectKey,
44
detectScreenshotFileType,
5+
detectVideoFileType,
56
isSupportedScreenshotMimeType,
7+
isSupportedVideoMimeType,
68
parseOptionalBoolean,
79
parseOptionalViewport,
10+
parseVideoUploadMetadata,
811
} from "./media";
912

13+
// Test fixture: 24 bytes shaped like the start of an MP4 file. Bytes 0-3 hold
// 0x00000018 (24), bytes 4-7 spell "ftyp", and bytes 8-11 spell "isom"; the
// remaining bytes are not inspected by detectVideoFileType.
const MP4_SIGNATURE = Uint8Array.from([
14+
0x00, 0x00, 0x00, 0x18, 0x66, 0x74, 0x79, 0x70, 0x69, 0x73, 0x6f, 0x6d, 0x00, 0x00, 0x02, 0x00,
15+
0x69, 0x73, 0x6f, 0x6d, 0x69, 0x73, 0x6f, 0x32,
16+
]);
17+
1018
describe("media helpers", () => {
1119
it("builds session-scoped media object keys", () => {
1220
expect(buildMediaObjectKey("session-1", "artifact-1", "png")).toBe(
@@ -21,6 +29,100 @@ describe("media helpers", () => {
2129
expect(isSupportedScreenshotMimeType("image/gif")).toBe(false);
2230
});
2331

32+
it("accepts only supported video mime types", () => {
33+
expect(isSupportedVideoMimeType("video/mp4")).toBe(true);
34+
expect(isSupportedVideoMimeType("video/webm")).toBe(false);
35+
});
36+
37+
it("detects MP4 videos by ISO BMFF file type bytes", () => {
38+
expect(detectVideoFileType(MP4_SIGNATURE)).toEqual({
39+
mimeType: "video/mp4",
40+
extension: "mp4",
41+
});
42+
});
43+
44+
it("parses required video metadata", () => {
45+
const formData = new FormData();
46+
formData.set("caption", "Menu opens after clicking settings");
47+
formData.set("durationMs", "2500");
48+
formData.set("recordingStartedAt", "1000");
49+
formData.set("recordingEndedAt", "3500");
50+
formData.set("dimensions", '{"width":1280,"height":720}');
51+
formData.set("truncated", "false");
52+
formData.set("sourceUrl", "https://example.com/start");
53+
formData.set("endUrl", "https://example.com/end");
54+
formData.set("hasAudio", "false");
55+
56+
expect(parseVideoUploadMetadata(formData, 4000)).toEqual({
57+
caption: "Menu opens after clicking settings",
58+
durationMs: 2500,
59+
createdAt: 4000,
60+
recordingStartedAt: 1000,
61+
recordingEndedAt: 3500,
62+
dimensions: { width: 1280, height: 720 },
63+
truncated: false,
64+
sourceUrl: "https://example.com/start",
65+
endUrl: "https://example.com/end",
66+
hasAudio: false,
67+
captureSurface: "browser",
68+
source: "agent",
69+
});
70+
});
71+
72+
it.each([
73+
["missing caption", { caption: "" }, "caption is required"],
74+
["non-positive duration", { durationMs: "0" }, "durationMs must be a positive number"],
75+
["decimal duration", { durationMs: "2500.5" }, "durationMs must be a positive integer"],
76+
["exponent duration", { durationMs: "1e3" }, "durationMs must be a positive integer"],
77+
[
78+
"unsafe duration integer",
79+
{ durationMs: "9007199254740992" },
80+
"durationMs must be a safe integer",
81+
],
82+
[
83+
"non-finite timestamp integer",
84+
{ recordingStartedAt: "1".padEnd(310, "0") },
85+
"recordingStartedAt must be a safe integer",
86+
],
87+
["duration above maximum", { durationMs: "90001" }, "durationMs must be 90000 or less"],
88+
[
89+
"invalid dimensions",
90+
{ dimensions: '{"width":0,"height":720}' },
91+
"dimensions must include positive integer width and height",
92+
],
93+
[
94+
"fractional dimensions",
95+
{ dimensions: '{"width":0.4,"height":720}' },
96+
"dimensions must include positive integer width and height",
97+
],
98+
["invalid source URL", { sourceUrl: "not-a-url" }, "sourceUrl must be a valid URL"],
99+
["audio present", { hasAudio: "true" }, "hasAudio must be false"],
100+
[
101+
"timestamp span above maximum",
102+
{ recordingEndedAt: "93001" },
103+
"recording timestamps must span 90000ms or less",
104+
],
105+
[
106+
"duration exceeds timestamp span",
107+
{ durationMs: "5000" },
108+
"durationMs must not exceed the recording timestamp span",
109+
],
110+
])("rejects invalid video metadata: %s", (_label, overrides, message) => {
111+
const formData = new FormData();
112+
formData.set("caption", "Menu opens after clicking settings");
113+
formData.set("durationMs", "2500");
114+
formData.set("recordingStartedAt", "1000");
115+
formData.set("recordingEndedAt", "3500");
116+
formData.set("dimensions", '{"width":1280,"height":720}');
117+
formData.set("truncated", "false");
118+
119+
for (const [name, value] of Object.entries(overrides)) {
120+
formData.set(name, value);
121+
}
122+
123+
expect(() => parseVideoUploadMetadata(formData, 4000)).toThrow(message);
124+
});
125+
24126
it.each([
25127
[
26128
"PNG",

packages/control-plane/src/media.ts

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,59 @@
1+
import type { VideoArtifactMetadata } from "@open-inspect/shared";
2+
13
/** 10 MiB. NOTE(review): presumably the per-file screenshot size cap; enforced outside this view. */
export const SCREENSHOT_MAX_BYTES = 10 * 1024 * 1024;
24
/** NOTE(review): presumably the screenshot count cap per session; enforced outside this view. */
export const SCREENSHOT_UPLOAD_LIMIT_PER_SESSION = 100;
5+
/** 100 MiB. NOTE(review): presumably the per-file video size cap; enforced outside this view. */
export const VIDEO_MAX_BYTES = 100 * 1024 * 1024;
6+
/** NOTE(review): presumably the video count cap per session; enforced outside this view. */
export const VIDEO_UPLOAD_LIMIT_PER_SESSION = 20;
7+
/** Largest `durationMs` (90 seconds) accepted by parseVideoUploadMetadata. */
export const VIDEO_MAX_DURATION_MS = 90_000;
8+
/** Slack (1 second) parseVideoUploadMetadata allows when comparing durationMs with the timestamp span. */
export const VIDEO_TIMESTAMP_TOLERANCE_MS = 1_000;
39

410
// Maps each supported screenshot MIME type to its file extension. The keys
// double as the SupportedScreenshotMimeType union via `keyof typeof`.
const SCREENSHOT_EXTENSIONS = {
511
"image/png": "png",
612
"image/jpeg": "jpg",
713
"image/webp": "webp",
814
} as const;
915

16+
// Maps each supported video MIME type to its file extension. MP4 is the only
// entry; the keys double as the SupportedVideoMimeType union.
const VIDEO_EXTENSIONS = {
17+
"video/mp4": "mp4",
18+
} as const;
19+
1020
/** Union of the MIME type keys declared on SCREENSHOT_EXTENSIONS. */
export type SupportedScreenshotMimeType = keyof typeof SCREENSHOT_EXTENSIONS;
21+
/** Union of the MIME type keys declared on VIDEO_EXTENSIONS. */
export type SupportedVideoMimeType = keyof typeof VIDEO_EXTENSIONS;
1122

1223
/** MIME type and file extension pair returned by detectScreenshotFileType. */
export interface ScreenshotFileType {
1324
mimeType: SupportedScreenshotMimeType;
1425
extension: (typeof SCREENSHOT_EXTENSIONS)[SupportedScreenshotMimeType];
1526
}
1627

28+
/** MIME type and file extension pair returned by detectVideoFileType. */
export interface VideoFileType {
29+
mimeType: SupportedVideoMimeType;
30+
extension: (typeof VIDEO_EXTENSIONS)[SupportedVideoMimeType];
31+
}
32+
1733
/**
 * Structural shape of a file value found in multipart fields.
 * NOTE(review): presumably satisfied by the platform `File`/`Blob` — confirm against callers.
 */
export interface MultipartFileLike {
1834
size: number;
1935
type: string;
2036
arrayBuffer(): Promise<ArrayBuffer>;
2137
}
2238

2339
/** A multipart field is either a plain text value or a file-like object. */
export type MultipartFieldValue = string | MultipartFileLike;
40+
/**
 * VideoArtifactMetadata without `objectKey`, `mimeType`, and `sizeBytes`.
 * This is the return type of parseVideoUploadMetadata.
 */
export type VideoUploadMetadata = Omit<
41+
VideoArtifactMetadata,
42+
"objectKey" | "mimeType" | "sizeBytes"
43+
>;
44+
45+
/**
 * Read-only lookup of multipart fields by name; `get` yields `null` when the
 * field is absent. The unit tests pass a `FormData` instance for this.
 */
export interface MultipartFieldsLike {
46+
get(name: string): MultipartFieldValue | null;
47+
}
2448

2549
/**
 * Type guard for the screenshot MIME types declared on SCREENSHOT_EXTENSIONS.
 *
 * Uses an own-property check instead of the `in` operator: `in` also walks the
 * prototype chain, so inherited keys such as "toString", "constructor", or
 * "__proto__" would otherwise be reported as supported MIME types.
 *
 * @param value - MIME type string to test.
 * @returns `true` only when `value` is a key declared directly on SCREENSHOT_EXTENSIONS.
 */
export function isSupportedScreenshotMimeType(value: string): value is SupportedScreenshotMimeType {
  return Object.prototype.hasOwnProperty.call(SCREENSHOT_EXTENSIONS, value);
}
2852

53+
/**
 * Type guard for the video MIME types declared on VIDEO_EXTENSIONS.
 *
 * Uses an own-property check instead of the `in` operator: `in` also walks the
 * prototype chain, so inherited keys such as "toString", "constructor", or
 * "__proto__" would otherwise be reported as supported MIME types.
 *
 * @param value - MIME type string to test.
 * @returns `true` only when `value` is a key declared directly on VIDEO_EXTENSIONS.
 */
export function isSupportedVideoMimeType(value: string): value is SupportedVideoMimeType {
  return Object.prototype.hasOwnProperty.call(VIDEO_EXTENSIONS, value);
}
56+
2957
export function detectScreenshotFileType(bytes: Uint8Array): ScreenshotFileType | null {
3058
if (bytes.length >= 8 && hasPrefix(bytes, [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])) {
3159
return { mimeType: "image/png", extension: "png" };
@@ -46,6 +74,18 @@ export function detectScreenshotFileType(bytes: Uint8Array): ScreenshotFileType
4674
return null;
4775
}
4876

77+
/**
 * Sniffs the leading bytes of a buffer for an MP4 header.
 *
 * A match requires at least 12 bytes, the ASCII tag "ftyp" at offsets 4-7, and
 * a brand at offsets 8-11 that isMp4CompatibleBrand accepts.
 *
 * @param bytes - Leading bytes of the uploaded file.
 * @returns The MP4 MIME type and extension on a match, otherwise `null`.
 */
export function detectVideoFileType(bytes: Uint8Array): VideoFileType | null {
  // Anything shorter cannot hold both the "ftyp" tag and the 4-byte brand.
  if (bytes.length < 12) {
    return null;
  }

  const boxTag = bytes.slice(4, 8);
  if (!hasPrefix(boxTag, [0x66, 0x74, 0x79, 0x70])) {
    return null;
  }

  const brandBytes = bytes.slice(8, 12);
  return isMp4CompatibleBrand(brandBytes) ? { mimeType: "video/mp4", extension: "mp4" } : null;
}
88+
4989
export function buildMediaObjectKey(
5090
sessionId: string,
5191
artifactId: string,
@@ -112,6 +152,166 @@ export function parseOptionalViewport(
112152
};
113153
}
114154

155+
/**
 * Validates the multipart fields that accompany a video upload and builds the
 * metadata record for it.
 *
 * Fields are checked in a fixed order and the first failure is thrown:
 * caption, durationMs, recordingStartedAt, recordingEndedAt, the timestamp
 * relationships, dimensions, truncated, sourceUrl, endUrl, hasAudio.
 *
 * @param fields - Field lookup for the multipart request (e.g. a `FormData`).
 * @param createdAt - Timestamp stored on the result; defaults to `Date.now()`.
 * @returns Parsed metadata with `captureSurface: "browser"` and `source: "agent"`.
 * @throws Error with a field-specific message when any field is missing or invalid.
 */
export function parseVideoUploadMetadata(
  fields: MultipartFieldsLike,
  createdAt = Date.now()
): VideoUploadMetadata {
  const caption = parseRequiredString(fields.get("caption"), "caption");

  const durationMs = parseRequiredPositiveInteger(fields.get("durationMs"), "durationMs");
  const durationTooLong = durationMs > VIDEO_MAX_DURATION_MS;
  if (durationTooLong) {
    throw new Error(`durationMs must be ${VIDEO_MAX_DURATION_MS} or less`);
  }

  const startedAt = parseRequiredPositiveInteger(
    fields.get("recordingStartedAt"),
    "recordingStartedAt"
  );
  const endedAt = parseRequiredPositiveInteger(fields.get("recordingEndedAt"), "recordingEndedAt");
  if (startedAt > endedAt) {
    throw new Error("recordingEndedAt must be greater than or equal to recordingStartedAt");
  }

  // The span may exceed the nominal maximum by the tolerance before it is
  // rejected; the message still quotes the nominal maximum.
  const spanMs = endedAt - startedAt;
  const longestAllowedSpanMs = VIDEO_MAX_DURATION_MS + VIDEO_TIMESTAMP_TOLERANCE_MS;
  if (spanMs > longestAllowedSpanMs) {
    throw new Error(`recording timestamps must span ${VIDEO_MAX_DURATION_MS}ms or less`);
  }

  // The reported duration may outrun the timestamp span by at most the tolerance.
  const longestDurationForSpanMs = spanMs + VIDEO_TIMESTAMP_TOLERANCE_MS;
  if (durationMs > longestDurationForSpanMs) {
    throw new Error("durationMs must not exceed the recording timestamp span");
  }

  const dimensions = parseRequiredDimensions(fields.get("dimensions"));
  const truncated = parseRequiredBoolean(fields.get("truncated"), "truncated");
  const sourceUrl = parseOptionalUrl(fields.get("sourceUrl"), "sourceUrl");
  const endUrl = parseOptionalUrl(fields.get("endUrl"), "endUrl");

  // Only an explicit `true` is rejected; an absent value is simply omitted below.
  const hasAudio = parseOptionalBoolean(fields.get("hasAudio"));
  if (hasAudio === true) {
    throw new Error("hasAudio must be false");
  }

  // Optional keys are spread in conditionally so they are absent (not
  // `undefined`) when the field was not supplied.
  return {
    caption,
    durationMs,
    createdAt,
    recordingStartedAt: startedAt,
    recordingEndedAt: endedAt,
    dimensions,
    truncated,
    ...(sourceUrl ? { sourceUrl } : {}),
    ...(endUrl ? { endUrl } : {}),
    ...(hasAudio === false ? { hasAudio: false } : {}),
    captureSurface: "browser",
    source: "agent",
  };
}
208+
115209
/**
 * Reports whether `bytes` begins with the given byte sequence.
 *
 * An empty `prefix` always matches; a `prefix` longer than `bytes` never does,
 * because out-of-range reads yield `undefined`.
 */
function hasPrefix(bytes: Uint8Array, prefix: number[]): boolean {
  const mismatchFound = prefix.some((expected, offset) => bytes[offset] !== expected);
  return !mismatchFound;
}
212+
213+
/**
 * Checks whether a brand code is one this module treats as MP4.
 *
 * The bytes are decoded one character per byte and compared with a fixed list
 * of codes. Inputs shorter than four bytes are rejected; longer inputs decode
 * to a longer string and therefore never match.
 */
function isMp4CompatibleBrand(brand: Uint8Array): boolean {
  if (brand.length < 4) {
    return false;
  }

  const code = String.fromCharCode(...brand);
  switch (code) {
    case "isom":
    case "iso2":
    case "mp41":
    case "mp42":
    case "avc1":
    case "M4V ":
      return true;
    default:
      return false;
  }
}
225+
226+
/**
 * Returns the trimmed text of a mandatory multipart field.
 *
 * @param value - Raw field value; `null` and file-like values count as missing.
 * @param name - Field name used in the error message.
 * @returns The value with surrounding whitespace removed.
 * @throws Error `"<name> is required"` when the value is not a string or is blank.
 */
function parseRequiredString(value: MultipartFieldValue | null, name: string): string {
  // Non-string inputs collapse to "" so one emptiness check covers every case.
  const normalized = typeof value === "string" ? value.trim() : "";
  if (normalized.length === 0) {
    throw new Error(`${name} is required`);
  }

  return normalized;
}
233+
234+
/**
 * Parses a mandatory field as a strictly positive, safe integer.
 *
 * Only plain decimal digits with no leading zero are accepted, so signs,
 * decimals, exponents, and padded values are all rejected.
 *
 * @param value - Raw field value.
 * @param name - Field name used in error messages.
 * @returns The parsed integer.
 * @throws Error when the field is missing, is "0", is not canonical decimal
 *   digits, or exceeds the safe-integer range.
 */
function parseRequiredPositiveInteger(value: MultipartFieldValue | null, name: string): number {
  const raw = parseRequiredString(value, name);

  // A literal "0" gets its own message; other zero-like spellings fall to the pattern check.
  if (raw === "0") {
    throw new Error(`${name} must be a positive number`);
  }

  const isCanonicalDecimal = /^[1-9]\d*$/.test(raw);
  if (!isCanonicalDecimal) {
    throw new Error(`${name} must be a positive integer`);
  }

  // Very long digit strings convert to imprecise or infinite numbers; reject those.
  const result = Number(raw);
  if (Number.isSafeInteger(result)) {
    return result;
  }

  throw new Error(`${name} must be a safe integer`);
}
250+
251+
/**
 * Parses a mandatory boolean field by delegating to parseOptionalBoolean.
 *
 * @param value - Raw field value; only `null` counts as missing.
 * @param name - Field name used in the error message.
 * @returns The parsed boolean, or `false` when parseOptionalBoolean yields no value.
 * @throws Error `"<name> is required"` when the field is absent.
 */
function parseRequiredBoolean(value: MultipartFieldValue | null, name: string): boolean {
  if (value !== null) {
    // NOTE(review): a present value that parseOptionalBoolean does not resolve
    // silently becomes `false` here — confirm that fallback is intended.
    return parseOptionalBoolean(value) ?? false;
  }

  throw new Error(`${name} is required`);
}
258+
259+
/**
 * Parses the mandatory `dimensions` field, a JSON object with `width` and `height`.
 *
 * @param value - Raw field value; must be a JSON string.
 * @returns A new object holding only `width` and `height`.
 * @throws Error when the field is missing, is not a string, is not valid JSON,
 *   is not an object, or has a width/height that is not a positive integer.
 */
function parseRequiredDimensions(value: MultipartFieldValue | null): {
  width: number;
  height: number;
} {
  if (value === null) {
    throw new Error("dimensions is required");
  }
  if (typeof value !== "string") {
    throw new Error("dimensions must be a JSON string");
  }

  let decoded: unknown;
  try {
    decoded = JSON.parse(value);
  } catch {
    throw new Error("dimensions must be valid JSON");
  }

  // `typeof null` is "object", so null needs its own exclusion.
  if (typeof decoded !== "object" || decoded === null) {
    throw new Error("dimensions must be an object");
  }

  // Number.isInteger already rejects NaN and the infinities.
  const isPositiveInteger = (n: unknown): n is number =>
    typeof n === "number" && Number.isInteger(n) && n > 0;

  const { width, height } = decoded as { width?: unknown; height?: unknown };
  if (!isPositiveInteger(width) || !isPositiveInteger(height)) {
    throw new Error("dimensions must include positive integer width and height");
  }

  // Build a fresh object so extra keys in the payload are dropped.
  return { width, height };
}
301+
302+
/**
 * Parses an optional URL field.
 *
 * @param value - Raw field value, or `null` when the field is absent.
 * @param name - Field name used in error messages.
 * @returns The trimmed URL string, or `undefined` when the field is absent or blank.
 * @throws Error when the value is not a string or the `URL` constructor rejects it.
 */
function parseOptionalUrl(value: MultipartFieldValue | null, name: string): string | undefined {
  if (value === null) {
    return undefined;
  }
  if (typeof value !== "string") {
    throw new Error(`${name} must be a string`);
  }

  const candidate = value.trim();
  if (candidate.length === 0) {
    return undefined;
  }

  // The constructor is used purely as a validator; the original text is what gets returned.
  let parses = true;
  try {
    new URL(candidate);
  } catch {
    parses = false;
  }
  if (!parses) {
    throw new Error(`${name} must be a valid URL`);
  }

  return candidate;
}

0 commit comments

Comments
 (0)