Skip to content

Commit 4875e3e

Browse files
authored
feat: add mirror option to pre-flip input video for selfie streams (#128)
## Summary Adds an opt-in `mirror` option to `realtime.connect()` that pre-flips the input video on the SDK side, so selfie streams render naturally on the output without applying `transform: scaleX(-1)` on display. This is what unlocks server-baked watermarks for integrators: with the common pattern of CSS-flipping the displayed front-camera video, any pixel-baked watermark gets flipped too. Pre-flipping at the source moves the orientation contract into one place owned by the SDK — the server bakes the watermark in display orientation, and customers render the output as-is. ## Behavior `mirror` defaults to `false` so existing integrations are unchanged. The Safari fallback uses a canvas pipeline; modern Chromium uses `MediaStreamTrackProcessor` with sub-millisecond per-frame cost. ## Usage ```ts const realtimeClient = await client.realtime.connect(stream, { model, mirror: "auto", // mirror only when the input track reports facingMode: "user" onRemoteStream: (s) => { videoEl.srcObject = s; }, }); ``` - `false` (default) — never mirror. - `"auto"` — mirror when `facingMode === "user"` (mobile front cameras). - `true` — always mirror (e.g. desktop webcams, where `facingMode` is typically not reported). <!-- CURSOR_SUMMARY --> --- > [!NOTE] > **Medium Risk** > Adds an opt-in video preprocessing pipeline (MediaStreamTrackProcessor/OffscreenCanvas or canvas fallback) to the real-time connect path, which can impact browser compatibility, performance, and resource cleanup if edge cases are missed. > > **Overview** > Adds a new `mirror` option to `client.realtime.connect()` to pre-flip the *input* video stream (`false` default, `"auto"` based on `facingMode: "user"`, or `true`). > > Implements mirroring via a new `mirror-stream` utility (track-processor path when available, canvas-based fallback otherwise), wires it into the connect flow with warning-based fallback on failure, and ensures mirrored resources are disposed on `disconnect` and on connection errors. 
> > Updates the React Vite example to use `mirror: "auto"`, documents the option in the SDK README, and adds unit tests covering basic mirror decision logic and no-op behavior for audio-only streams. > > <sup>Reviewed by [Cursor Bugbot](https://cursor.com/bugbot) for commit 730b087. Bugbot is set up for automated code reviews on this repo. Configure [here](https://www.cursor.com/dashboard/bugbot).</sup> <!-- /CURSOR_SUMMARY -->
1 parent ea25fe2 commit 4875e3e

5 files changed

Lines changed: 301 additions & 2 deletions

File tree

examples/react-vite/src/components/VideoStream.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ export function VideoStream({ prompt }: VideoStreamProps) {
4646

4747
const realtimeClient = await client.realtime.connect(stream, {
4848
model,
49+
mirror: "auto",
4950
onRemoteStream: (transformedStream: MediaStream) => {
5051
if (outputRef.current) {
5152
outputRef.current.srcObject = transformedStream;

packages/sdk/README.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,23 @@ realtimeClient.setPrompt("Cyberpunk city");
6262
realtimeClient.disconnect();
6363
```
6464

65+
#### Front-camera mirroring
66+
67+
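Pre-flip the input stream in the SDK so selfie video renders naturally on the output, without applying `transform: scaleX(-1)` when displaying it: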
68+
69+
```ts
70+
const realtimeClient = await client.realtime.connect(stream, {
71+
model,
72+
mirror: "auto", // or true to always mirror
73+
// ...
74+
});
75+
```
76+
77+
Options:
78+
- `false` (default) — never mirror.
79+
- `"auto"` — mirror when the input track reports `facingMode: "user"` (mobile front cameras).
80+
- `true` — always mirror (e.g. desktop webcams).
81+
6582
### Async Processing (Queue API)
6683

6784
For video generation jobs, use the queue API to submit jobs and poll for results:

packages/sdk/src/realtime/client.ts

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import type { Logger } from "../utils/logger";
66
import type { DiagnosticEvent } from "./diagnostics";
77
import { createEventBuffer } from "./event-buffer";
88
import { realtimeMethods, type SetInput } from "./methods";
9+
import { createMirroredStream, type MirroredStream, shouldMirrorTrack } from "./mirror-stream";
910
import {
1011
decodeSubscribeToken,
1112
encodeSubscribeToken,
@@ -92,6 +93,13 @@ const realTimeClientConnectOptionsSchema = z.object({
9293
}),
9394
initialState: realTimeClientInitialStateSchema.optional(),
9495
customizeOffer: createAsyncFunctionSchema(z.function()).optional(),
96+
/**
97+
* Pre-flip input video.
98+
* - false (default): never mirror.
99+
* - "auto": mirror when `facingMode === "user"`.
100+
* - true: always mirror.
101+
*/
102+
mirror: z.union([z.literal("auto"), z.boolean()]).optional(),
95103
});
96104
export type RealTimeClientConnectOptions = Omit<z.infer<typeof realTimeClientConnectOptionsSchema>, "model"> & {
97105
model: ModelDefinition | CustomModelDefinition;
@@ -134,8 +142,26 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => {
134142
}
135143

136144
const { onRemoteStream, initialState } = parsedOptions.data;
137-
138-
const inputStream: MediaStream = stream ?? new MediaStream();
145+
const mirror = parsedOptions.data.mirror ?? false;
146+
147+
let inputStream: MediaStream = stream ?? new MediaStream();
148+
149+
let mirroredStream: MirroredStream | undefined;
150+
if (mirror !== false) {
151+
try {
152+
const firstVideoTrack = inputStream.getVideoTracks?.()[0];
153+
if (firstVideoTrack && (mirror === true || shouldMirrorTrack(firstVideoTrack))) {
154+
mirroredStream = createMirroredStream(inputStream, { fps: options.model.fps });
155+
inputStream = mirroredStream.stream;
156+
} else if (mirror === true && !firstVideoTrack) {
157+
logger.warn("mirror: true requested but no video track was found on the input stream");
158+
}
159+
} catch (error) {
160+
logger.warn("Failed to mirror input stream; falling back to un-mirrored input", {
161+
error: error instanceof Error ? error.message : String(error),
162+
});
163+
}
164+
}
139165

140166
let webrtcManager: WebRTCManager | undefined;
141167
let telemetryReporter: ITelemetryReporter = new NullTelemetryReporter();
@@ -321,6 +347,7 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => {
321347
telemetryReporter.stop();
322348
stop();
323349
manager.cleanup();
350+
mirroredStream?.dispose();
324351
},
325352
on: eventEmitter.on,
326353
off: eventEmitter.off,
@@ -347,6 +374,7 @@ export const createRealTimeClient = (opts: RealTimeClientOptions) => {
347374
} catch (error) {
348375
telemetryReporter.stop();
349376
webrtcManager?.cleanup();
377+
mirroredStream?.dispose();
350378
throw error;
351379
}
352380
};
Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
// Not in lib.dom yet.
2+
/**
 * Constructor shape of the `MediaStreamTrackProcessor` global, declared here
 * because the DOM typings this file compiles against do not include it.
 *
 * Constructing one for a track exposes that track's frames as a readable
 * stream of `VideoFrame`s.
 */
interface MediaStreamTrackProcessorCtor {
  new (init: { track: MediaStreamTrack }): {
    readable: ReadableStream<VideoFrame>;
  };
}
5+
/**
 * Constructor shape of the `MediaStreamTrackGenerator` global, declared here
 * because the DOM typings this file compiles against do not include it.
 *
 * The constructed object is itself a `MediaStreamTrack` and additionally
 * exposes a writable stream that accepts `VideoFrame`s.
 */
interface MediaStreamTrackGeneratorCtor {
  new (init: { kind: "video" }): MediaStreamTrack & {
    writable: WritableStream<VideoFrame>;
  };
}
8+
9+
/**
 * Which pipeline produced a mirrored stream:
 * - `"track-processor"`: frames flipped through `MediaStreamTrackProcessor` / `MediaStreamTrackGenerator`.
 * - `"canvas"`: frames flipped by drawing a `<video>` element onto a `<canvas>`.
 * - `"noop"`: the input had no video track and was returned unchanged.
 */
type FlipImpl =
  | "track-processor"
  | "canvas"
  | "noop";
10+
11+
/** Options accepted by `createMirroredStream`. */
export interface MirroredStreamOptions {
  /**
   * Frame rate handed to `canvas.captureStream()` when the canvas pipeline is
   * used. The track-processor pipeline does not read this value.
   */
  fps: number;
}
14+
15+
/** Result of `createMirroredStream`. */
export interface MirroredStream {
  /**
   * The stream to use in place of the input. On the `"noop"` path this is the
   * input stream itself; otherwise it carries the flipped video track followed
   * by the input's audio tracks.
   */
  stream: MediaStream;

  /** Stops the mirroring pipeline. Calling it more than once has no further effect. */
  dispose: () => void;

  /** Pipeline that was selected to produce `stream`. */
  impl: FlipImpl;
}
20+
21+
/**
 * Reports whether the track-processor pipeline can be used.
 *
 * @returns `true` only when both `MediaStreamTrackProcessor` and
 * `MediaStreamTrackGenerator` exist on `globalThis` as functions.
 */
export function isMediaStreamTrackProcessorSupported(): boolean {
  if (typeof globalThis === "undefined") {
    return false;
  }

  const scope = globalThis as {
    MediaStreamTrackProcessor?: unknown;
    MediaStreamTrackGenerator?: unknown;
  };
  const hasProcessor = typeof scope.MediaStreamTrackProcessor === "function";
  const hasGenerator = typeof scope.MediaStreamTrackGenerator === "function";
  return hasProcessor && hasGenerator;
}
28+
29+
/**
 * Decides whether a track should be mirrored in `"auto"` mode.
 *
 * @param track - Track to inspect.
 * @returns `true` only for a video track whose settings report
 * `facingMode === "user"`. Returns `false` for non-video tracks, when
 * `getSettings` is missing or reports no `facingMode`, and when reading the
 * settings throws.
 */
export function shouldMirrorTrack(track: MediaStreamTrack): boolean {
  if (track.kind !== "video") {
    return false;
  }

  try {
    const reported: string | undefined = track.getSettings?.().facingMode;
    return reported === "user";
  } catch {
    // Treat unreadable settings the same as "facing mode unknown".
    return false;
  }
}
39+
40+
/**
 * Produces a horizontally flipped copy of the input stream's first video track.
 *
 * @param input - Stream to mirror. Only its first video track is flipped; its
 * audio tracks are carried over unchanged.
 * @param opts - `fps` is forwarded to the canvas pipeline.
 * @returns A `MirroredStream`. When `input` has no video track the input
 * itself is returned with `impl: "noop"` and a do-nothing `dispose`.
 * @throws Whatever the selected pipeline throws during setup; errors are not
 * caught here.
 */
export function createMirroredStream(input: MediaStream, opts: MirroredStreamOptions): MirroredStream {
  const videoTracks = input.getVideoTracks();
  const audioTracks = input.getAudioTracks();
  const [sourceVideo] = videoTracks;

  // Nothing to flip: hand the caller's stream straight back.
  if (!sourceVideo) {
    const passthrough: MirroredStream = {
      stream: input,
      dispose: () => {},
      impl: "noop",
    };
    return passthrough;
  }

  // Use the track-processor pipeline when available, the canvas one otherwise.
  return isMediaStreamTrackProcessorSupported()
    ? createWithTrackProcessor(sourceVideo, audioTracks)
    : createWithCanvas(sourceVideo, audioTracks, opts.fps);
}
53+
54+
/**
 * Mirrors a video track with insertable streams: frames are read through
 * `MediaStreamTrackProcessor`, flipped horizontally on an `OffscreenCanvas`,
 * and written to a `MediaStreamTrackGenerator`.
 *
 * @param sourceVideo - Video track whose frames are flipped.
 * @param audioTracks - Audio tracks appended unchanged to the returned stream.
 * @returns The mirrored stream with `impl: "track-processor"`. `dispose` is
 * idempotent; it aborts the frame pipe and stops the generated track.
 * @throws Error when an `OffscreenCanvas` 2D context cannot be obtained.
 */
function createWithTrackProcessor(sourceVideo: MediaStreamTrack, audioTracks: MediaStreamTrack[]): MirroredStream {
  const scope = globalThis as unknown as {
    MediaStreamTrackProcessor: MediaStreamTrackProcessorCtor;
    MediaStreamTrackGenerator: MediaStreamTrackGeneratorCtor;
  };

  // Acquire the working canvas and its context up front so a missing 2D
  // context fails loudly here, before the source track is touched, rather than
  // silently passing un-flipped frames through the pipeline. This one canvas
  // is reused for every frame.
  const canvas = new OffscreenCanvas(1, 1);
  const ctx = canvas.getContext("2d");
  if (!ctx) {
    throw new Error("createMirroredStream: OffscreenCanvas 2D context unavailable");
  }

  const processor = new scope.MediaStreamTrackProcessor({ track: sourceVideo });
  const generator = new scope.MediaStreamTrackGenerator({ kind: "video" });

  const transform = new TransformStream<VideoFrame, VideoFrame>({
    transform(frame, controller) {
      // VideoFrames hold GPU buffers; close them deterministically even if
      // drawing, VideoFrame construction, or enqueue throws.
      let flipped: VideoFrame | undefined;
      try {
        const w = frame.displayWidth;
        const h = frame.displayHeight;

        // Resize in place when the source resolution changes. Assigning a
        // canvas dimension also resets the context state, so the same context
        // stays valid and no re-acquisition (or cast) is needed.
        if (canvas.width !== w) canvas.width = w;
        if (canvas.height !== h) canvas.height = h;

        // setTransform replaces the whole matrix, so it is re-applied per
        // frame without needing save()/restore().
        ctx.setTransform(-1, 0, 0, 1, w, 0);
        ctx.drawImage(frame, 0, 0, w, h);

        flipped = new VideoFrame(canvas, {
          timestamp: frame.timestamp,
          // Carry the source duration through when the frame reports one.
          duration: frame.duration ?? undefined,
          alpha: "discard",
        });
        controller.enqueue(flipped);
        // Ownership passed downstream; do not close it in `finally`.
        flipped = undefined;
      } finally {
        flipped?.close();
        frame.close();
      }
    },
  });

  // The signal lets dispose() tear the pipe down explicitly instead of relying
  // on the generator's stop() to propagate back to the processor.
  const teardown = new AbortController();
  processor.readable
    .pipeThrough(transform)
    .pipeTo(generator.writable, { signal: teardown.signal })
    .catch(() => {
      // Best-effort: the pipe rejects on dispose() and whenever either end
      // closes or errors. There is nothing to recover here.
    });

  const stream = new MediaStream([generator, ...audioTracks]);

  let disposed = false;
  return {
    stream,
    impl: "track-processor",
    dispose: () => {
      if (disposed) return;
      disposed = true;
      teardown.abort();
      generator.stop();
    },
  };
}
117+
118+
/**
 * Mirrors a video track without insertable streams: the source plays in a
 * detached `<video>` element, each animation frame is drawn flipped onto a
 * `<canvas>`, and the canvas is captured back into a video track.
 *
 * @param sourceVideo - Video track to flip.
 * @param audioTracks - Audio tracks appended unchanged to the returned stream.
 * @param fps - Frame rate passed to `canvas.captureStream()`.
 * @returns The mirrored stream with `impl: "canvas"`. `dispose` is idempotent;
 * it cancels the pending animation frame, stops the captured track, and
 * detaches the source from the `<video>` element.
 * @throws Error when `document` is undefined, when no 2D context is available,
 * when `canvas.captureStream` is missing, or when it yields no video track.
 */
function createWithCanvas(sourceVideo: MediaStreamTrack, audioTracks: MediaStreamTrack[], fps: number): MirroredStream {
  if (typeof document === "undefined") {
    throw new Error("createMirroredStream requires a DOM environment (document is undefined)");
  }

  const surface = document.createElement("canvas");
  const pen = surface.getContext("2d");
  if (!pen) {
    throw new Error("createMirroredStream: 2D canvas context unavailable");
  }

  // Obtain the output track first: if capture is unsupported we bail out
  // before any playback or animation-frame work has been started.
  if (typeof surface.captureStream !== "function") {
    throw new Error("createMirroredStream: canvas.captureStream unavailable");
  }
  const capturedTracks = surface.captureStream(fps).getVideoTracks();
  const flippedTrack = capturedTracks[0];
  if (!flippedTrack) {
    throw new Error("createMirroredStream: canvas.captureStream produced no video track");
  }

  const player = document.createElement("video");
  player.muted = true;
  player.playsInline = true;
  player.autoplay = true;
  player.srcObject = new MediaStream([sourceVideo]);

  let stopped = false;
  let pendingFrame: number | null = null;

  // Draws the current video frame flipped, then schedules itself again.
  const paintFrame = (): void => {
    if (stopped) {
      return;
    }

    const width = player.videoWidth;
    const height = player.videoHeight;
    // The element reports 0x0 until it has frame data; skip drawing until then.
    if (width > 0 && height > 0) {
      if (surface.width !== width) {
        surface.width = width;
      }
      if (surface.height !== height) {
        surface.height = height;
      }
      pen.save();
      pen.setTransform(-1, 0, 0, 1, width, 0);
      pen.drawImage(player, 0, 0, width, height);
      pen.restore();
    }

    pendingFrame = requestAnimationFrame(paintFrame);
  };

  // Playback failures are ignored; the draw loop simply has nothing to paint.
  void player.play().catch(() => {});
  pendingFrame = requestAnimationFrame(paintFrame);

  const release = (): void => {
    if (stopped) {
      return;
    }
    stopped = true;
    if (pendingFrame !== null) {
      cancelAnimationFrame(pendingFrame);
    }
    flippedTrack.stop();
    player.srcObject = null;
  };

  return {
    stream: new MediaStream([flippedTrack, ...audioTracks]),
    impl: "canvas",
    dispose: release,
  };
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import { describe, expect, it } from "vitest";
2+
import {
3+
createMirroredStream,
4+
isMediaStreamTrackProcessorSupported,
5+
shouldMirrorTrack,
6+
} from "../src/realtime/mirror-stream.js";
7+
8+
/**
 * Builds a minimal stand-in for a `MediaStreamTrack`.
 *
 * Defaults to a video track whose `getSettings()` returns `overrides.settings`
 * (or an empty object). Every key in `overrides`, including `settings` itself,
 * is copied onto the result and wins over the defaults.
 */
function fakeTrack(overrides: Partial<MediaStreamTrack> & { settings?: MediaTrackSettings }): MediaStreamTrack {
  const reported: MediaTrackSettings = overrides.settings ?? {};
  const defaults = {
    kind: "video",
    getSettings: () => reported,
  };
  const shape = { ...defaults, ...overrides };
  return shape as unknown as MediaStreamTrack;
}
16+
17+
// shouldMirrorTrack must answer true only for video tracks that report
// facingMode "user", and must never throw.
describe("shouldMirrorTrack", () => {
  it("returns true for a front-facing video track", () => {
    const frontCamera = fakeTrack({ settings: { facingMode: "user" } });
    expect(shouldMirrorTrack(frontCamera)).toBe(true);
  });

  it("returns false for a back-facing video track", () => {
    const rearCamera = fakeTrack({ settings: { facingMode: "environment" } });
    expect(shouldMirrorTrack(rearCamera)).toBe(false);
  });

  it("returns false when facingMode is unreported", () => {
    const unknownFacing = fakeTrack({ settings: {} });
    expect(shouldMirrorTrack(unknownFacing)).toBe(false);
  });

  it("returns false for audio tracks", () => {
    // The kind check comes first, so the facingMode value is irrelevant here.
    const microphone = fakeTrack({ kind: "audio", settings: { facingMode: "user" } });
    expect(shouldMirrorTrack(microphone)).toBe(false);
  });

  it("returns false when getSettings throws", () => {
    const failingGetSettings = (): never => {
      throw new Error("not supported");
    };
    const track = {
      kind: "video",
      getSettings: failingGetSettings,
    } as unknown as MediaStreamTrack;

    expect(shouldMirrorTrack(track)).toBe(false);
  });
});
44+
45+
// The test runtime does not define the insertable-streams globals, so
// detection is expected to report "unsupported".
describe("isMediaStreamTrackProcessorSupported", () => {
  it("returns false in node", () => {
    const supported = isMediaStreamTrackProcessorSupported();
    expect(supported).toBe(false);
  });
});
50+
51+
// Only the no-video path is exercised here; it needs no browser APIs.
describe("createMirroredStream", () => {
  it("passes audio-only streams through as a no-op", () => {
    const audioTrack = fakeTrack({ kind: "audio", settings: {} });
    const inputStream = {
      getVideoTracks: () => [],
      getAudioTracks: () => [audioTrack],
    } as unknown as MediaStream;

    const { stream, impl } = createMirroredStream(inputStream, { fps: 25 });

    expect(stream).toBe(inputStream);
    expect(impl).toBe("noop");
  });

  it("dispose is idempotent on the no-op path", () => {
    const inputStream = {
      getVideoTracks: () => [],
      getAudioTracks: () => [],
    } as unknown as MediaStream;

    const result = createMirroredStream(inputStream, { fps: 25 });
    const disposeTwice = (): void => {
      result.dispose();
      result.dispose();
    };

    expect(disposeTwice).not.toThrow();
  });
});

0 commit comments

Comments
 (0)