Description
Describe the issue
Using the model referenced below (see STR) produces a substantially different output when using the WebGPU provider (wrong) vs the WASM provider (correct). The difference is big enough in the output to indicate some bug in some WebGPU operator, similarly to #24070
To further confirm this, I ran inference with the same model using onnx in Python: the correct results are consistent with both the CUDA and the CPU providers.
Unfortunately, I'm at a loss as to how to debug this further. Since I hit these problems frequently: is there anything specific I can do to provide more comprehensive and actionable bug reports?
To reproduce
<html>
<head>
<!-- onnxruntime-web loaded from CDN; the '@dev' tag pins a nightly build -->
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@dev/dist/ort.min.js"></script>
</head>
<body>
<h1>Example</h1>
<div>
<!-- The button triggers inference; the source image stays hidden and is drawn onto the canvas -->
<button id="webinference" onclick="runBrowser()">Run Browser Inference</button>
<img id="image" src="turtle2.png" alt="Image preview..." hidden>
<canvas id="mycanvas"></canvas>
</div>
<div>
<!-- NOTE(review): this img is never populated by the script below — presumably a leftover; confirm -->
<img id="resized" />
</div>
</body>
<script>
/**
 * Create an ONNX Runtime inference session for the given model file.
 * The provider list selects WebGPU; swap in 'wasm' to compare against
 * the (slow but correct) WASM backend.
 * @param {string} modelPath - URL/path of the .onnx model.
 * @returns {Promise<ort.InferenceSession>} the ready session.
 */
async function startInferenceSession(modelPath) {
  const sessionOptions = {
    executionProviders: [
      // TODO: set to 'wasm' for debugging. It will take tens of minutes to work!
      //'wasm',
      'webgpu'
    ],
    graphOptimizationLevel: 'all',
  };
  const session = await ort.InferenceSession.create(modelPath, sessionOptions);
  console.log('Inference session created');
  return session;
}
/**
 * Run the session on the given input tensor. The second tensor
 * (inputSize) is only fed when the model declares a second input.
 * @param {ort.InferenceSession} session - an initialized session.
 * @param {ort.Tensor} input - the image tensor for the first input.
 * @param {ort.Tensor} inputSize - tensor for the optional second input.
 * @returns {Promise<Object>} map of output names to output tensors.
 */
async function runInference(session, input, inputSize) {
  const [firstInput, secondInput] = session.inputNames;
  const feeds = { [firstInput]: input };
  if (secondInput) {
    feeds[secondInput] = inputSize;
  }
  return await session.run(feeds);
}
/**
 * Run one test inference: convert the canvas ImageData to a tensor,
 * feed it (plus the fixed 640x640 input-size tensor) to the model,
 * and log the best detection score.
 * @param {ort.InferenceSession} session - an initialized session.
 * @param {ImageData} imageData - pixels grabbed from the canvas.
 */
async function testInference(session, imageData) {
  console.log('Testing inference on', imageData);
  const tensor = await ort.Tensor.fromImage(imageData);
  console.log('Tensor:', tensor);
  // Second model input: the (hard-coded) target image size as int64 [1, 2].
  const inputDims = new ort.Tensor(
    "int64",
    new BigInt64Array([BigInt(640), BigInt(640)]),
    [1, 2],
  );
  const outData = await runInference(session, tensor, inputDims);
  console.log('Output data:', outData);
  // Bug fix: Math.max(...data) spreads every score into a single call,
  // which throws RangeError (argument-count/stack limit) once the model
  // emits more than ~100k scores. Scan linearly instead.
  let best = -Infinity;
  for (const score of outData.scores.data) {
    if (score > best) {
      best = score;
    }
  }
  console.log('Best score: ', best);
}
/**
 * Click handler: draw the hidden source image onto the canvas, grab
 * its pixels, create a session, and run the test inference.
 */
async function runBrowser() {
  const sourceImage = document.getElementById('image');
  const targetCanvas = document.getElementById("mycanvas");
  targetCanvas.width = sourceImage.width;
  targetCanvas.height = sourceImage.height;
  const context = targetCanvas.getContext("2d");
  context.drawImage(sourceImage, 0, 0, sourceImage.width, sourceImage.height);
  const imageData = context.getImageData(0, 0, targetCanvas.width, targetCanvas.height);
  const session = await startInferenceSession("./test-model.onnx");
  await testInference(session, imageData);
}
/**
 * On page load, mirror the hidden source image onto the visible canvas
 * so the user can see what will be fed to the model.
 */
async function main() {
  const sourceImage = document.getElementById('image');
  const previewCanvas = document.getElementById("mycanvas");
  previewCanvas.width = sourceImage.width;
  previewCanvas.height = sourceImage.height;
  const context = previewCanvas.getContext("2d");
  context.drawImage(sourceImage, 0, 0, sourceImage.width, sourceImage.height);
}
main();
</script>
</html>
- Load the page, click on the "Run Browser Inference" button and inspect the browser console.
- With the wasm provider the output shows
Best score: 0.8186498880386353
(correct!), while with WebGPU it shows Best score: 0.03534473478794098
Urgency
My research project is unfortunately blocked on this (I'm falling back on a python backend temporarily)
ONNX Runtime Installation
Released Package
ONNX Runtime Version or Commit ID
1.23.0-dev.20250602-03b22ffc42
Execution Provider
'webgpu' (WebGPU)