Skip to content

Commit 796bc56

Browse files
committed
PNGLoader: Reduce unfiltering branchiness even more.
For y=0, use a dummy scanline filled with zeroes as the previous row, so we no longer have to check y on every iteration before grabbing color data from scanline[y - 1].
1 parent cd1afde commit 796bc56

File tree

1 file changed

+22
-17
lines changed

1 file changed

+22
-17
lines changed

SharedGraphics/PNGLoader.cpp

Lines changed: 22 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -162,8 +162,9 @@ union [[gnu::packed]] Pixel {
162162
static_assert(sizeof(Pixel) == 4);
163163

164164
template<bool has_alpha, byte filter_type>
165-
[[gnu::always_inline]] static inline void unfilter_impl(const GraphicsBitmap& bitmap, int y)
165+
[[gnu::always_inline]] static inline void unfilter_impl(const GraphicsBitmap& bitmap, int y, const void* dummy_scanline_data)
166166
{
167+
auto* dummy_scanline = (const Pixel*)dummy_scanline_data;
167168
if constexpr (filter_type == 1) {
168169
auto* pixels = (Pixel*)bitmap.scanline(y);
169170
for (int i = 0; i < bitmap.width(); ++i) {
@@ -181,11 +182,12 @@ template<bool has_alpha, byte filter_type>
181182
}
182183
if constexpr (filter_type == 2) {
183184
auto* pixels = (Pixel*)bitmap.scanline(y);
185+
auto* pixels_y_minus_1 = y == 0 ? dummy_scanline : (Pixel*)bitmap.scanline(y - 1);
184186
for (int i = 0; i < bitmap.width(); ++i) {
185187
auto& x = pixels[i];
186188
swap(x.r, x.b);
187189
Pixel b;
188-
if (y != 0) b.rgba = bitmap.scanline(y - 1)[i];
190+
b.rgba = pixels_y_minus_1[i].rgba;
189191
x.r += b.r;
190192
x.g += b.g;
191193
x.b += b.b;
@@ -197,13 +199,14 @@ template<bool has_alpha, byte filter_type>
197199

198200
if constexpr (filter_type == 3) {
199201
auto* pixels = (Pixel*)bitmap.scanline(y);
202+
auto* pixels_y_minus_1 = y == 0 ? dummy_scanline : (Pixel*)bitmap.scanline(y - 1);
200203
for (int i = 0; i < bitmap.width(); ++i) {
201204
auto& x = pixels[i];
202205
swap(x.r, x.b);
203206
Pixel a;
204207
Pixel b;
205208
if (i != 0) a.rgba = bitmap.scanline(y)[i - 1];
206-
if (y != 0) b.rgba = bitmap.scanline(y - 1)[i];
209+
b.rgba = pixels_y_minus_1[i].rgba;
207210
x.r = x.r + ((a.r + b.r) / 2);
208211
x.g = x.g + ((a.g + b.g) / 2);
209212
x.b = x.b + ((a.b + b.b) / 2);
@@ -215,15 +218,18 @@ template<bool has_alpha, byte filter_type>
215218

216219
if constexpr (filter_type == 4) {
217220
auto* pixels = (Pixel*)bitmap.scanline(y);
221+
auto* pixels_y_minus_1 = y == 0 ? dummy_scanline : (Pixel*)bitmap.scanline(y - 1);
218222
for (int i = 0; i < bitmap.width(); ++i) {
219223
auto& x = pixels[i];
220224
swap(x.r, x.b);
221225
Pixel a;
222226
Pixel b;
223227
Pixel c;
224-
if (i != 0) a.rgba = bitmap.scanline(y)[i - 1];
225-
if (y != 0) b.rgba = bitmap.scanline(y - 1)[i];
226-
if (y != 0 && i != 0) c.rgba = bitmap.scanline(y - 1)[i - 1];
228+
if (i != 0) {
229+
a.rgba = bitmap.scanline(y)[i - 1];
230+
c.rgba = pixels_y_minus_1[i - 1].rgba;
231+
}
232+
b.rgba = pixels_y_minus_1[i].rgba;
227233
x.r += paeth_predictor(a.r, b.r, c.r);
228234
x.g += paeth_predictor(a.g, b.g, c.g);
229235
x.b += paeth_predictor(a.b, b.b, c.b);
@@ -233,9 +239,6 @@ template<bool has_alpha, byte filter_type>
233239
}
234240
}
235241

236-
237-
238-
239242
[[gnu::noinline]] static void unfilter(PNGLoadingContext& context)
240243
{
241244
{
@@ -266,37 +269,39 @@ template<bool has_alpha, byte filter_type>
266269
}
267270
}
268271

272+
auto dummy_scanline = ByteBuffer::create_zeroed(context.width * sizeof(RGBA32));
273+
269274
Stopwatch sw("load_png_impl: unfilter: process");
270275
for (int y = 0; y < context.height; ++y) {
271276
auto filter = context.scanlines[y].filter;
272277
if (filter == 0)
273278
continue;
274279
if (filter == 1) {
275280
if (context.has_alpha())
276-
unfilter_impl<true, 1>(*context.bitmap, y);
281+
unfilter_impl<true, 1>(*context.bitmap, y, dummy_scanline.pointer());
277282
else
278-
unfilter_impl<false, 1>(*context.bitmap, y);
283+
unfilter_impl<false, 1>(*context.bitmap, y, dummy_scanline.pointer());
279284
continue;
280285
}
281286
if (filter == 2) {
282287
if (context.has_alpha())
283-
unfilter_impl<true, 2>(*context.bitmap, y);
288+
unfilter_impl<true, 2>(*context.bitmap, y, dummy_scanline.pointer());
284289
else
285-
unfilter_impl<false, 2>(*context.bitmap, y);
290+
unfilter_impl<false, 2>(*context.bitmap, y, dummy_scanline.pointer());
286291
continue;
287292
}
288293
if (filter == 3) {
289294
if (context.has_alpha())
290-
unfilter_impl<true, 3>(*context.bitmap, y);
295+
unfilter_impl<true, 3>(*context.bitmap, y, dummy_scanline.pointer());
291296
else
292-
unfilter_impl<false, 3>(*context.bitmap, y);
297+
unfilter_impl<false, 3>(*context.bitmap, y, dummy_scanline.pointer());
293298
continue;
294299
}
295300
if (filter == 4) {
296301
if (context.has_alpha())
297-
unfilter_impl<true, 4>(*context.bitmap, y);
302+
unfilter_impl<true, 4>(*context.bitmap, y, dummy_scanline.pointer());
298303
else
299-
unfilter_impl<false, 4>(*context.bitmap, y);
304+
unfilter_impl<false, 4>(*context.bitmap, y, dummy_scanline.pointer());
300305
continue;
301306
}
302307
}

0 commit comments

Comments
 (0)