Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

memory layout optimizations for JpegDecoderCore #25

Merged
merged 21 commits into from
Dec 5, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 69 additions & 4 deletions src/ImageSharp/Formats/Jpg/Components/Bits.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,92 @@

namespace ImageSharp.Formats
{
using System.Runtime.CompilerServices;

/// <summary>
/// Holds the unprocessed bits that have been taken from the byte-stream.
/// The n least significant bits of the accumulator form the unread bits, to be read in MSB to
/// LSB order.
/// </summary>
internal class Bits
internal struct Bits
{
/// <summary>
/// Gets or sets the accumulator.
/// </summary>
public uint Accumulator { get; set; }
public uint Accumulator;

/// <summary>
/// Gets or sets the mask.
/// <![CDATA[mask==1<<(unreadbits-1) when unreadbits>0, with mask==0 when unreadbits==0.]]>
/// </summary>
public uint Mask { get; set; }
public uint Mask;

/// <summary>
/// Gets or sets the number of unread bits in the accumulator.
/// </summary>
public int UnreadBits { get; set; }
public int UnreadBits;

/// <summary>
/// Reads bytes through the decoder's byte buffer until <see cref="UnreadBits"/> is at
/// least <paramref name="n"/>. One byte is always read before the count is checked, so
/// for best performance (avoiding function calls inside hot loops) the caller is
/// responsible for first checking that UnreadBits &lt; n.
/// </summary>
/// <param name="n">The number of bits to ensure.</param>
/// <param name="decoder">The decoder whose byte buffer and input stream supply the bytes.</param>
/// <returns>
/// <c>NoError</c> once enough bits are buffered; otherwise the error code reported by the byte read.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal JpegDecoderCore.ErrorCodes EnsureNBits(int n, JpegDecoderCore decoder)
{
    do
    {
        JpegDecoderCore.ErrorCodes status;
        byte next = decoder.bytes.ReadByteStuffedByte(decoder.inputStream, out status);

        if (status != JpegDecoderCore.ErrorCodes.NoError)
        {
            // Propagate the read failure; the accumulator is left untouched for this byte.
            return status;
        }

        // Append the new byte below the bits that are already buffered.
        this.Accumulator = (this.Accumulator << 8) | next;
        this.UnreadBits += 8;

        // Mask tracks the most significant unread bit: start at bit 7 when empty, else move up a byte.
        this.Mask = this.Mask == 0 ? 1u << 7 : this.Mask << 8;
    }
    while (this.UnreadBits < n);

    return JpegDecoderCore.ErrorCodes.NoError;
}

/// <summary>
/// Consumes <paramref name="t"/> bits from the accumulator and returns them as a signed value.
/// A raw value below <c>(1 &lt;&lt; t) &gt;&gt; 1</c> is offset by <c>(-1 &lt;&lt; t) + 1</c>;
/// any other raw value is returned unchanged.
/// </summary>
/// <param name="t">The number of bits to consume.</param>
/// <param name="decoder">The decoder handed to <see cref="EnsureNBits"/> when more bits must be read.</param>
/// <returns>The decoded value.</returns>
/// <exception cref="JpegDecoderCore.MissingFF00Exception">
/// Thrown when <see cref="EnsureNBits"/> reports any error.
/// </exception>
internal int ReceiveExtend(byte t, JpegDecoderCore decoder)
{
    // Refill only when the buffered bits cannot satisfy the request.
    if (this.UnreadBits < t
        && this.EnsureNBits(t, decoder) != JpegDecoderCore.ErrorCodes.NoError)
    {
        throw new JpegDecoderCore.MissingFF00Exception();
    }

    this.UnreadBits -= t;
    this.Mask >>= t;

    int range = 1 << t;
    int raw = (int)((this.Accumulator >> this.UnreadBits) & (range - 1));

    return raw < (range >> 1)
        ? raw + ((-1) << t) + 1
        : raw;
}
}
}
188 changes: 178 additions & 10 deletions src/ImageSharp/Formats/Jpg/Components/Block.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,19 @@
// Copyright (c) James Jackson-South and contributors.
// Licensed under the Apache License, Version 2.0.
// </copyright>

namespace ImageSharp.Formats
{
using System;
using System.Buffers;
using System.Runtime.CompilerServices;

/// <summary>
/// Represents an 8x8 block of coefficients to transform and encode.
/// </summary>
internal class Block
internal struct Block : IDisposable
{
private static readonly ArrayPool<int> ArrayPool = ArrayPool<int>.Create(BlockSize, 50);

/// <summary>
/// Gets the size of the block.
/// </summary>
Expand All @@ -18,27 +23,190 @@ internal class Block
/// <summary>
/// The array of block data.
/// </summary>
private readonly int[] data;
public int[] Data;

/// <summary>
/// Rents the backing array for this block from the struct's static array pool.
/// </summary>
public void Init() => this.Data = ArrayPool.Rent(BlockSize);

/// <summary>
/// Creates a block whose backing array has already been rented via <see cref="Init"/>.
/// </summary>
/// <returns>The initialized <see cref="Block"/>.</returns>
public static Block Create()
{
    Block created = default(Block);
    created.Init();
    return created;
}

/// <summary>
/// Creates an array of blocks and initializes every element.
/// </summary>
/// <param name="size">The number of blocks to create.</param>
/// <returns>The array of initialized blocks.</returns>
public static Block[] CreateArray(int size)
{
    Block[] blocks = new Block[size];

    // Elements are initialized through the array index so that Init() acts on the stored element itself.
    for (int idx = 0; idx < size; idx++)
    {
        blocks[idx].Init();
    }

    return blocks;
}

public bool IsInitialized => this.Data != null;

/// <summary>
/// Initializes a new instance of the <see cref="Block"/> class.
/// Gets the pixel data at the given block index.
/// </summary>
public Block()
/// <param name="index">The index of the data to return.</param>
/// <returns>
/// The <see cref="int"/>.
/// </returns>
public int this[int index]
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
// Direct read from the backing array; no checks beyond those of the array access itself.
return this.Data[index];
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
set
{
// Direct write into the backing array; no checks beyond those of the array access itself.
this.Data[index] = value;
}
}

// TODO: Refactor Block.Dispose() callers to always use 'using' or 'finally' statement!
/// <summary>
/// Returns the backing array to the pool (asking the pool to clear it) and drops the reference.
/// Does nothing when no array is held.
/// </summary>
public void Dispose()
{
    if (this.Data == null)
    {
        return;
    }

    ArrayPool.Return(this.Data, clearArray: true);
    this.Data = null;
}

public static void DisposeAll(Block[] blocks)
{
this.data = new int[BlockSize];
for (int i = 0; i < blocks.Length; i++)
{
blocks[i].Dispose();
}
}

/// <summary>
/// Sets every element of the backing array to zero.
/// </summary>
public void Clear()
{
    Array.Clear(this.Data, 0, this.Data.Length);
}

/// <summary>
/// Creates a new block and copies the first <see cref="BlockSize"/> values of this block into it.
/// </summary>
/// <returns>The copy, backed by its own rented array.</returns>
public Block Clone()
{
    Block copy = Create();
    Array.Copy(this.Data, 0, copy.Data, 0, BlockSize);
    return copy;
}
}

/// <summary>
/// TODO: Should be removed when JpegEncoderCore is refactored to use Block8x8F.
/// Temporary class to make refactoring easier.
/// 1. Refactor Block -> BlockF
/// 2. Test
/// 3. Refactor BlockF -> Block8x8F
/// </summary>
internal struct BlockF : IDisposable
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this class still in use?

Copy link
Member Author

@antonfirsov antonfirsov Dec 4, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Strictly speaking: no. But JpegEncoderCore still has to be refactored to use Block8x8F, and it's safer to do it in 2 steps, as described in the comment. I think it would be better to remove it (together with the old Block class) when the JpegEncoderCore refactor is finished.
Or we can keep it in a different repository/branch.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll leave that up to you then depending on how you want to handle the encoder refactor.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do need to fix a merge conflict though in JpegEncoderCore.cs

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could not figure out where's the conflict now, can you help me?

{
private static readonly ArrayPool<float> ArrayPool = ArrayPool<float>.Create(BlockSize, 50);

/// <summary>
/// Size of the block.
/// </summary>
public const int BlockSize = 64;

/// <summary>
/// The array of block data.
/// </summary>
public float[] Data;

/// <summary>
/// Rents the backing float array for this block from the struct's static array pool.
/// </summary>
public void Init() => this.Data = ArrayPool.Rent(BlockSize);

/// <summary>
/// Creates a block whose backing array has already been rented via <see cref="Init"/>.
/// </summary>
/// <returns>The initialized <see cref="BlockF"/>.</returns>
public static BlockF Create()
{
    BlockF created = default(BlockF);
    created.Init();
    return created;
}

/// <summary>
/// Creates an array of blocks and initializes every element.
/// </summary>
/// <param name="size">The number of blocks to create.</param>
/// <returns>The array of initialized blocks.</returns>
public static BlockF[] CreateArray(int size)
{
    BlockF[] blocks = new BlockF[size];

    // Elements are initialized through the array index so that Init() acts on the stored element itself.
    for (int idx = 0; idx < size; idx++)
    {
        blocks[idx].Init();
    }

    return blocks;
}

public bool IsInitialized => this.Data != null;

/// <summary>
/// Gets the pixel data at the given block index.
/// </summary>
/// <param name="index">The index of the data to return.</param>
/// <returns>
/// The <see cref="float"/>.
/// </returns>
public int this[int index]
public float this[int index]
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
// Direct read from the backing array; no checks beyond those of the array access itself.
return this.Data[index];
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
set
{
// Direct write into the backing array; no checks beyond those of the array access itself.
this.Data[index] = value;
}
}

// TODO: Refactor Block.Dispose() callers to always use 'using' or 'finally' statement!
/// <summary>
/// Returns the backing array to the pool (asking the pool to clear it) and drops the reference.
/// Does nothing when no array is held.
/// </summary>
public void Dispose()
{
    if (this.Data == null)
    {
        return;
    }

    ArrayPool.Return(this.Data, clearArray: true);
    this.Data = null;
}

/// <summary>
/// Disposes every block in the given array, in index order.
/// </summary>
/// <param name="blocks">The blocks to dispose.</param>
public static void DisposeAll(BlockF[] blocks)
{
    int idx = 0;

    // Dispose() is invoked through the array index so that it acts on the stored element itself.
    while (idx < blocks.Length)
    {
        blocks[idx].Dispose();
        idx++;
    }
}

/// <summary>
/// Sets every element of the backing array to zero.
/// </summary>
public void Clear()
{
    Array.Clear(this.Data, 0, this.Data.Length);
}

public BlockF Clone()
{
get { return this.data[index]; }
set { this.data[index] = value; }
BlockF clone = Create();
Array.Copy(this.Data, clone.Data, BlockSize);
return clone;
}
}
}
}
52 changes: 52 additions & 0 deletions src/ImageSharp/Formats/Jpg/Components/Block8x8F.Generated.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// <auto-generated />

using System;
using System.Numerics;
using System.Runtime.CompilerServices;


namespace ImageSharp.Formats
{
internal partial struct Block8x8F
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

✈️ VROOOOOOOOOM......

That's the sound of all this going over my head. Could you please add some comments here so I can figure out what is going on? It's proper into CS territory here.

Copy link
Member Author

@antonfirsov antonfirsov Dec 4, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the purpose of these functions is simple. (e.g. transposing the block) I will try to add some comments here.
A few words about the implementation:
The methods are unrolled loops that work on a constant-sized (8x8) block. In C# it's typically faster to access fields of a struct, than array indexing or unsafe+pointers. (See: Matrix classes in XNA or WPF)
First I started to implement these methods by hand, but quickly realized that I would go faster with T4, and the code would be more reliable. It's also easier to maintain & understand the T4 metaprogram than the bloated output code.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool, just comment in the T4 template then.

{
private static readonly Vector4 CMin4 = new Vector4(-128f);
private static readonly Vector4 CMax4 = new Vector4(127f);
private static readonly Vector4 COff4 = new Vector4(128f);

/// <summary>
/// Transpose the block into d.
/// Each statement row below takes the eight values of one source row (VnL.X..VnR.W)
/// and writes them into the same component of the eight destination rows,
/// so source row n becomes destination column n.
/// NOTE(review): presumably VnL/VnR hold the left and right four values of row n — confirm against the field declarations.
/// </summary>
/// <remarks>
/// Source fields are read after destination fields have been written, so passing this
/// same instance as <paramref name="d"/> would read already-overwritten values.
/// </remarks>
/// <param name="d">Destination</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void TransposeInto(ref Block8x8F d)
{
d.V0L.X = V0L.X; d.V1L.X = V0L.Y; d.V2L.X = V0L.Z; d.V3L.X = V0L.W; d.V4L.X = V0R.X; d.V5L.X = V0R.Y; d.V6L.X = V0R.Z; d.V7L.X = V0R.W;
d.V0L.Y = V1L.X; d.V1L.Y = V1L.Y; d.V2L.Y = V1L.Z; d.V3L.Y = V1L.W; d.V4L.Y = V1R.X; d.V5L.Y = V1R.Y; d.V6L.Y = V1R.Z; d.V7L.Y = V1R.W;
d.V0L.Z = V2L.X; d.V1L.Z = V2L.Y; d.V2L.Z = V2L.Z; d.V3L.Z = V2L.W; d.V4L.Z = V2R.X; d.V5L.Z = V2R.Y; d.V6L.Z = V2R.Z; d.V7L.Z = V2R.W;
d.V0L.W = V3L.X; d.V1L.W = V3L.Y; d.V2L.W = V3L.Z; d.V3L.W = V3L.W; d.V4L.W = V3R.X; d.V5L.W = V3R.Y; d.V6L.W = V3R.Z; d.V7L.W = V3R.W;
d.V0R.X = V4L.X; d.V1R.X = V4L.Y; d.V2R.X = V4L.Z; d.V3R.X = V4L.W; d.V4R.X = V4R.X; d.V5R.X = V4R.Y; d.V6R.X = V4R.Z; d.V7R.X = V4R.W;
d.V0R.Y = V5L.X; d.V1R.Y = V5L.Y; d.V2R.Y = V5L.Z; d.V3R.Y = V5L.W; d.V4R.Y = V5R.X; d.V5R.Y = V5R.Y; d.V6R.Y = V5R.Z; d.V7R.Y = V5R.W;
d.V0R.Z = V6L.X; d.V1R.Z = V6L.Y; d.V2R.Z = V6L.Z; d.V3R.Z = V6L.W; d.V4R.Z = V6R.X; d.V5R.Z = V6R.Y; d.V6R.Z = V6R.Z; d.V7R.Z = V6R.W;
d.V0R.W = V7L.X; d.V1R.W = V7L.Y; d.V2R.W = V7L.Z; d.V3R.W = V7L.W; d.V4R.W = V7R.X; d.V5R.W = V7R.Y; d.V6R.W = V7R.Z; d.V7R.W = V7R.W;
}

/// <summary>
/// Level shift by +128, clip to [0, 255].
/// Each of the sixteen vectors is first clamped to [CMin4, CMax4] (-128..127) and then
/// offset by COff4 (+128); the result is written to the matching vector of d.
/// This block itself is not modified unless d refers to it.
/// </summary>
/// <param name="d">Destination</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal void TransformByteConvetibleColorValuesInto(ref Block8x8F d)
{
d.V0L = Vector4.Max(Vector4.Min(V0L, CMax4), CMin4) + COff4;d.V0R = Vector4.Max(Vector4.Min(V0R, CMax4), CMin4) + COff4;
d.V1L = Vector4.Max(Vector4.Min(V1L, CMax4), CMin4) + COff4;d.V1R = Vector4.Max(Vector4.Min(V1R, CMax4), CMin4) + COff4;
d.V2L = Vector4.Max(Vector4.Min(V2L, CMax4), CMin4) + COff4;d.V2R = Vector4.Max(Vector4.Min(V2R, CMax4), CMin4) + COff4;
d.V3L = Vector4.Max(Vector4.Min(V3L, CMax4), CMin4) + COff4;d.V3R = Vector4.Max(Vector4.Min(V3R, CMax4), CMin4) + COff4;
d.V4L = Vector4.Max(Vector4.Min(V4L, CMax4), CMin4) + COff4;d.V4R = Vector4.Max(Vector4.Min(V4R, CMax4), CMin4) + COff4;
d.V5L = Vector4.Max(Vector4.Min(V5L, CMax4), CMin4) + COff4;d.V5R = Vector4.Max(Vector4.Min(V5R, CMax4), CMin4) + COff4;
d.V6L = Vector4.Max(Vector4.Min(V6L, CMax4), CMin4) + COff4;d.V6R = Vector4.Max(Vector4.Min(V6R, CMax4), CMin4) + COff4;
d.V7L = Vector4.Max(Vector4.Min(V7L, CMax4), CMin4) + COff4;d.V7R = Vector4.Max(Vector4.Min(V7R, CMax4), CMin4) + COff4;
}


}
}
Loading