Large diffs are not rendered by default.

@@ -193,6 +193,38 @@ public ReadOnlySpan<byte> GetSpan(ulong va, int size)
}
}

/// <summary>
/// Returns a writable view over a range of guest memory.
/// </summary>
/// <remarks>
/// A range that is contiguous in the backing memory is exposed directly.
/// Otherwise the current contents are copied into a newly allocated buffer,
/// and that buffer is written back to guest memory when the returned region is disposed.
/// </remarks>
/// <param name="va">Virtual address of the data</param>
/// <param name="size">Size of the data</param>
/// <returns>A writable region of memory containing the data</returns>
public WritableRegion GetWritableRegion(ulong va, int size)
{
    // An empty request needs neither a mapping nor a copy.
    if (size == 0)
    {
        return new WritableRegion(null, va, Memory<byte>.Empty);
    }

    if (!IsContiguous(va, size))
    {
        // Stage the data in a scratch buffer; handing over "this" makes the
        // region write the buffer back when it is disposed.
        Memory<byte> staging = new byte[size];

        GetSpan(va, size).CopyTo(staging.Span);

        return new WritableRegion(this, va, staging);
    }

    // Contiguous range: expose the backing memory itself, no write-back required.
    var direct = _backingMemory.GetMemory(GetPhysicalAddressInternal(va), size);

    return new WritableRegion(null, va, direct);
}

/// <summary>
/// Gets a reference for the given type at the specified virtual memory address.
/// </summary>
@@ -0,0 +1,29 @@
using System;

namespace Ryujinx.Cpu
{
/// <summary>
/// A writable block of memory associated with a virtual address.
/// When it was created with a memory manager, its contents are written
/// back through that manager at the associated address on disposal.
/// </summary>
public sealed class WritableRegion : IDisposable
{
    private readonly MemoryManager _mm;
    private readonly ulong _va;

    /// <summary>
    /// True when a memory manager was supplied, meaning disposal has to write the data back.
    /// </summary>
    private bool NeedsWriteback
    {
        get { return _mm != null; }
    }

    /// <summary>
    /// The writable data of the region.
    /// </summary>
    public Memory<byte> Memory { get; }

    /// <summary>
    /// Creates a new writable region.
    /// </summary>
    /// <param name="mm">Memory manager used for the write-back, or null when none is needed</param>
    /// <param name="va">Virtual address the data belongs to</param>
    /// <param name="memory">The writable data</param>
    internal WritableRegion(MemoryManager mm, ulong va, Memory<byte> memory)
    {
        Memory = memory;
        _va = va;
        _mm = mm;
    }

    /// <summary>
    /// Writes the data back through the memory manager, if one was supplied.
    /// </summary>
    public void Dispose()
    {
        if (!NeedsWriteback)
        {
            return;
        }

        _mm.Write(_va, Memory.Span);
    }
}
}
@@ -0,0 +1,10 @@
namespace Ryujinx.Graphics.Device
{
/// <summary>
/// Access permissions of a register. The values are bit flags:
/// <see cref="ReadWrite"/> is the combination of <see cref="ReadOnly"/> and <see cref="WriteOnly"/>.
/// </summary>
[System.Flags]
public enum AccessControl
{
    /// <summary>No access.</summary>
    None = 0,

    /// <summary>Read access (bit 0).</summary>
    ReadOnly = 1 << 0,

    /// <summary>Write access (bit 1).</summary>
    WriteOnly = 1 << 1,

    /// <summary>Read and write access.</summary>
    ReadWrite = ReadOnly | WriteOnly
}
}
@@ -0,0 +1,124 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Reflection;
using System.Runtime.CompilerServices;

namespace Ryujinx.Graphics.Device
{
/// <summary>
/// Register file of a device, backed by an unmanaged <typeparamref name="TState"/> structure
/// that is read and written as a sequence of 32-bit registers addressed by byte offset.
/// </summary>
/// <typeparam name="TState">Structure describing the register layout</typeparam>
public class DeviceState<TState> : IDeviceState where TState : unmanaged
{
    private const int RegisterSize = sizeof(int);

    /// <summary>
    /// Current register values.
    /// </summary>
    public TState State;

    // One bit per 32-bit register, indexed by (byte offset / RegisterSize).
    private readonly BitArray _readableRegisters;
    private readonly BitArray _writableRegisters;

    // Callbacks keyed by the byte offset of the field they were registered for.
    private readonly Dictionary<int, Func<int>> _readCallbacks;
    private readonly Dictionary<int, Action<int>> _writeCallbacks;

    /// <summary>
    /// Creates the register file, deriving per-register access permissions from the
    /// <see cref="RegisterAttribute"/> of each field of <typeparamref name="TState"/>.
    /// Fields without the attribute are both readable and writable.
    /// </summary>
    /// <param name="callbacks">Optional read/write callbacks, keyed by field name</param>
    public DeviceState(IReadOnlyDictionary<string, RwCallback> callbacks = null)
    {
        int size = (Unsafe.SizeOf<TState>() + RegisterSize - 1) / RegisterSize;

        _readableRegisters = new BitArray(size);
        _writableRegisters = new BitArray(size);

        _readCallbacks = new Dictionary<int, Func<int>>();
        _writeCallbacks = new Dictionary<int, Action<int>>();

        // Field offsets are accumulated in the order reflection returns the fields;
        // the assert at the end checks that the sum matches the size of the structure.
        var fields = typeof(TState).GetFields();
        int offset = 0;

        for (int fieldIndex = 0; fieldIndex < fields.Length; fieldIndex++)
        {
            var field = fields[fieldIndex];
            var regAttr = field.GetCustomAttributes<RegisterAttribute>(false).FirstOrDefault();

            int sizeOfField = SizeCalculator.SizeOf(field.FieldType);

            // The permissions are the same for every register covered by the field.
            bool readable = regAttr?.AccessControl.HasFlag(AccessControl.ReadOnly) ?? true;
            bool writable = regAttr?.AccessControl.HasFlag(AccessControl.WriteOnly) ?? true;

            for (int i = 0; i < ((sizeOfField + 3) & ~3); i += 4)
            {
                _readableRegisters[(offset + i) / RegisterSize] = readable;
                _writableRegisters[(offset + i) / RegisterSize] = writable;
            }

            if (callbacks != null && callbacks.TryGetValue(field.Name, out var cb))
            {
                if (cb.Read != null)
                {
                    _readCallbacks.Add(offset, cb.Read);
                }

                if (cb.Write != null)
                {
                    _writeCallbacks.Add(offset, cb.Write);
                }
            }

            offset += sizeOfField;
        }

        Debug.Assert(offset == Unsafe.SizeOf<TState>());
    }

    /// <summary>
    /// Reads the register containing the given byte offset.
    /// </summary>
    /// <param name="offset">Byte offset of the register</param>
    /// <returns>
    /// The result of the read callback if one is registered for the register, otherwise the stored value.
    /// Zero when the offset is out of range or the register is not readable.
    /// </returns>
    public virtual int Read(int offset)
    {
        if (Check(offset) && _readableRegisters[offset / RegisterSize])
        {
            int alignedOffset = Align(offset);

            if (_readCallbacks.TryGetValue(alignedOffset, out Func<int> read))
            {
                return read();
            }
            else
            {
                return GetRef<int>(alignedOffset);
            }
        }

        return 0;
    }

    /// <summary>
    /// Writes the register containing the given byte offset.
    /// The write is ignored when the offset is out of range or the register is not writable.
    /// When a write callback is registered for the register it is invoked instead of storing the value.
    /// </summary>
    /// <param name="offset">Byte offset of the register</param>
    /// <param name="data">Value to write</param>
    public virtual void Write(int offset, int data)
    {
        if (Check(offset) && _writableRegisters[offset / RegisterSize])
        {
            int alignedOffset = Align(offset);

            if (_writeCallbacks.TryGetValue(alignedOffset, out Action<int> write))
            {
                write(data);
            }
            else
            {
                GetRef<int>(alignedOffset) = data;
            }
        }
    }

    /// <summary>
    /// Checks that a whole register starting at the aligned offset lies inside the state.
    /// </summary>
    private bool Check(int offset)
    {
        int alignedOffset = Align(offset);

        // Aligning keeps the sign, so negative offsets are rejected by the first test.
        // Requiring room for a full register keeps Read/Write from reaching past the end
        // of a state whose size is not a multiple of the register size.
        return alignedOffset >= 0 && alignedOffset <= Unsafe.SizeOf<TState>() - RegisterSize;
    }

    /// <summary>
    /// Gets a reference to a value of type <typeparamref name="T"/> located at the given byte offset of the state.
    /// </summary>
    /// <typeparam name="T">Type of the value</typeparam>
    /// <param name="offset">Byte offset into the state</param>
    /// <returns>A reference to the value inside <see cref="State"/></returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// The offset is negative, or the value would extend past the end of the state
    /// </exception>
    public ref T GetRef<T>(int offset) where T : unmanaged
    {
        // The sum is done in 64 bits: in 32 bits a negative offset such as -sizeof(T)
        // produces a small positive end offset and would slip through the range check.
        if (offset < 0 || (long)offset + Unsafe.SizeOf<T>() > Unsafe.SizeOf<TState>())
        {
            throw new ArgumentOutOfRangeException(nameof(offset));
        }

        return ref Unsafe.As<TState, T>(ref Unsafe.AddByteOffset(ref State, (IntPtr)offset));
    }

    /// <summary>
    /// Rounds a byte offset down to the start of its register.
    /// </summary>
    private static int Align(int offset)
    {
        return offset & ~(RegisterSize - 1);
    }
}
}
@@ -0,0 +1,8 @@
namespace Ryujinx.Graphics.Device
{
/// <summary>
/// A device that is accessed through register reads and writes.
/// </summary>
public interface IDeviceState
{
/// <summary>
/// Reads a register of the device.
/// </summary>
/// <param name="offset">Offset of the register (DeviceState treats it as a byte offset — confirm for other implementers)</param>
/// <returns>The value read</returns>
int Read(int offset);

/// <summary>
/// Writes a register of the device.
/// </summary>
/// <param name="offset">Offset of the register (DeviceState treats it as a byte offset — confirm for other implementers)</param>
/// <param name="data">The value to write</param>
void Write(int offset, int data);
}
}
@@ -0,0 +1,15 @@
using System;

namespace Ryujinx.Graphics.Device
{
/// <summary>
/// Declares the access permissions of a register field.
/// </summary>
[AttributeUsage(AttributeTargets.Field, AllowMultiple = false)]
public sealed class RegisterAttribute : Attribute
{
/// <summary>
/// Access permissions of the annotated field.
/// </summary>
public AccessControl AccessControl { get; }

/// <summary>
/// Creates the attribute with the given access permissions.
/// </summary>
/// <param name="ac">Access permissions of the annotated field</param>
public RegisterAttribute(AccessControl ac)
{
AccessControl = ac;
}
}
}
@@ -0,0 +1,16 @@
using System;

namespace Ryujinx.Graphics.Device
{
/// <summary>
/// Pair of optional callbacks that replace the plain store and load of a register.
/// Declared readonly because both members are only ever set by the constructor.
/// </summary>
public readonly struct RwCallback
{
    /// <summary>
    /// Callback invoked with the written value, or null when there is none.
    /// </summary>
    public Action<int> Write { get; }

    /// <summary>
    /// Callback producing the value of a read, or null when there is none.
    /// </summary>
    public Func<int> Read { get; }

    /// <summary>
    /// Creates a new callback pair.
    /// </summary>
    /// <param name="write">Write callback, may be null</param>
    /// <param name="read">Read callback, may be null</param>
    public RwCallback(Action<int> write, Func<int> read)
    {
        Write = write;
        Read = read;
    }
}
}
@@ -0,0 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>
</PropertyGroup>

</Project>
@@ -0,0 +1,63 @@
using System;
using System.Reflection;

namespace Ryujinx.Graphics.Device
{
/// <summary>
/// Computes the size in bytes of a type through reflection.
/// </summary>
static class SizeCalculator
{
    /// <summary>
    /// Gets the size in bytes of the given type.
    /// </summary>
    /// <remarks>
    /// Enums are measured through their underlying type, pointer-like types have the native
    /// pointer size, and structs use their explicit layout size when one is declared or the
    /// plain sum of their instance field sizes otherwise (no padding is added).
    /// </remarks>
    /// <param name="type">Type to measure</param>
    /// <returns>Size of the type in bytes</returns>
    /// <exception cref="ArgumentException">The size of the type is not known</exception>
    public static int SizeOf(Type type)
    {
        Type actual = type.IsEnum ? type.GetEnumUnderlyingType() : type;

        bool pointerSized = actual.IsPointer || actual == typeof(IntPtr) || actual == typeof(UIntPtr);

        if (pointerSized)
        {
            return IntPtr.Size;
        }

        if (!actual.IsValueType || actual.IsPrimitive)
        {
            return SizeOfByTypeCode(actual);
        }

        // Struct: an explicitly declared size wins over the computed one.
        if (actual.StructLayoutAttribute.Size != 0)
        {
            return actual.StructLayoutAttribute.Size;
        }

        int total = 0;

        foreach (FieldInfo member in actual.GetFields(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance))
        {
            total += SizeOf(member.FieldType);
        }

        return total;
    }

    /// <summary>
    /// Gets the size of a type from its type code, throwing for type codes without a known size.
    /// </summary>
    private static int SizeOfByTypeCode(Type type)
    {
        switch (Type.GetTypeCode(type))
        {
            case TypeCode.SByte:
                return sizeof(sbyte);
            case TypeCode.Byte:
                return sizeof(byte);
            case TypeCode.Int16:
                return sizeof(short);
            case TypeCode.UInt16:
                return sizeof(ushort);
            case TypeCode.Int32:
                return sizeof(int);
            case TypeCode.UInt32:
                return sizeof(uint);
            case TypeCode.Int64:
                return sizeof(long);
            case TypeCode.UInt64:
                return sizeof(ulong);
            case TypeCode.Char:
                return sizeof(char);
            case TypeCode.Single:
                return sizeof(float);
            case TypeCode.Double:
                return sizeof(double);
            case TypeCode.Decimal:
                return sizeof(decimal);
            case TypeCode.Boolean:
                return sizeof(bool);
            default:
                throw new ArgumentException($"Length for type \"{type.Name}\" is unknown.");
        }
    }
}
}
@@ -67,7 +67,7 @@ public void Dispatch(GpuState state, int argument)

TextureManager.SetComputeTextureBufferIndex(state.Get<int>(MethodOffset.TextureBufferIndex));

ShaderProgramInfo info = cs.Shaders[0].Program.Info;
ShaderProgramInfo info = cs.Shaders[0].Program.Info;

for (int index = 0; index < info.CBuffers.Count; index++)
{
@@ -63,7 +63,7 @@ private ConditionalRenderEnabled CounterNonZero(ulong gpuVa)
else
{
evt.Flush();
return (_context.MemoryAccessor.ReadUInt64(gpuVa) != 0) ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False;
return (_context.MemoryAccessor.Read<ulong>(gpuVa) != 0) ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False;
}
}

@@ -87,11 +87,11 @@ private ConditionalRenderEnabled CounterCompare(ulong gpuVa, bool isEqual)

if (evt != null && evt2 == null)
{
useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt, _context.MemoryAccessor.ReadUInt64(gpuVa + 16), isEqual);
useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt, _context.MemoryAccessor.Read<ulong>(gpuVa + 16), isEqual);
}
else if (evt == null && evt2 != null)
{
useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt2, _context.MemoryAccessor.ReadUInt64(gpuVa), isEqual);
useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt2, _context.MemoryAccessor.Read<ulong>(gpuVa), isEqual);
}
else
{
@@ -107,8 +107,8 @@ private ConditionalRenderEnabled CounterCompare(ulong gpuVa, bool isEqual)
evt?.Flush();
evt2?.Flush();

ulong x = _context.MemoryAccessor.ReadUInt64(gpuVa);
ulong y = _context.MemoryAccessor.ReadUInt64(gpuVa + 16);
ulong x = _context.MemoryAccessor.Read<ulong>(gpuVa);
ulong y = _context.MemoryAccessor.Read<ulong>(gpuVa + 16);

return (isEqual ? x == y : x != y) ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False;
}
@@ -466,7 +466,7 @@ private void UpdateViewportTransform(GpuState state)

bool flipY = yControl.HasFlag(YControl.NegateY);
Origin origin = yControl.HasFlag(YControl.TriangleRastFlip) ? Origin.LowerLeft : Origin.UpperLeft;

_context.Renderer.Pipeline.SetOrigin(origin);

// The triangle rast flip flag only affects rasterization, the viewport is not flipped.
@@ -77,7 +77,7 @@ public GpuContext(IRenderer renderer)
{
Renderer = renderer;

MemoryManager = new MemoryManager();
MemoryManager = new MemoryManager(this);

MemoryAccessor = new MemoryAccessor(this);

@@ -643,6 +643,8 @@ public Texture FindOrCreateTexture(TextureInfo info, TextureSearchFlags flags =
overlap.ChangeSize(info.Width, info.Height, info.DepthOrLayers);
}

overlap.SynchronizeMemory();

return overlap;
}
}
@@ -58,42 +58,6 @@ public ReadOnlySpan<byte> GetSpan(ulong gpuVa, int size)
return MemoryMarshal.Cast<byte, T>(_context.PhysicalMemory.GetSpan(processVa, Unsafe.SizeOf<T>()))[0];
}

/// <summary>
/// Reads a 32-bit signed integer from GPU mapped memory.
/// </summary>
/// <param name="gpuVa">GPU virtual address where the value is located</param>
/// <returns>The value at the specified memory location</returns>
public int ReadInt32(ulong gpuVa)
{
    // Translate the GPU address first, then read through physical memory.
    ulong translated = _context.MemoryManager.Translate(gpuVa);
    int result = _context.PhysicalMemory.Read<int>(translated);

    return result;
}

/// <summary>
/// Reads a 64-bit unsigned integer from GPU mapped memory.
/// </summary>
/// <param name="gpuVa">GPU virtual address where the value is located</param>
/// <returns>The value at the specified memory location</returns>
public ulong ReadUInt64(ulong gpuVa)
{
    // Translate the GPU address first, then read through physical memory.
    ulong translated = _context.MemoryManager.Translate(gpuVa);
    ulong result = _context.PhysicalMemory.Read<ulong>(translated);

    return result;
}

/// <summary>
/// Writes an 8-bit unsigned integer to GPU mapped memory.
/// </summary>
/// <param name="gpuVa">GPU virtual address to write the value into</param>
/// <param name="value">The value to be written</param>
public void WriteByte(ulong gpuVa, byte value)
{
    ulong translated = _context.MemoryManager.Translate(gpuVa);

    // View the single byte argument as a one-element span so it can go through the span-based write.
    Span<byte> single = MemoryMarshal.CreateSpan(ref value, 1);

    _context.PhysicalMemory.Write(translated, single);
}

/// <summary>
/// Writes a 32-bits signed integer to GPU mapped memory.
/// </summary>
@@ -1,4 +1,7 @@
using Ryujinx.Cpu;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace Ryujinx.Graphics.Gpu.Memory
{
@@ -33,14 +36,69 @@ public class MemoryManager

public event EventHandler<UnmapEventArgs> MemoryUnmapped;

private GpuContext _context;

/// <summary>
/// Creates a new instance of the GPU memory manager.
/// </summary>
public MemoryManager()
public MemoryManager(GpuContext context)
{
_context = context;
_pageTable = new ulong[PtLvl0Size][];
}

/// <summary>
/// Reads a value of an unmanaged type from GPU mapped memory.
/// </summary>
/// <typeparam name="T">Type of the data</typeparam>
/// <param name="gpuVa">GPU virtual address where the data is located</param>
/// <returns>The data at the specified memory location</returns>
public T Read<T>(ulong gpuVa) where T : unmanaged
{
    ulong translated = Translate(gpuVa);
    int byteCount = Unsafe.SizeOf<T>();

    // Fetch exactly one element worth of bytes and reinterpret it as T.
    ReadOnlySpan<byte> raw = _context.PhysicalMemory.GetSpan(translated, byteCount);

    return MemoryMarshal.Cast<byte, T>(raw)[0];
}

/// <summary>
/// Gets a read-only span of data from GPU mapped memory.
/// The GPU address is translated and the request is forwarded to the physical memory accessor.
/// </summary>
/// <param name="gpuVa">GPU virtual address where the data is located</param>
/// <param name="size">Size of the data</param>
/// <returns>The span of the data at the specified memory location</returns>
public ReadOnlySpan<byte> GetSpan(ulong gpuVa, int size)
{
    ulong translated = Translate(gpuVa);
    ReadOnlySpan<byte> data = _context.PhysicalMemory.GetSpan(translated, size);

    return data;
}

/// <summary>
/// Gets a writable region from GPU mapped memory.
/// </summary>
/// <param name="gpuVa">GPU virtual address where the region starts</param>
/// <param name="size">Size of the region in bytes</param>
/// <returns>A writable region with the data at the specified memory location</returns>
public WritableRegion GetWritableRegion(ulong gpuVa, int size)
{
    ulong translated = Translate(gpuVa);
    WritableRegion region = _context.PhysicalMemory.GetWritableRegion(translated, size);

    return region;
}

/// <summary>
/// Writes data to GPU mapped memory.
/// The GPU address is translated and the write is forwarded to the physical memory accessor.
/// </summary>
/// <param name="gpuVa">GPU virtual address to write the data into</param>
/// <param name="data">The data to be written</param>
public void Write(ulong gpuVa, ReadOnlySpan<byte> data)
{
    ulong translated = Translate(gpuVa);

    _context.PhysicalMemory.Write(translated, data);
}

/// <summary>
/// Maps a given range of pages to the specified CPU virtual address.
/// </summary>
@@ -1,3 +1,4 @@
using Ryujinx.Cpu;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
@@ -34,6 +35,17 @@ public ReadOnlySpan<byte> GetSpan(ulong address, int size)
return _cpuMemory.GetSpan(address, size);
}

/// <summary>
/// Gets a writable region from the application process.
/// The request is forwarded unchanged to the CPU memory manager.
/// </summary>
/// <param name="address">Start address of the range</param>
/// <param name="size">Size of the range in bytes</param>
/// <returns>A writable region with the data at the specified memory location</returns>
public WritableRegion GetWritableRegion(ulong address, int size)
{
return _cpuMemory.GetWritableRegion(address, size);
}

/// <summary>
/// Reads data from the application process.
/// </summary>
@@ -0,0 +1,20 @@
namespace Ryujinx.Graphics.Host1x
{
/// <summary>
/// Identifiers of the Host1x device classes.
/// Host1xDevice extracts a 10-bit class ID from SetClass commands and looks the device up by it.
/// </summary>
public enum ClassId
{
Host1x = 0x1,
Mpeg = 0x20,
Nvenc = 0x21,
Vi = 0x30,
Isp = 0x32,
Ispb = 0x34,
Vii2c = 0x36,
Vic = 0x5d,
Gr3d = 0x60,
Gpu = 0x61,
Tsec = 0xe0,
Tsecb = 0xe1,
// Note: listed after the Tsec entries although its value is lower.
Nvjpg = 0xc0,
Nvdec = 0xf0
}
}
@@ -0,0 +1,32 @@
using Ryujinx.Graphics.Device;
using System;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Host1x
{
/// <summary>
/// Table of the devices registered for each class ID.
/// </summary>
class Devices : IDisposable
{
    private readonly Dictionary<ClassId, IDeviceState> _devices = new Dictionary<ClassId, IDeviceState>();

    /// <summary>
    /// Registers a device for a class ID, replacing any device previously registered for it.
    /// </summary>
    /// <param name="classId">Class ID of the device</param>
    /// <param name="device">The device</param>
    public void RegisterDevice(ClassId classId, IDeviceState device)
    {
        _devices[classId] = device;
    }

    /// <summary>
    /// Gets the device registered for a class ID.
    /// </summary>
    /// <param name="classId">Class ID of the device</param>
    /// <returns>The device, or null when none is registered for the class ID</returns>
    public IDeviceState GetDevice(ClassId classId)
    {
        if (_devices.TryGetValue(classId, out IDeviceState found))
        {
            return found;
        }

        return null;
    }

    /// <summary>
    /// Disposes every registered device that is a <see cref="ThiDevice"/>; other devices are left untouched.
    /// </summary>
    public void Dispose()
    {
        foreach (KeyValuePair<ClassId, IDeviceState> entry in _devices)
        {
            (entry.Value as ThiDevice)?.Dispose();
        }
    }
}
}
@@ -0,0 +1,33 @@
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.Gpu.Synchronization;
using System.Collections.Generic;
using System.Threading;

namespace Ryujinx.Graphics.Host1x
{
/// <summary>
/// The Host1x class device. Register accesses go to an internal register file,
/// with writes to the WaitSyncpt32 register triggering a syncpoint wait.
/// </summary>
public class Host1xClass : IDeviceState
{
    private readonly SynchronizationManager _syncMgr;
    private readonly DeviceState<Host1xClassRegisters> _state;

    /// <summary>
    /// Creates the Host1x class device.
    /// </summary>
    /// <param name="syncMgr">Synchronization manager used to wait on syncpoints</param>
    public Host1xClass(SynchronizationManager syncMgr)
    {
        _syncMgr = syncMgr;

        var callbacks = new Dictionary<string, RwCallback>();

        callbacks.Add(nameof(Host1xClassRegisters.WaitSyncpt32), new RwCallback(WaitSyncpt32, null));

        _state = new DeviceState<Host1xClassRegisters>(callbacks);
    }

    /// <summary>
    /// Reads a register from the register file.
    /// </summary>
    public int Read(int offset)
    {
        return _state.Read(offset);
    }

    /// <summary>
    /// Writes a register of the register file.
    /// </summary>
    public void Write(int offset, int data)
    {
        _state.Write(offset, data);
    }

    /// <summary>
    /// Write handler of the WaitSyncpt32 register: the low 8 bits of the written value select
    /// the syncpoint, the threshold is taken from the LoadSyncptPayload32 register, and the
    /// wait has no timeout.
    /// </summary>
    private void WaitSyncpt32(int data)
    {
        uint id = (uint)(data & 0xFF);
        uint payload = _state.State.LoadSyncptPayload32;

        _syncMgr.WaitOnSyncpoint(id, payload, Timeout.InfiniteTimeSpan);
    }
}
}
@@ -0,0 +1,41 @@
using Ryujinx.Common.Memory;

namespace Ryujinx.Graphics.Host1x
{
/// <summary>
/// Register layout of the Host1x class.
/// The field order defines the register offsets, so fields must not be reordered.
/// The ReservedXX names match the byte offset the field ends up at when every preceding
/// field is packed as 32-bit words (assuming ArrayN&lt;uint&gt; holds N words — confirm).
/// </summary>
struct Host1xClassRegisters
{
public uint IncrSyncpt;
public uint IncrSyncptCntrl;
public uint IncrSyncptError;
public Array5<uint> ReservedC;
public uint WaitSyncpt;
public uint WaitSyncptBase;
public uint WaitSyncptIncr;
public uint LoadSyncptBase;
public uint IncrSyncptBase;
public uint Clear;
public uint Wait;
public uint WaitWithIntr;
public uint DelayUsec;
public uint TickcountHi;
public uint TickcountLo;
public uint Tickctrl;
public Array23<uint> Reserved50;
public uint Indctrl;
public uint Indoff2;
public uint Indoff;
public Array31<uint> Inddata;
public uint Reserved134;
// Read by Host1xClass as the threshold when WaitSyncpt32 is written.
public uint LoadSyncptPayload32;
public uint Stallctrl;
// Writes to this register are intercepted by Host1xClass (syncpoint wait).
public uint WaitSyncpt32;
public uint WaitSyncptBase32;
public uint LoadSyncptBase32;
public uint IncrSyncptBase32;
public uint StallcountHi;
public uint StallcountLo;
public uint Xrefctrl;
public uint ChannelXrefHi;
public uint ChannelXrefLo;
}
}
@@ -0,0 +1,123 @@
using Ryujinx.Common;
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.Gpu.Synchronization;
using System;
using System.Numerics;

namespace Ryujinx.Graphics.Host1x
{
/// <summary>
/// Host1x command processor. Submitted command buffers are queued and decoded word by word
/// on a work queue, and the resulting register writes are routed to the device selected by
/// the most recent SetClass command.
/// </summary>
public sealed class Host1xDevice : IDisposable
{
    private readonly SyncptIncrManager _syncptIncrMgr;
    private readonly AsyncWorkQueue<int[]> _commandQueue;

    private readonly Devices _devices = new Devices();

    /// <summary>
    /// The Host1x class device, registered under <see cref="ClassId.Host1x"/>.
    /// </summary>
    public Host1xClass Class { get; }

    // Device selected by the last SetClass command (null when none is registered for the class).
    private IDeviceState _device;

    // Decoder state carried between command words.
    private int _count;
    private int _offset;
    private int _mask;
    private bool _incrementing;

    /// <summary>
    /// Creates the Host1x device.
    /// </summary>
    /// <param name="syncMgr">Synchronization manager used for syncpoint operations</param>
    public Host1xDevice(SynchronizationManager syncMgr)
    {
        _syncptIncrMgr = new SyncptIncrManager(syncMgr);
        _commandQueue = new AsyncWorkQueue<int[]>(Process, "Ryujinx.Host1xProcessor");

        Class = new Host1xClass(syncMgr);

        _devices.RegisterDevice(ClassId.Host1x, Class);
    }

    /// <summary>
    /// Registers a device for a class ID. The device is wrapped in a <see cref="ThiDevice"/>.
    /// </summary>
    /// <param name="classId">Class ID of the device</param>
    /// <param name="device">The device, must not be null</param>
    /// <exception cref="ArgumentNullException"><paramref name="device"/> is null</exception>
    public void RegisterDevice(ClassId classId, IDeviceState device)
    {
        if (device == null)
        {
            throw new ArgumentNullException(nameof(device));
        }

        _devices.RegisterDevice(classId, new ThiDevice(classId, device, _syncptIncrMgr));
    }

    /// <summary>
    /// Queues a copy of the command buffer for processing.
    /// </summary>
    /// <param name="commandBuffer">Command words to process</param>
    public void Submit(ReadOnlySpan<int> commandBuffer)
    {
        int[] copy = commandBuffer.ToArray();

        _commandQueue.Add(copy);
    }

    /// <summary>
    /// Processes every word of a queued command buffer, in order.
    /// </summary>
    private void Process(int[] commandBuffer)
    {
        foreach (int word in commandBuffer)
        {
            Step(word);
        }
    }

    /// <summary>
    /// Consumes one command word. While a masked or counted write is pending the word is data
    /// for that write; otherwise it is decoded as a new command.
    /// </summary>
    private void Step(int value)
    {
        // Pending masked write: each set bit selects the register at (_offset + bit index),
        // lowest bit first.
        if (_mask != 0)
        {
            int bit = BitOperations.TrailingZeroCount(_mask);

            _mask &= ~(1 << bit);

            DeviceWrite(_offset + bit, value);

            return;
        }

        // Pending counted write, optionally advancing the register offset after each word.
        if (_count != 0)
        {
            _count--;

            DeviceWrite(_offset, value);

            if (_incrementing)
            {
                _offset++;
            }

            return;
        }

        // New command: opcode in bits 28-31, register offset in bits 16-27.
        OpCode opCode = (OpCode)((value >> 28) & 0xf);
        int offsetField = (value >> 16) & 0xfff;

        if (opCode == OpCode.SetClass)
        {
            _mask = value & 0x3f;
            _offset = offsetField;
            _device = _devices.GetDevice((ClassId)((value >> 6) & 0x3ff));
        }
        else if (opCode == OpCode.Incr || opCode == OpCode.NonIncr)
        {
            _count = value & 0xffff;
            _offset = offsetField;
            _incrementing = opCode == OpCode.Incr;
        }
        else if (opCode == OpCode.Mask)
        {
            _mask = value & 0xffff;
            _offset = offsetField;
        }
        else if (opCode == OpCode.Imm)
        {
            // The data is carried by the command word itself.
            _offset = offsetField;

            DeviceWrite(_offset, value & 0xfff);
        }
        else
        {
            Logger.PrintError(LogClass.Host1x, $"Unsupported opcode \"{opCode}\".");
        }
    }

    /// <summary>
    /// Writes a register of the current device, converting the word offset into a byte offset.
    /// Does nothing when no device is selected.
    /// </summary>
    private void DeviceWrite(int offset, int data)
    {
        IDeviceState target = _device;

        if (target != null)
        {
            target.Write(offset * 4, data);
        }
    }

    /// <summary>
    /// Disposes the command queue, then the registered devices.
    /// </summary>
    public void Dispose()
    {
        _commandQueue.Dispose();
        _devices.Dispose();
    }
}
}
@@ -0,0 +1,21 @@
namespace Ryujinx.Graphics.Host1x
{
/// <summary>
/// Host1x command opcodes. The members rely on the implicit numbering (SetClass = 0 ... Extend = 14),
/// and Host1xDevice casts the top 4 bits of a command word to this enum, so the order must not change.
/// Host1xDevice handles SetClass, Incr, NonIncr, Mask and Imm; the others are logged as unsupported.
/// </summary>
enum OpCode
{
SetClass,
Incr,
NonIncr,
Mask,
Imm,
Restart,
Gather,
SetStrmId,
SetAppId,
SetPyld,
IncrW,
NonIncrW,
GatherW,
RestartW,
Extend
}
}
@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>
</PropertyGroup>

<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
<AllowUnsafeBlocks>false</AllowUnsafeBlocks>
</PropertyGroup>

<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
<AllowUnsafeBlocks>false</AllowUnsafeBlocks>
</PropertyGroup>

<ItemGroup>
<ProjectReference Include="..\Ryujinx.Graphics.Device\Ryujinx.Graphics.Device.csproj" />
<ProjectReference Include="..\Ryujinx.Graphics.Gpu\Ryujinx.Graphics.Gpu.csproj" />
</ItemGroup>

</Project>
@@ -0,0 +1,99 @@
using Ryujinx.Graphics.Gpu.Synchronization;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Host1x
{
/// <summary>
/// Keeps an ordered list of syncpoint increments and applies them in order:
/// an increment is only applied once every increment queued before it is done.
/// </summary>
class SyncptIncrManager
{
    private readonly SynchronizationManager _syncMgr;

    /// <summary>
    /// One queued syncpoint increment.
    /// </summary>
    private struct SyncptIncr
    {
        public uint Id { get; }
        public ClassId ClassId { get; }
        public uint SyncptId { get; }
        public bool Done { get; }

        public SyncptIncr(uint id, ClassId classId, uint syncptId, bool done = false)
        {
            Id = id;
            ClassId = classId;
            SyncptId = syncptId;
            Done = done;
        }
    }

    // Also used as the lock object guarding the list and the handle counter.
    private readonly List<SyncptIncr> _incrs = new List<SyncptIncr>();

    private uint _currentId;

    /// <summary>
    /// Creates the manager.
    /// </summary>
    /// <param name="syncMgr">Synchronization manager whose syncpoints are incremented</param>
    public SyncptIncrManager(SynchronizationManager syncMgr)
    {
        _syncMgr = syncMgr;
    }

    /// <summary>
    /// Queues an increment that is already done, then applies every increment that is ready.
    /// </summary>
    /// <param name="id">ID of the syncpoint to increment</param>
    public void Increment(uint id)
    {
        lock (_incrs)
        {
            SyncptIncr immediate = new SyncptIncr(0, 0, id, true);

            _incrs.Add(immediate);

            IncrementAllDone();
        }
    }

    /// <summary>
    /// Queues an increment that stays pending until <see cref="SignalDone"/> is called with the returned handle.
    /// </summary>
    /// <param name="classId">Class ID of the device requesting the increment</param>
    /// <param name="id">ID of the syncpoint to increment</param>
    /// <returns>Handle identifying the pending increment</returns>
    public uint IncrementWhenDone(ClassId classId, uint id)
    {
        lock (_incrs)
        {
            uint handle = _currentId;

            _currentId++;

            _incrs.Add(new SyncptIncr(handle, classId, id));

            return handle;
        }
    }

    /// <summary>
    /// Marks the first queued increment with the given handle as done, then applies every increment that is ready.
    /// </summary>
    /// <param name="handle">Handle returned by <see cref="IncrementWhenDone"/></param>
    public void SignalDone(uint handle)
    {
        lock (_incrs)
        {
            int index = _incrs.FindIndex(pending => pending.Id == handle);

            if (index >= 0)
            {
                SyncptIncr match = _incrs[index];

                _incrs[index] = new SyncptIncr(match.Id, match.ClassId, match.SyncptId, true);
            }

            IncrementAllDone();
        }
    }

    /// <summary>
    /// Applies and removes the leading run of done increments, stopping at the first pending one.
    /// </summary>
    private void IncrementAllDone()
    {
        lock (_incrs)
        {
            int ready = 0;

            while (ready < _incrs.Count && _incrs[ready].Done)
            {
                _syncMgr.IncrementSyncpoint(_incrs[ready].SyncptId);

                ready++;
            }

            _incrs.RemoveRange(0, ready);
        }
    }
}
}
@@ -0,0 +1,96 @@
using Ryujinx.Common;
using Ryujinx.Graphics.Device;
using System;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Host1x
{
/// <summary>
/// Wraps a device behind a THI register file. Method calls and "increment when done"
/// requests are queued and executed in order on a per-device work queue.
/// </summary>
class ThiDevice : IDeviceState, IDisposable
{
    private readonly ClassId _classId;
    private readonly IDeviceState _device;

    private readonly SyncptIncrManager _syncptIncrMgr;

    /// <summary>
    /// Base type of the queued actions.
    /// </summary>
    private class CommandAction
    {
        public int Data { get; }

        public CommandAction(int data)
        {
            Data = data;
        }
    }

    /// <summary>
    /// Queued write of a value to a method of the wrapped device.
    /// </summary>
    private class MethodCallAction : CommandAction
    {
        public int Method { get; }

        public MethodCallAction(int method, int data) : base(data)
        {
            Method = method;
        }
    }

    /// <summary>
    /// Queued completion signal of a pending syncpoint increment; the handle is stored as the data.
    /// </summary>
    private class SyncptIncrAction : CommandAction
    {
        public SyncptIncrAction(uint syncptIncrHandle) : base((int)syncptIncrHandle)
        {
        }
    }

    private readonly AsyncWorkQueue<CommandAction> _commandQueue;

    private readonly DeviceState<ThiRegisters> _state;

    /// <summary>
    /// Creates the wrapper.
    /// </summary>
    /// <param name="classId">Class ID of the wrapped device</param>
    /// <param name="device">The wrapped device</param>
    /// <param name="syncptIncrMgr">Manager of the syncpoint increments</param>
    public ThiDevice(ClassId classId, IDeviceState device, SyncptIncrManager syncptIncrMgr)
    {
        _classId = classId;
        _device = device;
        _syncptIncrMgr = syncptIncrMgr;
        _commandQueue = new AsyncWorkQueue<CommandAction>(Process, $"Ryujinx.{classId}Processor");

        var callbacks = new Dictionary<string, RwCallback>();

        callbacks.Add(nameof(ThiRegisters.IncrSyncpt), new RwCallback(IncrSyncpt, null));
        callbacks.Add(nameof(ThiRegisters.Method1), new RwCallback(Method1, null));

        _state = new DeviceState<ThiRegisters>(callbacks);
    }

    /// <summary>
    /// Reads a register from the THI register file.
    /// </summary>
    public int Read(int offset)
    {
        return _state.Read(offset);
    }

    /// <summary>
    /// Writes a register of the THI register file.
    /// </summary>
    public void Write(int offset, int data)
    {
        _state.Write(offset, data);
    }

    /// <summary>
    /// Write handler of the IncrSyncpt register. Bits 0-7 select the syncpoint and bits 8-15
    /// the condition: 0 increments immediately, anything else increments once the work queued
    /// before it has been processed.
    /// </summary>
    private void IncrSyncpt(int data)
    {
        uint syncpointId = (uint)(data & 0xFF);
        bool immediate = (uint)((data >> 8) & 0xFF) == 0;

        if (immediate)
        {
            _syncptIncrMgr.Increment(syncpointId);

            return;
        }

        uint handle = _syncptIncrMgr.IncrementWhenDone(_classId, syncpointId);

        _commandQueue.Add(new SyncptIncrAction(handle));
    }

    /// <summary>
    /// Write handler of the Method1 register: queues a write of the value to the wrapped device,
    /// at the method held in the Method0 register converted to a byte offset.
    /// </summary>
    private void Method1(int data)
    {
        int methodOffset = (int)_state.State.Method0 * 4;

        _commandQueue.Add(new MethodCallAction(methodOffset, data));
    }

    /// <summary>
    /// Executes one queued action.
    /// </summary>
    private void Process(CommandAction cmdAction)
    {
        switch (cmdAction)
        {
            case SyncptIncrAction incrAction:
                _syncptIncrMgr.SignalDone((uint)incrAction.Data);
                break;
            case MethodCallAction callAction:
                _device.Write(callAction.Method, callAction.Data);
                break;
        }
    }

    /// <summary>
    /// Disposes the work queue.
    /// </summary>
    public void Dispose()
    {
        _commandQueue.Dispose();
    }
}
}
@@ -0,0 +1,22 @@
using Ryujinx.Common.Memory;

namespace Ryujinx.Graphics.Host1x
{
/// <summary>
/// Register layout of the THI interface.
/// The field order defines the register offsets, so fields must not be reordered.
/// The ReservedXX names match the byte offset the field ends up at when every preceding
/// field is packed as 32-bit words (assuming ArrayN&lt;uint&gt; holds N words — confirm).
/// </summary>
struct ThiRegisters
{
// Writes to this register are intercepted by ThiDevice (syncpoint increment).
public uint IncrSyncpt;
public uint Reserved4;
public uint IncrSyncptErr;
public uint CtxswIncrSyncpt;
public Array4<uint> Reserved10;
public uint Ctxsw;
public uint Reserved24;
public uint ContSyncptEof;
public Array5<uint> Reserved2C;
// Read by ThiDevice as the method to call when Method1 is written.
public uint Method0;
// Writes to this register are intercepted by ThiDevice (method call with the written value).
public uint Method1;
public Array12<uint> Reserved48;
public uint IntStatus;
public uint IntMask;
}
}
@@ -0,0 +1,40 @@
using Ryujinx.Graphics.Video;
using System;

namespace Ryujinx.Graphics.Nvdec.H264
{
/// <summary>
/// H.264 decoder built on <see cref="FFmpegContext"/>. Before decoding, a parameter-set header
/// rebuilt from the picture information is placed in front of the bitstream.
/// </summary>
public class Decoder : IH264Decoder
{
    public bool IsHardwareAccelerated
    {
        get { return false; }
    }

    // Size of the scratch buffer the parameter sets are written into.
    private const int WorkBufferSize = 0x200;

    private readonly byte[] _workBuffer = new byte[WorkBufferSize];

    private readonly FFmpegContext _context = new FFmpegContext();

    /// <summary>
    /// Creates a new output surface. The requested dimensions are not used here.
    /// </summary>
    public ISurface CreateSurface(int width, int height)
    {
        return new Surface();
    }

    /// <summary>
    /// Decodes one frame into the given surface.
    /// </summary>
    /// <param name="pictureInfo">Picture information the parameter sets are rebuilt from</param>
    /// <param name="output">Surface receiving the frame; it is cast to <see cref="Surface"/></param>
    /// <param name="bitstream">Bitstream data of the frame</param>
    /// <returns>True when the decoder context reported success (a result of 0)</returns>
    public bool Decode(ref H264PictureInfo pictureInfo, ISurface output, ReadOnlySpan<byte> bitstream)
    {
        Span<byte> header = SpsAndPpsReconstruction.Reconstruct(ref pictureInfo, _workBuffer);
        byte[] packet = Prepend(bitstream, header);

        int result = _context.DecodeFrame((Surface)output, packet);

        return result == 0;
    }

    /// <summary>
    /// Builds a new array holding <paramref name="prep"/> followed by <paramref name="data"/>.
    /// </summary>
    private static byte[] Prepend(ReadOnlySpan<byte> data, ReadOnlySpan<byte> prep)
    {
        byte[] combined = new byte[data.Length + prep.Length];
        Span<byte> target = combined;

        prep.CopyTo(target);
        data.CopyTo(target.Slice(prep.Length));

        return combined;
    }

    /// <summary>
    /// Disposes the decoder context.
    /// </summary>
    public void Dispose()
    {
        _context.Dispose();
    }
}
}
@@ -0,0 +1,51 @@
using FFmpeg.AutoGen;
using System;

namespace Ryujinx.Graphics.Nvdec.H264
{
/// <summary>
/// Owns an FFmpeg H.264 decoder context and feeds bitstream packets to it.
/// </summary>
unsafe class FFmpegContext : IDisposable
{
private readonly AVCodec* _codec;
private AVCodecContext* _context;

/// <summary>
/// Looks up the H.264 decoder, allocates a codec context for it and opens it.
/// </summary>
public FFmpegContext()
{
_codec = ffmpeg.avcodec_find_decoder(AVCodecID.AV_CODEC_ID_H264);
_context = ffmpeg.avcodec_alloc_context3(_codec);

// NOTE(review): neither the two pointers above nor the result of avcodec_open2 are checked.
ffmpeg.avcodec_open2(_context, _codec, null);
}

/// <summary>
/// Sends the bitstream to the decoder as a single packet and receives a frame into the surface.
/// </summary>
/// <param name="output">Surface whose frame receives the decoded picture</param>
/// <param name="bitstream">Bitstream data of the packet</param>
/// <returns>
/// The non-zero result of avcodec_send_packet when sending fails,
/// otherwise the result of avcodec_receive_frame
/// </returns>
public int DecodeFrame(Surface output, ReadOnlySpan<byte> bitstream)
{
AVPacket packet;

ffmpeg.av_init_packet(&packet);

// The bitstream only needs to stay pinned while the packet that points to it is sent.
fixed (byte* ptr = bitstream)
{
packet.data = ptr;
packet.size = bitstream.Length;

int rc = ffmpeg.avcodec_send_packet(_context, &packet);

if (rc != 0)
{
return rc;
}
}

return ffmpeg.avcodec_receive_frame(_context, output.Frame);
}

/// <summary>
/// Closes the codec context, then frees it.
/// </summary>
public void Dispose()
{
ffmpeg.avcodec_close(_context);

// avcodec_free_context takes a pointer to the context pointer, hence the pinned field address.
fixed (AVCodecContext** ppContext = &_context)
{
ffmpeg.avcodec_free_context(ppContext);
}
}
}
}
@@ -0,0 +1,121 @@
using System;
using System.Numerics;

namespace Ryujinx.Graphics.Nvdec.H264
{
/// <summary>
/// Writes values most significant bit first into a byte array, one byte at a time.
/// </summary>
struct H264BitStreamWriter
{
    // Number of bits accumulated before a byte is emitted.
    private const int BufferSize = 8;

    private readonly byte[] _workBuffer;

    private int _offset;
    private int _buffer;
    private int _bufferPos;

    /// <summary>
    /// Creates a writer that emits its bytes at the start of the given array.
    /// </summary>
    /// <param name="workBuffer">Array receiving the written bytes</param>
    public H264BitStreamWriter(byte[] workBuffer)
    {
        _workBuffer = workBuffer;
        _offset = 0;
        _buffer = 0;
        _bufferPos = 0;
    }

    /// <summary>
    /// Writes a single bit.
    /// </summary>
    public void WriteBit(bool value)
    {
        WriteBits(value ? 1 : 0, 1);
    }

    /// <summary>
    /// Writes the low <paramref name="valueSize"/> bits of a value, most significant bit first.
    /// </summary>
    /// <param name="value">Value holding the bits</param>
    /// <param name="valueSize">Number of bits to write</param>
    public void WriteBits(int value, int valueSize)
    {
        int written = 0;

        while (written < valueSize)
        {
            // Take as many bits as still fit in the current byte.
            int chunk = Math.Min(valueSize - written, GetFreeBufferBits());

            int chunkMask = (1 << chunk) - 1;

            int sourceShift = valueSize - written - chunk;
            int targetShift = BufferSize - _bufferPos - chunk;

            _buffer |= ((value >> sourceShift) & chunkMask) << targetShift;

            written += chunk;
            _bufferPos += chunk;
        }
    }

    /// <summary>
    /// Gets the number of free bits in the current byte, emitting the byte first when it is full.
    /// </summary>
    private int GetFreeBufferBits()
    {
        if (_bufferPos == BufferSize)
        {
            Flush();
        }

        return BufferSize - _bufferPos;
    }

    /// <summary>
    /// Emits the current byte if it holds any bits; unused low bits are left as zero.
    /// </summary>
    public void Flush()
    {
        if (_bufferPos == 0)
        {
            return;
        }

        _workBuffer[_offset] = (byte)_buffer;
        _offset++;

        _buffer = 0;
        _bufferPos = 0;
    }

    /// <summary>
    /// Writes a single 1 bit and emits the current byte.
    /// </summary>
    public void End()
    {
        WriteBit(true);

        Flush();
    }

    /// <summary>
    /// Gets the bytes emitted so far.
    /// </summary>
    public Span<byte> AsSpan()
    {
        return new Span<byte>(_workBuffer).Slice(0, _offset);
    }

    /// <summary>
    /// Writes an unsigned value using a fixed number of bits.
    /// </summary>
    public void WriteU(uint value, int valueSize)
    {
        WriteBits((int)value, valueSize);
    }

    /// <summary>
    /// Writes a signed Exp-Golomb code: a positive k is coded as 2k - 1, zero or a negative k as 2|k|.
    /// </summary>
    public void WriteSe(int value)
    {
        int magnitude = value < 0 ? -value : value;
        int mapped = value > 0 ? (magnitude << 1) - 1 : magnitude << 1;

        WriteUe((uint)mapped);
    }

    /// <summary>
    /// Writes an unsigned Exp-Golomb code: (n - 1) zero bits and a 1 bit, followed by the low
    /// (n - 1) bits of value + 1, where n is the bit length of value + 1.
    /// </summary>
    public void WriteUe(uint value)
    {
        int bitCount = 32 - BitOperations.LeadingZeroCount(value + 1);

        WriteBits(1, bitCount);

        uint suffix = value - ((1u << (bitCount - 1)) - 1);

        WriteBits((int)suffix, bitCount - 1);
    }
}
}
@@ -0,0 +1,23 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>
</PropertyGroup>

<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>

<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="FFmpeg.AutoGen" Version="4.3.0" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\Ryujinx.Graphics.Video\Ryujinx.Graphics.Video.csproj" />
</ItemGroup>

</Project>
@@ -0,0 +1,159 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Video;
using System;

namespace Ryujinx.Graphics.Nvdec.H264
{
static class SpsAndPpsReconstruction
{
public static Span<byte> Reconstruct(ref H264PictureInfo pictureInfo, byte[] workBuffer)
{
H264BitStreamWriter writer = new H264BitStreamWriter(workBuffer);

// Sequence Parameter Set.
writer.WriteU(1, 24);
writer.WriteU(0, 1);
writer.WriteU(3, 2);
writer.WriteU(7, 5);
writer.WriteU(100, 8); // Profile idc
writer.WriteU(0, 8); // Reserved
writer.WriteU(31, 8); // Level idc
writer.WriteUe(0); // Seq parameter set id
writer.WriteUe(pictureInfo.ChromaFormatIdc);

if (pictureInfo.ChromaFormatIdc == 3)
{
writer.WriteBit(false); // Separate colour plane flag
}

writer.WriteUe(0); // Bit depth luma minus 8
writer.WriteUe(0); // Bit depth chroma minus 8
writer.WriteBit(pictureInfo.QpprimeYZeroTransformBypassFlag);
writer.WriteBit(false); // Scaling matrix present flag

writer.WriteUe(pictureInfo.Log2MaxFrameNumMinus4);
writer.WriteUe(pictureInfo.PicOrderCntType);

if (pictureInfo.PicOrderCntType == 0)
{
writer.WriteUe(pictureInfo.Log2MaxPicOrderCntLsbMinus4);
}
else if (pictureInfo.PicOrderCntType == 1)
{
writer.WriteBit(pictureInfo.DeltaPicOrderAlwaysZeroFlag);

writer.WriteSe(0); // Offset for non-ref pic
writer.WriteSe(0); // Offset for top to bottom field
writer.WriteUe(0); // Num ref frames in pic order cnt cycle
}

writer.WriteUe(16); // Max num ref frames
writer.WriteBit(false); // Gaps in frame num value allowed flag
writer.WriteUe(pictureInfo.PicWidthInMbsMinus1);
writer.WriteUe(pictureInfo.PicHeightInMapUnitsMinus1);
writer.WriteBit(pictureInfo.FrameMbsOnlyFlag);

if (!pictureInfo.FrameMbsOnlyFlag)
{
writer.WriteBit(pictureInfo.MbAdaptiveFrameFieldFlag);
}

writer.WriteBit(pictureInfo.Direct8x8InferenceFlag);
writer.WriteBit(false); // Frame cropping flag
writer.WriteBit(false); // VUI parameter present flag

writer.End();

// Picture Parameter Set.
writer.WriteU(1, 24);
writer.WriteU(0, 1);
writer.WriteU(3, 2);
writer.WriteU(8, 5);

writer.WriteUe(0); // Pic parameter set id
writer.WriteUe(0); // Seq parameter set id

writer.WriteBit(pictureInfo.EntropyCodingModeFlag);
writer.WriteBit(false); // Bottom field pic order in frame present flag
writer.WriteUe(0); // Num slice groups minus 1
writer.WriteUe(pictureInfo.NumRefIdxL0ActiveMinus1);
writer.WriteUe(pictureInfo.NumRefIdxL1ActiveMinus1);
writer.WriteBit(pictureInfo.WeightedPredFlag);
writer.WriteU(pictureInfo.WeightedBipredIdc, 2);
writer.WriteSe(pictureInfo.PicInitQpMinus26);
writer.WriteSe(0); // Pic init qs minus 26
writer.WriteSe(pictureInfo.ChromaQpIndexOffset);
writer.WriteBit(pictureInfo.DeblockingFilterControlPresentFlag);
writer.WriteBit(pictureInfo.ConstrainedIntraPredFlag);
writer.WriteBit(pictureInfo.RedundantPicCntPresentFlag);
writer.WriteBit(pictureInfo.Transform8x8ModeFlag);

writer.WriteBit(pictureInfo.ScalingMatrixPresent);

if (pictureInfo.ScalingMatrixPresent)
{
for (int index = 0; index < 6; index++)
{
writer.WriteBit(true);

WriteScalingList(ref writer, pictureInfo.ScalingLists4x4[index]);
}

if (pictureInfo.Transform8x8ModeFlag)
{
for (int index = 0; index < 2; index++)
{
writer.WriteBit(true);

WriteScalingList(ref writer, pictureInfo.ScalingLists8x8[index]);
}
}
}

writer.WriteSe(pictureInfo.SecondChromaQpIndexOffset);

writer.End();

return writer.AsSpan();
}

// ZigZag LUTs from libavcodec.
// 64-entry zig-zag visiting order. WriteScalingList selects this table for any
// scaling list whose length is not 16.
private static readonly byte[] ZigZagDirect = new byte[]
{
0, 1, 8, 16, 9, 2, 3, 10,
17, 24, 32, 25, 18, 11, 4, 5,
12, 19, 26, 33, 40, 48, 41, 34,
27, 20, 13, 6, 7, 14, 21, 28,
35, 42, 49, 56, 57, 50, 43, 36,
29, 22, 15, 23, 30, 37, 44, 51,
58, 59, 52, 45, 38, 31, 39, 46,
53, 60, 61, 54, 47, 55, 62, 63
};

// 16-entry zig-zag visiting order. WriteScalingList selects this table for
// scaling lists of length 16. Each entry is spelled as (a + b * 4); presumably
// column + row * 4 inside a 4x4 block (TODO confirm).
private static readonly byte[] ZigZagScan = new byte[]
{
0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4,
1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4,
3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4
};

/// <summary>
/// Writes one scaling list to the bitstream as a sequence of signed Exp-Golomb deltas,
/// visiting the list entries in zig-zag order.
/// </summary>
/// <remarks>
/// The first delta is relative to 8, every following delta is relative to the previously
/// written entry. Each delta is wrapped into the signed 8-bit range [-128, 127], which is
/// the range H.264 allows for delta_scale; a decoder that reconstructs the entries modulo 256
/// recovers the same values.
/// </remarks>
/// <param name="writer">Bitstream writer that receives the deltas</param>
/// <param name="list">Scaling list to write (16 entries use the 4x4 scan, anything else the 64-entry scan)</param>
private static void WriteScalingList(ref H264BitStreamWriter writer, IArray<byte> list)
{
    byte[] scan = list.Length == 16 ? ZigZagScan : ZigZagDirect;

    int lastScale = 8;

    for (int index = 0; index < list.Length; index++)
    {
        byte value = list[scan[index]];

        // The raw difference can be anywhere in [-255, 255]. Wrapping it to a signed byte keeps
        // it inside the legal delta_scale range while still decoding to the same value mod 256.
        int deltaScale = unchecked((sbyte)(value - lastScale));

        writer.WriteSe(deltaScale);

        lastScale = value;
    }
}
}
}
@@ -0,0 +1,33 @@
using FFmpeg.AutoGen;
using Ryujinx.Graphics.Video;
using System;

namespace Ryujinx.Graphics.Nvdec.H264
{
/// <summary>
/// Surface backed by a native FFmpeg <see cref="AVFrame"/>.
/// </summary>
/// <remarks>
/// The plane and dimension properties read directly from the native frame, so they
/// must not be used after <see cref="Dispose"/> has been called.
/// </remarks>
unsafe class Surface : ISurface
{
    private AVFrame* _frame;

    /// <summary>Native frame owned by this surface; null once disposed.</summary>
    public AVFrame* Frame => _frame;

    public Plane YPlane => new Plane((IntPtr)Frame->data[0], Stride * Height);
    public Plane UPlane => new Plane((IntPtr)Frame->data[1], UvStride * UvHeight);
    public Plane VPlane => new Plane((IntPtr)Frame->data[2], UvStride * UvHeight);

    public int Width => Frame->width;
    public int Height => Frame->height;
    public int Stride => Frame->linesize[0];
    public int UvWidth => (Frame->width + 1) >> 1;
    public int UvHeight => (Frame->height + 1) >> 1;
    public int UvStride => Frame->linesize[1];

    /// <summary>
    /// Allocates the native frame.
    /// </summary>
    /// <exception cref="OutOfMemoryException">The native frame could not be allocated</exception>
    public Surface()
    {
        _frame = ffmpeg.av_frame_alloc();

        if (_frame == null)
        {
            throw new OutOfMemoryException("Failed to allocate the AVFrame.");
        }
    }

    /// <summary>
    /// Releases the native frame. Calling this more than once is a no-op.
    /// </summary>
    public void Dispose()
    {
        if (_frame == null)
        {
            return;
        }

        // av_frame_free is the documented counterpart of av_frame_alloc: it unreferences
        // the frame's buffers and then frees the frame structure itself.
        AVFrame* frame = _frame;
        _frame = null;

        ffmpeg.av_frame_free(&frame);
    }
}
}
@@ -0,0 +1,9 @@
namespace Ryujinx.Graphics.Nvdec.Vp9
{
/// <summary>
/// Bit depth selector; the numeric value of each member equals its number of bits.
/// </summary>
internal enum BitDepth
{
/// <summary>8 bits</summary>
Bits8 = 8,
/// <summary>10 bits</summary>
Bits10 = 10,
/// <summary>12 bits</summary>
Bits12 = 12,
}
}
@@ -0,0 +1,56 @@
namespace Ryujinx.Graphics.Nvdec.Vp9
{
/// <summary>
/// Codec result codes. Members are numbered sequentially starting at zero,
/// with <see cref="CodecOk"/> being zero.
/// </summary>
internal enum CodecErr
{
/// <summary>Operation completed without error.</summary>
CodecOk,

/// <summary>Unspecified error.</summary>
CodecError,

/// <summary>Memory operation failed.</summary>
CodecMemError,

/// <summary>ABI version mismatch.</summary>
CodecAbiMismatch,

/// <summary>Algorithm does not have required capability.</summary>
CodecIncapable,

/// <summary>The given bitstream is not supported.</summary>
/// <remarks>
/// The bitstream was unable to be parsed at the highest level. The decoder
/// is unable to proceed. This error SHOULD be treated as fatal to the
/// stream.
/// </remarks>
CodecUnsupBitstream,

/// <summary>Encoded bitstream uses an unsupported feature.</summary>
/// <remarks>
/// The decoder does not implement a feature required by the encoder. This
/// return code should only be used for features that prevent future
/// pictures from being properly decoded. This error MAY be treated as
/// fatal to the stream or MAY be treated as fatal to the current GOP.
/// </remarks>
CodecUnsupFeature,

/// <summary>The coded data for this stream is corrupt or incomplete.</summary>
/// <remarks>
/// There was a problem decoding the current frame. This return code
/// should only be used for failures that prevent future pictures from
/// being properly decoded. This error MAY be treated as fatal to the
/// stream or MAY be treated as fatal to the current GOP. If decoding
/// is continued for the current GOP, artifacts may be present.
/// </remarks>
CodecCorruptFrame,

/// <summary>An application-supplied parameter is not valid.</summary>
CodecInvalidParam,

/// <summary>An iterator reached the end of list.</summary>
CodecListEnd
}
}
@@ -0,0 +1,59 @@
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;

namespace Ryujinx.Graphics.Nvdec.Vp9.Common
{
/// <summary>
/// Small integer helpers: pixel clamping, power-of-two rounding/alignment and bit counting.
/// </summary>
internal static class BitUtils
{
    /// <summary>
    /// Clamps a value to the 0..255 range.
    /// </summary>
    // FIXME: Enable inlining here after AVX2 gather bug is fixed.
    // [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static byte ClipPixel(int val)
    {
        if (val > 255)
        {
            return 255;
        }

        if (val < 0)
        {
            return 0;
        }

        return (byte)val;
    }

    /// <summary>
    /// Clamps a value to 0..1023 when <paramref name="bd"/> is 10, to 0..4095 when it is 12,
    /// and to 0..255 for every other <paramref name="bd"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ushort ClipPixelHighbd(int val, int bd)
    {
        int upperBound;

        switch (bd)
        {
            case 10:
                upperBound = 1023;
                break;
            case 12:
                upperBound = 4095;
                break;
            default:
                upperBound = 255;
                break;
        }

        return (ushort)Math.Clamp(val, 0, upperBound);
    }

    /// <summary>
    /// Adds half of 2^n to <paramref name="value"/> and shifts the sum right by <paramref name="n"/> bits.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int RoundPowerOfTwo(int value, int n)
    {
        int half = 1 << (n - 1);

        return (value + half) >> n;
    }

    /// <summary>
    /// 64-bit variant: adds half of 2^n to <paramref name="value"/> and shifts right by <paramref name="n"/> bits.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static long RoundPowerOfTwo(long value, int n)
    {
        long half = 1L << (n - 1);

        return (value + half) >> n;
    }

    /// <summary>
    /// Rounds <paramref name="value"/> up to the next multiple of 2^n.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int AlignPowerOfTwo(int value, int n)
    {
        int lowBits = (1 << n) - 1;

        return (value + lowBits) & ~lowBits;
    }

    /// <summary>
    /// Index of the most significant set bit of a non-zero value.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int GetMsb(uint n)
    {
        Debug.Assert(n != 0);

        int leadingZeros = BitOperations.LeadingZeroCount(n);

        return 31 ^ leadingZeros;
    }

    /// <summary>
    /// Number of bits up to and including the most significant set bit of
    /// <paramref name="numValues"/>; zero when the value is zero.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int GetUnsignedBits(uint numValues)
    {
        if (numValues == 0)
        {
            return 0;
        }

        return GetMsb(numValues) + 1;
    }
}
}
@@ -0,0 +1,94 @@
using Ryujinx.Common.Memory;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace Ryujinx.Graphics.Nvdec.Vp9.Common
{
/// <summary>
/// Pooling allocator for native (HGlobal) memory blocks.
/// </summary>
/// <remarks>
/// A freed block stays in the pool and is handed out again when a later request asks for
/// exactly the same size in bytes. When a new block has to be allocated, it takes over an idle
/// slot (releasing whatever native block that slot still held). The pool starts with
/// <see cref="PoolEntries"/> slots and grows when every slot is in use, so that every native
/// block handed out remains tracked and is released by <see cref="Dispose"/>.
/// This type performs no synchronization.
/// </remarks>
internal class MemoryAllocator : IDisposable
{
    private const int PoolEntries = 10;

    private struct PoolItem
    {
        public IntPtr Pointer;
        public int Length;
        public bool InUse;
    }

    private PoolItem[] _pool = new PoolItem[PoolEntries];

    /// <summary>
    /// Allocates (or reuses) a native block large enough for <paramref name="length"/> elements.
    /// </summary>
    /// <typeparam name="T">Element type</typeparam>
    /// <param name="length">Number of elements</param>
    /// <returns>Pointer/length pair describing the block</returns>
    public ArrayPtr<T> Allocate<T>(int length) where T : unmanaged
    {
        int lengthInBytes = Unsafe.SizeOf<T>() * length;

        int idleSlot = -1;

        for (int i = 0; i < _pool.Length; i++)
        {
            ref PoolItem item = ref _pool[i];

            if (item.InUse)
            {
                continue;
            }

            // Only a slot that actually owns a block can be reused; an empty slot has
            // Length == 0 and must not satisfy a zero-byte request with a null pointer.
            if (item.Pointer != IntPtr.Zero && item.Length == lengthInBytes)
            {
                item.InUse = true;

                return new ArrayPtr<T>(item.Pointer, length);
            }

            if (idleSlot < 0)
            {
                idleSlot = i;
            }
        }

        IntPtr ptr = Marshal.AllocHGlobal(lengthInBytes);

        if (idleSlot < 0)
        {
            // Every slot is in use: grow the pool rather than returning an untracked block
            // that neither Free nor Dispose would ever be able to release.
            idleSlot = _pool.Length;
            Array.Resize(ref _pool, _pool.Length * 2);
        }

        ref PoolItem target = ref _pool[idleSlot];

        if (target.Pointer != IntPtr.Zero)
        {
            // The idle slot still holds a block of a different size; release it.
            Marshal.FreeHGlobal(target.Pointer);
        }

        target.Pointer = ptr;
        target.Length = lengthInBytes;
        target.InUse = true;

        return new ArrayPtr<T>(ptr, length);
    }

    /// <summary>
    /// Returns a block to the pool. The native memory is kept for reuse; blocks that were
    /// not handed out by this allocator are ignored.
    /// </summary>
    /// <typeparam name="T">Element type</typeparam>
    /// <param name="arr">Block previously returned by <see cref="Allocate{T}(int)"/></param>
    public unsafe void Free<T>(ArrayPtr<T> arr) where T : unmanaged
    {
        IntPtr ptr = (IntPtr)arr.ToPointer();

        if (ptr == IntPtr.Zero)
        {
            return;
        }

        for (int i = 0; i < _pool.Length; i++)
        {
            ref PoolItem item = ref _pool[i];

            if (item.Pointer == ptr)
            {
                item.InUse = false;
                break;
            }
        }
    }

    /// <summary>
    /// Releases every native block owned by the pool, whether or not it is marked in use,
    /// and resets all slots.
    /// </summary>
    public void Dispose()
    {
        for (int i = 0; i < _pool.Length; i++)
        {
            ref PoolItem item = ref _pool[i];

            if (item.Pointer != IntPtr.Zero)
            {
                Marshal.FreeHGlobal(item.Pointer);
            }

            // Clear the length and in-use flag too, so no slot keeps describing freed memory.
            item = default;
        }
    }
}
}
@@ -0,0 +1,25 @@
using Ryujinx.Common.Memory;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace Ryujinx.Graphics.Nvdec.Vp9.Common
{
/// <summary>
/// Span-based copy and fill helpers for unmanaged data.
/// </summary>
internal static class MemoryUtil
{
    /// <summary>
    /// Copies <paramref name="length"/> elements from <paramref name="source"/> to <paramref name="dest"/>.
    /// </summary>
    public static unsafe void Copy<T>(T* dest, T* source, int length) where T : unmanaged
    {
        Span<T> from = new Span<T>(source, length);
        Span<T> to = new Span<T>(dest, length);

        from.CopyTo(to);
    }

    /// <summary>
    /// Copies a single value from <paramref name="source"/> to <paramref name="dest"/>.
    /// </summary>
    public static void Copy<T>(ref T dest, ref T source) where T : unmanaged
    {
        Span<T> from = MemoryMarshal.CreateSpan(ref source, 1);
        Span<T> to = MemoryMarshal.CreateSpan(ref dest, 1);

        from.CopyTo(to);
    }

    /// <summary>
    /// Sets <paramref name="length"/> elements starting at <paramref name="ptr"/> to <paramref name="value"/>.
    /// </summary>
    public static unsafe void Fill<T>(T* ptr, T value, int length) where T : unmanaged
    {
        Span<T> target = new Span<T>(ptr, length);

        target.Fill(value);
    }
}
}
@@ -0,0 +1,71 @@
using Ryujinx.Graphics.Nvdec.Vp9.Types;

namespace Ryujinx.Graphics.Nvdec.Vp9
{
/// <summary>
/// Compile-time constants shared across the Ryujinx.Graphics.Nvdec.Vp9 namespace.
/// </summary>
internal static class Constants
{
public const int Vp9InterpExtend = 4;

public const int MaxMbPlane = 3;

// Frame identifiers (None is -1, the remaining ones count up from zero).
public const int None = -1;
public const int IntraFrame = 0;
public const int LastFrame = 1;
public const int GoldenFrame = 2;
public const int AltRefFrame = 3;
public const int MaxRefFrames = 4;

public const int MiSizeLog2 = 3;
public const int MiBlockSizeLog2 = 6 - MiSizeLog2; // 64 = 2^6

public const int MiSize = 1 << MiSizeLog2; // pixels per mi-unit
public const int MiBlockSize = 1 << MiBlockSizeLog2; // mi-units per max block
public const int MiMask = MiBlockSize - 1;

public const int PartitionPloffset = 4; // number of probability models per block size

/* Segment Feature Masks */
public const int MaxMvRefCandidates = 2;

public const int CompInterContexts = 5;
public const int RefContexts = 5;

// Interpolation filter identifiers. Note that SwitchableFilters and Bilinear
// share the value 3.
public const int EightTap = 0;
public const int EightTapSmooth = 1;
public const int EightTapSharp = 2;
public const int SwitchableFilters = 3; /* Number of switchable filters */
public const int Bilinear = 3;
public const int Switchable = 4; /* should be the last one */

// Frame
public const int RefsPerFrame = 3;

public const int NumPingPongBuffers = 2;

public const int Class0Bits = 1; /* bits at integer precision for class 0 */
public const int Class0Size = 1 << Class0Bits;

// MvUpp is (2^14 - 1) and MvLow is -(2^14).
public const int MvInUseBits = 14;
public const int MvUpp = (1 << MvInUseBits) - 1;
public const int MvLow = -(1 << MvInUseBits);

// Coefficient token alphabet
public const int ZeroToken = 0; // 0 Extra Bits 0+0
public const int OneToken = 1; // 1 Extra Bits 0+1
public const int TwoToken = 2; // 2 Extra Bits 0+1

public const int PivotNode = 2;

// Smallest value of each coefficient category; each step doubles the gap
// to the next one (2, 4, 8, 16, 32).
public const int Cat1MinVal = 5;
public const int Cat2MinVal = 7;
public const int Cat3MinVal = 11;
public const int Cat4MinVal = 19;
public const int Cat5MinVal = 35;
public const int Cat6MinVal = 67;

public const int EobModelToken = 3;

public const int SegmentAbsData = 1;
public const int MaxSegments = 8;
}
}

Large diffs are not rendered by default.

Large diffs are not rendered by default.

@@ -0,0 +1,164 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using Ryujinx.Graphics.Video;
using System;
using Vp9MvRef = Ryujinx.Graphics.Video.Vp9MvRef;

namespace Ryujinx.Graphics.Nvdec.Vp9
{
/// <summary>
/// Software VP9 frame decoder.
/// </summary>
public class Decoder : IVp9Decoder
{
    public bool IsHardwareAccelerated => false;

    private readonly MemoryAllocator _allocator = new MemoryAllocator();

    public ISurface CreateSurface(int width, int height) => new Surface(width, height);

    // Maps the interpolation filter value found in the picture info to the
    // filter identifiers declared in Constants.
    private static readonly byte[] LiteralToFilter = new byte[]
    {
        Constants.EightTapSmooth,
        Constants.EightTap,
        Constants.EightTapSharp,
        Constants.Bilinear
    };

    /// <summary>
    /// Decodes one frame from <paramref name="bitstream"/> into <paramref name="output"/>.
    /// </summary>
    /// <param name="pictureInfo">Frame parameters; its entropy and backward update counts are referenced by the decoder state</param>
    /// <param name="output">Surface that receives the decoded frame</param>
    /// <param name="bitstream">Compressed frame data</param>
    /// <param name="mvsIn">Motion vectors loaded as the previous frame's motion vectors</param>
    /// <param name="mvsOut">Receives the motion vectors of the decoded frame</param>
    /// <returns>True on success, false when tile decoding raised an internal error</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="mvsIn"/> or <paramref name="mvsOut"/> is longer than the matching internal buffer
    /// </exception>
    public unsafe bool Decode(
        ref Vp9PictureInfo pictureInfo,
        ISurface output,
        ReadOnlySpan<byte> bitstream,
        ReadOnlySpan<Vp9MvRef> mvsIn,
        Span<Vp9MvRef> mvsOut)
    {
        Vp9Common cm = new Vp9Common();

        cm.FrameType = pictureInfo.IsKeyFrame ? FrameType.KeyFrame : FrameType.InterFrame;
        cm.IntraOnly = pictureInfo.IntraOnly;

        cm.Width = output.Width;
        cm.Height = output.Height;

        cm.UsePrevFrameMvs = pictureInfo.UsePrevInFindMvRefs;

        cm.RefFrameSignBias = pictureInfo.RefFrameSignBias;

        cm.BaseQindex = pictureInfo.BaseQIndex;
        cm.YDcDeltaQ = pictureInfo.YDcDeltaQ;
        cm.UvAcDeltaQ = pictureInfo.UvAcDeltaQ;
        cm.UvDcDeltaQ = pictureInfo.UvDcDeltaQ;

        cm.Mb.Lossless = pictureInfo.Lossless;

        cm.TxMode = (TxMode)pictureInfo.TransformMode;

        cm.AllowHighPrecisionMv = pictureInfo.AllowHighPrecisionMv;

        cm.InterpFilter = (byte)pictureInfo.InterpFilter;

        if (cm.InterpFilter != Constants.Switchable)
        {
            cm.InterpFilter = LiteralToFilter[cm.InterpFilter];
        }

        cm.ReferenceMode = (ReferenceMode)pictureInfo.ReferenceMode;

        cm.CompFixedRef = pictureInfo.CompFixedRef;
        cm.CompVarRef = pictureInfo.CompVarRef;

        cm.Log2TileCols = pictureInfo.Log2TileCols;
        cm.Log2TileRows = pictureInfo.Log2TileRows;

        cm.Seg.Enabled = pictureInfo.SegmentEnabled;
        cm.Seg.UpdateMap = pictureInfo.SegmentMapUpdate;
        cm.Seg.TemporalUpdate = pictureInfo.SegmentMapTemporalUpdate;
        cm.Seg.AbsDelta = (byte)pictureInfo.SegmentAbsDelta;
        cm.Seg.FeatureMask = pictureInfo.SegmentFeatureEnable;
        cm.Seg.FeatureData = pictureInfo.SegmentFeatureData;

        cm.Lf.ModeRefDeltaEnabled = pictureInfo.ModeRefDeltaEnabled;
        cm.Lf.RefDeltas = pictureInfo.RefDeltas;
        cm.Lf.ModeDeltas = pictureInfo.ModeDeltas;

        cm.Fc = new Ptr<Vp9EntropyProbs>(ref pictureInfo.Entropy);
        cm.Counts = new Ptr<Vp9BackwardUpdates>(ref pictureInfo.BackwardUpdateCounts);

        cm.FrameRefs[0].Buf = (Surface)pictureInfo.LastReference;
        cm.FrameRefs[1].Buf = (Surface)pictureInfo.GoldenReference;
        cm.FrameRefs[2].Buf = (Surface)pictureInfo.AltReference;
        cm.Mb.CurBuf = (Surface)output;

        cm.Mb.SetupBlockPlanes(1, 1);

        cm.AllocTileWorkerData(_allocator, 1 << pictureInfo.Log2TileCols, 1 << pictureInfo.Log2TileRows);
        cm.AllocContextBuffers(_allocator, output.Width, output.Height);

        // Everything from here on runs with buffers taken from the allocator pool. They must be
        // handed back on every exit path (including decode errors and exceptions); otherwise the
        // pool slots would stay marked as in use.
        try
        {
            cm.InitContextBuffers();
            cm.SetupSegmentationDequant();
            cm.SetupScaleFactors();

            SetMvs(ref cm, mvsIn);

            fixed (byte* dataPtr = bitstream)
            {
                try
                {
                    DecodeFrame.DecodeTiles(ref cm, new ArrayPtr<byte>(dataPtr, bitstream.Length));
                }
                catch (InternalErrorException)
                {
                    return false;
                }
            }

            GetMvs(ref cm, mvsOut);

            return true;
        }
        finally
        {
            cm.FreeTileWorkerData(_allocator);
            cm.FreeContextBuffers(_allocator);
        }
    }

    /// <summary>
    /// Copies the supplied motion vectors and reference frames into the previous frame
    /// motion vector buffer of <paramref name="cm"/>.
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="mvs"/> is longer than the destination buffer</exception>
    private static void SetMvs(ref Vp9Common cm, ReadOnlySpan<Vp9MvRef> mvs)
    {
        if (mvs.Length > cm.PrevFrameMvs.Length)
        {
            throw new ArgumentException($"Size mismatch, expected: {cm.PrevFrameMvs.Length}, but got: {mvs.Length}.");
        }

        for (int i = 0; i < mvs.Length; i++)
        {
            ref var mv = ref cm.PrevFrameMvs[i];

            mv.Mv[0].Row = mvs[i].Mvs[0].Row;
            mv.Mv[0].Col = mvs[i].Mvs[0].Col;
            mv.Mv[1].Row = mvs[i].Mvs[1].Row;
            mv.Mv[1].Col = mvs[i].Mvs[1].Col;

            mv.RefFrame[0] = (sbyte)mvs[i].RefFrames[0];
            mv.RefFrame[1] = (sbyte)mvs[i].RefFrames[1];
        }
    }

    /// <summary>
    /// Copies the current frame motion vectors and reference frames of <paramref name="cm"/>
    /// into <paramref name="mvs"/>.
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="mvs"/> is longer than the source buffer</exception>
    private static void GetMvs(ref Vp9Common cm, Span<Vp9MvRef> mvs)
    {
        if (mvs.Length > cm.CurFrameMvs.Length)
        {
            throw new ArgumentException($"Size mismatch, expected: {cm.CurFrameMvs.Length}, but got: {mvs.Length}.");
        }

        for (int i = 0; i < mvs.Length; i++)
        {
            ref var mv = ref cm.CurFrameMvs[i];

            mvs[i].Mvs[0].Row = mv.Mv[0].Row;
            mvs[i].Mvs[0].Col = mv.Mv[0].Col;
            mvs[i].Mvs[1].Row = mv.Mv[1].Row;
            mvs[i].Mvs[1].Col = mv.Mv[1].Col;

            mvs[i].RefFrames[0] = mv.RefFrame[0];
            mvs[i].RefFrames[1] = mv.RefFrame[1];
        }
    }

    /// <summary>
    /// Releases the native memory held by the decoder's allocator.
    /// </summary>
    public void Dispose() => _allocator.Dispose();
}
}
@@ -0,0 +1,325 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Dsp;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using Ryujinx.Graphics.Video;
using System;
using System.Diagnostics;
using System.Runtime.InteropServices;
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm;

namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal static class Detokenize
{
// Indices into the three-entry probability set (Array3<byte>) that DecodeCoefs
// selects for the current band and context.
private const int EobContextNode = 0;
private const int ZeroContextNode = 1;
private const int OneContextNode = 2;

/// <summary>
/// Derives the context for coefficient <paramref name="c"/> as the rounded-up average of the
/// token cache entries at its two neighbor positions.
/// </summary>
/// <param name="neighbors">Neighbor table holding two positions per coefficient</param>
/// <param name="tokenCache">Token cache indexed by position</param>
/// <param name="c">Coefficient index</param>
private static int GetCoefContext(ReadOnlySpan<short> neighbors, ReadOnlySpan<byte> tokenCache, int c)
{
    const int maxNeighbors = 2;

    int baseIndex = maxNeighbors * c;
    int firstToken = tokenCache[neighbors[baseIndex]];
    int secondToken = tokenCache[neighbors[baseIndex + 1]];

    return (1 + firstToken + secondToken) >> 1;
}

/// <summary>
/// Reads <paramref name="n"/> bits, most significant first, decoding bit number i with
/// probability <c>probs[i]</c>, and returns them assembled into an integer.
/// </summary>
private static int ReadCoeff(
    ref Reader r,
    ReadOnlySpan<byte> probs,
    int n,
    ref ulong value,
    ref int count,
    ref uint range)
{
    int result = 0;

    for (int bitIndex = 0; bitIndex < n; bitIndex++)
    {
        result <<= 1;
        result |= r.ReadBool(probs[bitIndex], ref value, ref count, ref range);
    }

    return result;
}

/// <summary>
/// Decodes the coefficient tokens of one transform block, writing the dequantized
/// values into <paramref name="dqcoeff"/> at the positions given by <paramref name="scan"/>.
/// </summary>
/// <remarks>
/// When <c>xd.Counts</c> is not null, the matching EobBranch/Coef counters are incremented
/// for every token decision. The reader state (value, range, count) is worked on in locals
/// and written back to <paramref name="r"/> before returning.
/// </remarks>
/// <param name="xd">Macroblock state (bit depth, probabilities, counters, current mode info)</param>
/// <param name="type">Plane type used to select probabilities and counters</param>
/// <param name="dqcoeff">Destination for the dequantized coefficients</param>
/// <param name="txSize">Transform size</param>
/// <param name="dq">Dequantization factors; entry 0 is used until the first coefficient position has been consumed, entry 1 afterwards</param>
/// <param name="ctx">Initial context</param>
/// <param name="scan">Scan order: maps coefficient index to position</param>
/// <param name="nb">Neighbor table passed to GetCoefContext</param>
/// <param name="r">Bit reader</param>
/// <returns>The number of coefficient positions consumed before decoding stopped</returns>
private static int DecodeCoefs(
ref MacroBlockD xd,
PlaneType type,
Span<int> dqcoeff,
TxSize txSize,
ref Array2<short> dq,
int ctx,
ReadOnlySpan<short> scan,
ReadOnlySpan<short> nb,
ref Reader r)
{
ref Vp9BackwardUpdates counts = ref xd.Counts.Value;
// Upper bound on the number of coefficients: 16 << (2 * txSize).
int maxEob = 16 << ((int)txSize << 1);
ref Vp9EntropyProbs fc = ref xd.Fc.Value;
int refr = xd.Mi[0].Value.IsInterBlock() ? 1 : 0;
int band, c = 0;
ref Array6<Array6<Array3<byte>>> coefProbs = ref fc.CoefProbs[(int)txSize][(int)type][refr];
// Token class recorded per position; GetCoefContext reads it to derive later contexts.
Span<byte> tokenCache = stackalloc byte[32 * 32];
ReadOnlySpan<byte> bandTranslate = Luts.get_band_translate(txSize);
// 32x32 transforms shift the dequantized value right by one extra bit.
int dqShift = (txSize == TxSize.Tx32x32) ? 1 : 0;
int v;
short dqv = dq[0];
// The 10-bit case uses the 12-bit probability table with its first two entries skipped.
ReadOnlySpan<byte> cat6Prob = (xd.Bd == 12)
? Luts.Vp9Cat6ProbHigh12
: (xd.Bd == 10) ? new ReadOnlySpan<byte>(Luts.Vp9Cat6ProbHigh12).Slice(2) : Luts.Vp9Cat6Prob;
int cat6Bits = (xd.Bd == 12) ? 18 : (xd.Bd == 10) ? 16 : 14;
// Keep value, range, and count as locals. The compiler produces better
// results with the locals than using r directly.
ulong value = r.Value;
uint range = r.Range;
int count = r.Count;

while (c < maxEob)
{
int val = -1;
// Consume the next band entry; bandTranslate advances by one per coefficient.
band = bandTranslate[0];
bandTranslate = bandTranslate.Slice(1);
ref Array3<byte> prob = ref coefProbs[band][ctx];
if (!xd.Counts.IsNull)
{
++counts.EobBranch[(int)txSize][(int)type][refr][band][ctx];
}

// A zero here ends the block (end-of-block token).
if (r.ReadBool(prob[EobContextNode], ref value, ref count, ref range) == 0)
{
if (!xd.Counts.IsNull)
{
++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.EobModelToken];
}

break;
}

// Skip over a run of zero coefficients.
while (r.ReadBool(prob[ZeroContextNode], ref value, ref count, ref range) == 0)
{
if (!xd.Counts.IsNull)
{
++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.ZeroToken];
}

dqv = dq[1];
tokenCache[scan[c]] = 0;
++c;
if (c >= maxEob)
{
r.Value = value;
r.Range = range;
r.Count = count;
return c; // Zero tokens at the end (no eob token)
}
ctx = GetCoefContext(nb, tokenCache, c);
band = bandTranslate[0];
bandTranslate = bandTranslate.Slice(1);
prob = ref coefProbs[band][ctx];
}

if (r.ReadBool(prob[OneContextNode], ref value, ref count, ref range) != 0)
{
// Magnitude of two or more: the remaining decisions use the probability row
// selected by the pivot probability.
ReadOnlySpan<byte> p = Luts.Vp9Pareto8Full[prob[Constants.PivotNode] - 1];
if (!xd.Counts.IsNull)
{
++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.TwoToken];
}

if (r.ReadBool(p[0], ref value, ref count, ref range) != 0)
{
if (r.ReadBool(p[3], ref value, ref count, ref range) != 0)
{
// Categories 3..6: category minimum plus extra bits read by ReadCoeff.
tokenCache[scan[c]] = 5;
if (r.ReadBool(p[5], ref value, ref count, ref range) != 0)
{
if (r.ReadBool(p[7], ref value, ref count, ref range) != 0)
{
val = Constants.Cat6MinVal + ReadCoeff(ref r, cat6Prob, cat6Bits, ref value, ref count, ref range);
}
else
{
val = Constants.Cat5MinVal + ReadCoeff(ref r, Luts.Vp9Cat5Prob, 5, ref value, ref count, ref range);
}
}
else if (r.ReadBool(p[6], ref value, ref count, ref range) != 0)
{
val = Constants.Cat4MinVal + ReadCoeff(ref r, Luts.Vp9Cat4Prob, 4, ref value, ref count, ref range);
}
else
{
val = Constants.Cat3MinVal + ReadCoeff(ref r, Luts.Vp9Cat3Prob, 3, ref value, ref count, ref range);
}
}
else
{
// Categories 1..2.
tokenCache[scan[c]] = 4;
if (r.ReadBool(p[4], ref value, ref count, ref range) != 0)
{
val = Constants.Cat2MinVal + ReadCoeff(ref r, Luts.Vp9Cat2Prob, 2, ref value, ref count, ref range);
}
else
{
val = Constants.Cat1MinVal + ReadCoeff(ref r, Luts.Vp9Cat1Prob, 1, ref value, ref count, ref range);
}
}
// Val may use 18-bits
// The product is computed in 64 bits before shifting.
v = (int)(((long)val * dqv) >> dqShift);
}
else
{
if (r.ReadBool(p[1], ref value, ref count, ref range) != 0)
{
// Magnitude 3 or 4, selected by one more bit.
tokenCache[scan[c]] = 3;
v = ((3 + r.ReadBool(p[2], ref value, ref count, ref range)) * dqv) >> dqShift;
}
else
{
// Magnitude 2.
tokenCache[scan[c]] = 2;
v = (2 * dqv) >> dqShift;
}
}
}
else
{
// Magnitude 1.
if (!xd.Counts.IsNull)
{
++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.OneToken];
}

tokenCache[scan[c]] = 1;
v = dqv >> dqShift;
}
// The sign is read with an even probability (128); a set bit negates the value.
dqcoeff[scan[c]] = (int)HighbdCheckRange(r.ReadBool(128, ref value, ref count, ref range) != 0 ? -v : v, xd.Bd);
++c;
ctx = GetCoefContext(nb, tokenCache, c);
dqv = dq[1];
}

// Publish the locally tracked reader state.
r.Value = value;
r.Range = range;
r.Count = count;
return c;
}

/// <summary>
/// Computes by how many bits the above/left context markers must be shifted when the
/// transform block extends past <c>xd.MaxBlocksWide</c> / <c>xd.MaxBlocksHigh</c>:
/// 8 bits for every block that lies beyond the limit. A limit of zero disables the check,
/// and a shift is left untouched when its direction does not overflow.
/// </summary>
private static void GetCtxShift(ref MacroBlockD xd, ref int ctxShiftA, ref int ctxShiftL, int x, int y, uint txSizeInBlocks)
{
    bool overflowsRight = xd.MaxBlocksWide != 0 && txSizeInBlocks + x > xd.MaxBlocksWide;

    if (overflowsRight)
    {
        ctxShiftA = (int)(txSizeInBlocks - (xd.MaxBlocksWide - x)) * 8;
    }

    bool overflowsBottom = xd.MaxBlocksHigh != 0 && txSizeInBlocks + y > xd.MaxBlocksHigh;

    if (overflowsBottom)
    {
        ctxShiftL = (int)(txSizeInBlocks - (xd.MaxBlocksHigh - y)) * 8;
    }
}

/// <summary>
/// Maps a plane index to a plane type: plane 0 (or below) gives value 0, every other plane gives value 1.
/// </summary>
private static PlaneType GetPlaneType(int plane)
{
    if (plane > 0)
    {
        return (PlaneType)1;
    }

    return (PlaneType)0;
}

/// <summary>
/// Decodes the coefficient tokens of one transform block of a plane and updates the
/// above/left context entries covered by the block.
/// </summary>
/// <remarks>
/// The initial context is the number of sides (above, left) whose covered context entries
/// are non-zero. After decoding, the covered entries are set to 1 per entry when at least
/// one coefficient position was consumed and to 0 otherwise; for transforms larger than
/// 4x4 the marker is shifted by the amounts computed by GetCtxShift.
/// </remarks>
/// <returns>The value returned by DecodeCoefs, or 0 for an invalid transform size</returns>
public static int DecodeBlockTokens(
    ref TileWorkerData twd,
    int plane,
    Luts.ScanOrder sc,
    int x,
    int y,
    TxSize txSize,
    int segId)
{
    ref Reader r = ref twd.BitReader;
    ref MacroBlockD xd = ref twd.Xd;
    ref MacroBlockDPlane pd = ref xd.Plane[plane];
    ref Array2<short> dequant = ref pd.SegDequant[segId];
    Span<sbyte> a = pd.AboveContext.ToSpan().Slice(x);
    Span<sbyte> l = pd.LeftContext.ToSpan().Slice(y);
    int ctxShiftA = 0;
    int ctxShiftL = 0;
    bool aboveSet;
    bool leftSet;

    // Step 1: find out whether the covered above/left context entries are set.
    switch (txSize)
    {
        case TxSize.Tx4x4:
            aboveSet = a[0] != 0;
            leftSet = l[0] != 0;
            break;
        case TxSize.Tx8x8:
            GetCtxShift(ref xd, ref ctxShiftA, ref ctxShiftL, x, y, 1 << (int)TxSize.Tx8x8);
            aboveSet = MemoryMarshal.Cast<sbyte, ushort>(a)[0] != 0;
            leftSet = MemoryMarshal.Cast<sbyte, ushort>(l)[0] != 0;
            break;
        case TxSize.Tx16x16:
            GetCtxShift(ref xd, ref ctxShiftA, ref ctxShiftL, x, y, 1 << (int)TxSize.Tx16x16);
            aboveSet = MemoryMarshal.Cast<sbyte, uint>(a)[0] != 0;
            leftSet = MemoryMarshal.Cast<sbyte, uint>(l)[0] != 0;
            break;
        case TxSize.Tx32x32:
            GetCtxShift(ref xd, ref ctxShiftA, ref ctxShiftL, x, y, 1 << (int)TxSize.Tx32x32);
            // NOTE: Casting to ulong here is safe because the default memory
            // alignment is at least 8 bytes and the Tx32x32 is aligned on 8 byte
            // boundaries.
            aboveSet = MemoryMarshal.Cast<sbyte, ulong>(a)[0] != 0;
            leftSet = MemoryMarshal.Cast<sbyte, ulong>(l)[0] != 0;
            break;
        default:
            Debug.Assert(false, "Invalid transform size.");
            return 0;
    }

    int ctx = (aboveSet ? 1 : 0) + (leftSet ? 1 : 0);

    // Step 2: decode the tokens.
    int eob = DecodeCoefs(
        ref xd,
        GetPlaneType(plane),
        pd.DqCoeff.ToSpan(),
        txSize,
        ref dequant,
        ctx,
        sc.Scan,
        sc.Neighbors,
        ref r);

    // Step 3: write the result back into the covered context entries.
    bool hasCoefficients = eob > 0;

    switch (txSize)
    {
        case TxSize.Tx4x4:
            sbyte flag = (sbyte)(hasCoefficients ? 1 : 0);
            a[0] = flag;
            l[0] = flag;
            break;
        case TxSize.Tx8x8:
            int marker16 = hasCoefficients ? 0x0101 : 0;
            MemoryMarshal.Cast<sbyte, ushort>(a)[0] = (ushort)(marker16 >> ctxShiftA);
            MemoryMarshal.Cast<sbyte, ushort>(l)[0] = (ushort)(marker16 >> ctxShiftL);
            break;
        case TxSize.Tx16x16:
            int marker32 = hasCoefficients ? 0x01010101 : 0;
            MemoryMarshal.Cast<sbyte, uint>(a)[0] = (uint)(marker32 >> ctxShiftA);
            MemoryMarshal.Cast<sbyte, uint>(l)[0] = (uint)(marker32 >> ctxShiftL);
            break;
        case TxSize.Tx32x32:
            ulong marker64 = hasCoefficients ? 0x0101010101010101UL : 0;
            MemoryMarshal.Cast<sbyte, ulong>(a)[0] = marker64 >> ctxShiftA;
            MemoryMarshal.Cast<sbyte, ulong>(l)[0] = marker64 >> ctxShiftL;
            break;
    }

    return eob;
}
}
}

Large diffs are not rendered by default.

@@ -0,0 +1,12 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
/// <summary>
/// Filter related compile-time constants.
/// </summary>
internal static class Filter
{
public const int FilterBits = 7;

// SubpelMask (15) and SubpelShifts (16) are both derived from SubpelBits.
public const int SubpelBits = 4;
public const int SubpelMask = (1 << SubpelBits) - 1;
public const int SubpelShifts = 1 << SubpelBits;
public const int SubpelTaps = 8;
}
}

Large diffs are not rendered by default.

Large diffs are not rendered by default.

@@ -0,0 +1,73 @@
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using System;
using System.Diagnostics;

namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
/// <summary>
/// Helpers for computing and merging 8-bit probabilities.
/// </summary>
internal static class Prob
{
    public const int MaxProb = 255;

    /// <summary>
    /// Computes num / den scaled by 256 (rounded to nearest) as an 8-bit probability.
    /// </summary>
    private static byte GetProb(uint num, uint den)
    {
        Debug.Assert(den != 0);

        ulong scaled = (ulong)num * 256 + (den >> 1);
        int p = (int)(scaled / den);

        // (p > 255) ? 255 : (p < 1) ? 1 : p;
        int saturateHigh = (255 - p) >> 23;
        int liftZero = p == 0 ? 1 : 0;

        return (byte)(p | saturateHigh | liftZero);
    }

    /* This function assumes prob1 and prob2 are already within [1,255] range. */
    /// <summary>
    /// Blends two probabilities; <paramref name="factor"/> out of 256 is the weight of <paramref name="prob2"/>.
    /// </summary>
    public static byte WeightedProb(int prob1, int prob2, int factor)
    {
        int weightedSum = prob1 * (256 - factor) + prob2 * factor;

        return (byte)BitUtils.RoundPowerOfTwo(weightedSum, 8);
    }

    // MODE_MV_MAX_UPDATE_FACTOR (128) * count / MODE_MV_COUNT_SAT;
    private static readonly uint[] CountToUpdateFactor = new uint[]
    {
        0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64,
        70, 76, 83, 89, 96, 102, 108, 115, 121, 128
    };

    private const int ModeMvCountSat = 20;

    /// <summary>
    /// Merges a previous probability with the probability observed from two branch counts.
    /// The weight of the observed probability grows with the total count, saturating at
    /// <see cref="ModeMvCountSat"/>. With no observations the previous probability is returned.
    /// </summary>
    public static byte ModeMvMergeProbs(byte preProb, uint ct0, uint ct1)
    {
        uint total = ct0 + ct1;

        if (total == 0)
        {
            return preProb;
        }

        uint saturated = Math.Min(total, ModeMvCountSat);
        uint updateFactor = CountToUpdateFactor[(int)saturated];
        byte observed = GetProb(ct0, total);

        return WeightedProb(preProb, observed, (int)updateFactor);
    }

    /// <summary>
    /// Recursively merges the probabilities of the tree node pair starting at index
    /// <paramref name="i"/> and returns the total count found below it. Entries of
    /// <paramref name="tree"/> that are zero or negative are leaves whose negated value
    /// indexes <paramref name="counts"/>; positive entries are child node indices.
    /// </summary>
    private static uint TreeMergeProbsImpl(
        uint i,
        sbyte[] tree,
        ReadOnlySpan<byte> preProbs,
        ReadOnlySpan<uint> counts,
        Span<byte> probs)
    {
        uint leftCount;
        int left = tree[i];

        if (left <= 0)
        {
            leftCount = counts[-left];
        }
        else
        {
            leftCount = TreeMergeProbsImpl((uint)left, tree, preProbs, counts, probs);
        }

        uint rightCount;
        int right = tree[i + 1];

        if (right <= 0)
        {
            rightCount = counts[-right];
        }
        else
        {
            rightCount = TreeMergeProbsImpl((uint)right, tree, preProbs, counts, probs);
        }

        int probIndex = (int)(i >> 1);

        probs[probIndex] = ModeMvMergeProbs(preProbs[probIndex], leftCount, rightCount);

        return leftCount + rightCount;
    }

    /// <summary>
    /// Merges the probabilities of every node of <paramref name="tree"/>, starting at its root,
    /// writing the results to <paramref name="probs"/>.
    /// </summary>
    public static void TreeMergeProbs(sbyte[] tree, ReadOnlySpan<byte> preProbs, ReadOnlySpan<uint> counts, Span<byte> probs)
    {
        TreeMergeProbsImpl(0, tree, preProbs, counts, probs);
    }
}
}
@@ -0,0 +1,237 @@
using System;
using System.Buffers.Binary;
using Ryujinx.Common.Memory;

namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
internal struct Reader
{
// Lookup table indexed by the current range. Read and ReadBool shift range and value
// left by the looked-up amount (and reduce count by it) after every decoded bit.
// For indices 1..255 the amount brings the range back into 128..255.
private static readonly byte[] Norm = new byte[]
{
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
// Width in bits of the Value window (64).
private const int BdValueSize = sizeof(ulong) * 8;

// This is meant to be a large, positive constant that can still be efficiently
// loaded as an immediate (on platforms like ARM, for example).
// Even relatively modest values like 100 would work fine.
private const int LotsOfBits = 0x40000000;

// Decoder state: bit window, current range, and bit counter (see HasError for how
// Count is interpreted).
public ulong Value;
public uint Range;
public int Count;
// Remaining, not yet consumed part of the input.
private ArrayPtr<byte> _buffer;

/// <summary>
/// Attaches the reader to <paramref name="buffer"/>, resets its state and reads the marker bit.
/// </summary>
/// <param name="buffer">Input data</param>
/// <param name="size">Number of bytes of <paramref name="buffer"/> to read from</param>
/// <returns>
/// True when a non-zero size is given without a buffer, or when the marker bit read from
/// the start of the data is set; false otherwise
/// </returns>
public bool Init(ArrayPtr<byte> buffer, int size)
{
    bool bufferMissing = size != 0 && buffer.IsNull;

    if (bufferMissing)
    {
        return true;
    }

    _buffer = new ArrayPtr<byte>(ref buffer[0], size);
    Value = 0;
    Count = -8;
    Range = 255;
    Fill();

    // Marker bit
    int markerBit = ReadBit();

    return markerBit != 0;
}

/// <summary>
/// Refills the Value window from the input buffer and advances the buffer by the number
/// of bytes consumed. When the input runs out, LotsOfBits is added to Count (see HasError).
/// </summary>
private void Fill()
{
ReadOnlySpan<byte> buffer = _buffer.ToSpan();
ReadOnlySpan<byte> bufferStart = buffer;
ulong value = Value;
int count = Count;
ulong bytesLeft = (ulong)buffer.Length;
ulong bitsLeft = bytesLeft * 8;
// Bit position inside the window at which the next input byte is placed.
int shift = BdValueSize - 8 - (count + 8);

if (bitsLeft > BdValueSize)
{
// More than a full window of input remains: read 8 bytes at once (big endian)
// and keep only as many whole bytes as fit below the bits already buffered.
int bits = (shift & unchecked((int)0xfffffff8)) + 8;
ulong nv;
ulong bigEndianValues = BinaryPrimitives.ReadUInt64BigEndian(buffer);
nv = bigEndianValues >> (BdValueSize - bits);
count += bits;
buffer = buffer.Slice(bits >> 3);
value = Value | (nv << (shift & 0x7));
}
else
{
// Near the end of the input: add the remaining bytes one at a time.
int bitsOver = shift + 8 - (int)bitsLeft;
int loopEnd = 0;
if (bitsOver >= 0)
{
// The input cannot fill the window completely; flag that in count and stop
// the byte loop once the remaining input has been consumed.
count += LotsOfBits;
loopEnd = bitsOver;
}

if (bitsOver < 0 || bitsLeft != 0)
{
while (shift >= loopEnd)
{
count += 8;
value |= (ulong)buffer[0] << shift;
buffer = buffer.Slice(1);
shift -= 8;
}
}
}

// NOTE: Variable 'buffer' may not relate to '_buffer' after decryption,
// so we increase '_buffer' by the amount that 'buffer' moved, rather than
// assign 'buffer' to '_buffer'.
_buffer = _buffer.Slice(bufferStart.Length - buffer.Length);
Value = value;
Count = count;
}

/// <summary>
/// Reports whether bits were decoded after the end of the stream had been reached.
/// </summary>
/// <returns>True when decoding ran past the end of the data, false when there is no error</returns>
public bool HasError()
{
    // Check if we have reached the end of the buffer.
    //
    // 'Count' stores the number of bits in the 'Value' buffer, minus 8. The top
    // byte is part of the algorithm, and the remainder is buffered to be shifted
    // into it. So if Count == 8, the top 16 bits of 'Value' are occupied, 8 for
    // the algorithm and 8 in the buffer.
    //
    // When reading a byte from the user's buffer, Count is filled with 8 and
    // one byte is filled into the value buffer. When we reach the end of the
    // data, Count is additionally filled with LotsOfBits. So when
    // Count == LotsOfBits - 1, the user's data has been exhausted.
    bool aboveWindowSize = Count > BdValueSize;
    bool belowEndMarker = Count < LotsOfBits;

    return aboveWindowSize && belowEndMarker;
}

/// <summary>
/// Decodes one bit using the given probability (out of 256) and updates the reader state.
/// </summary>
/// <param name="prob">Probability used to split the current range</param>
/// <returns>The decoded bit (0 or 1)</returns>
public int Read(int prob)
{
    uint split = (Range * (uint)prob + (256 - (uint)prob)) >> 8;

    if (Count < 0)
    {
        Fill();
    }

    ulong window = Value;
    int bitsBuffered = Count;

    // The split point aligned with the top byte of the window.
    ulong bigSplit = (ulong)split << (BdValueSize - 8);

    uint newRange;
    int decoded;

    if (window >= bigSplit)
    {
        newRange = Range - split;
        window -= bigSplit;
        decoded = 1;
    }
    else
    {
        newRange = split;
        decoded = 0;
    }

    // Renormalize using the lookup table.
    int shift = Norm[newRange];

    Value = window << shift;
    Count = bitsBuffered - shift;
    Range = newRange << shift;

    return decoded;
}

/// <summary>
/// Decodes one bit with probability 128 (vpx_prob_half).
/// </summary>
public int ReadBit() => Read(128);

/// <summary>
/// Reads <paramref name="bits"/> bits, most significant first, and returns them as an integer.
/// </summary>
public int ReadLiteral(int bits)
{
    int result = 0;
    int position = bits - 1;

    while (position >= 0)
    {
        result |= ReadBit() << position;
        position--;
    }

    return result;
}

/// <summary>
/// Walks <paramref name="tree"/> from its root, decoding one bit per node with the probability
/// stored at <c>probs[node &gt;&gt; 1]</c>, until an entry that is zero or negative is reached.
/// </summary>
/// <returns>The negated value of the entry at which the walk stopped</returns>
public int ReadTree(ReadOnlySpan<sbyte> tree, ReadOnlySpan<byte> probs)
{
    sbyte node = 0;

    do
    {
        node = tree[node + Read(probs[node >> 1])];
    }
    while (node > 0);

    return -node;
}

/// <summary>
/// Decodes one bit like <see cref="Read"/>, but works on caller-held copies of the reader state.
/// The fields of the reader are only touched when the window has to be refilled.
/// </summary>
/// <param name="prob">Probability used to split the current range</param>
/// <param name="value">Caller-held bit window</param>
/// <param name="count">Caller-held bit counter</param>
/// <param name="range">Caller-held range</param>
/// <returns>The decoded bit (0 or 1)</returns>
public int ReadBool(int prob, ref ulong value, ref int count, ref uint range)
{
    uint split = (range * (uint)prob + (256 - (uint)prob)) >> 8;
    ulong bigSplit = (ulong)split << (BdValueSize - 8);

    if (count < 0)
    {
        // Fill works on the fields, so sync the locals into them and back.
        Value = value;
        Count = count;
        Fill();
        value = Value;
        count = Count;
    }

    int decoded;

    if (value >= bigSplit)
    {
        range -= split;
        value -= bigSplit;
        decoded = 1;
    }
    else
    {
        range = split;
        decoded = 0;
    }

    // Renormalize using the lookup table.
    int shift = Norm[range];

    range <<= shift;
    value <<= shift;
    count -= shift;

    return decoded;
}

/// <summary>
/// Finds the end of the coded buffer by stepping the buffer position back one byte for
/// every 8 bits by which Count exceeds 8, for as long as Count stays below the window size.
/// </summary>
/// <returns>The adjusted buffer position</returns>
public ArrayPtr<byte> FindEnd()
{
    while (true)
    {
        bool hasWholeByteBuffered = Count > 8 && Count < BdValueSize;

        if (!hasWholeByteBuffered)
        {
            break;
        }

        Count -= 8;
        _buffer = _buffer.Slice(-1);
    }

    return _buffer;
}
}
}
@@ -0,0 +1,54 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
/// <summary>
/// Fixed-point constants shared by the transform (idct/dct) functions. The cosine and
/// sine constants are scaled by 2^14 (see <see cref="DctConstBits"/>).
/// </summary>
internal static class TxfmCommon
{
// Constants used by all idct/dct functions
public const int DctConstBits = 14;
// Half of 2^DctConstBits.
public const int DctConstRounding = 1 << (DctConstBits - 1);

public const int UnitQuantShift = 2;
public const int UnitQuantFactor = 1 << UnitQuantShift;

// Constants:
// for (int i = 1; i < 32; ++i)
// Console.WriteLine("public const short CosPi{0}_64 = {1};", i, MathF.Round(16384 * MathF.Cos(i * MathF.PI / 64)));
// Note: sin(k * Pi / 64) = cos((32 - k) * Pi / 64)
public const short CosPi1_64 = 16364;
public const short CosPi2_64 = 16305;
public const short CosPi3_64 = 16207;
public const short CosPi4_64 = 16069;
public const short CosPi5_64 = 15893;
public const short CosPi6_64 = 15679;
public const short CosPi7_64 = 15426;
public const short CosPi8_64 = 15137;
public const short CosPi9_64 = 14811;
public const short CosPi10_64 = 14449;
public const short CosPi11_64 = 14053;
public const short CosPi12_64 = 13623;
public const short CosPi13_64 = 13160;
public const short CosPi14_64 = 12665;
public const short CosPi15_64 = 12140;
public const short CosPi16_64 = 11585;
public const short CosPi17_64 = 11003;
public const short CosPi18_64 = 10394;
public const short CosPi19_64 = 9760;
public const short CosPi20_64 = 9102;
public const short CosPi21_64 = 8423;
public const short CosPi22_64 = 7723;
public const short CosPi23_64 = 7005;
public const short CosPi24_64 = 6270;
public const short CosPi25_64 = 5520;
public const short CosPi26_64 = 4756;
public const short CosPi27_64 = 3981;
public const short CosPi28_64 = 3196;
public const short CosPi29_64 = 2404;
public const short CosPi30_64 = 1606;
public const short CosPi31_64 = 804;

// 16384 * sqrt(2) * sin(kPi / 9) * 2 / 3
public const short SinPi1_9 = 5283;
public const short SinPi2_9 = 9929;
public const short SinPi3_9 = 13377;
public const short SinPi4_9 = 15212;
}
}